|
@@ -0,0 +1,210 @@
|
|
|
+c='/* s2 - perfect hash table lookups
|
|
|
+ Copyright (C) 2019 Ariadne Devos
|
|
|
+
|
|
|
+ This program is free software: you can redistribute it and/or modify
|
|
|
+ it under the terms of the GNU General Public License as published by
|
|
|
+ the Free Software Foundation, either version 3 of the License, or
|
|
|
+ (at your option) any later version.
|
|
|
+
|
|
|
+ This program is distributed in the hope that it will be useful,
|
|
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
+ GNU General Public License for more details.
|
|
|
+
|
|
|
+ You should have received a copy of the GNU General Public License
|
|
|
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
|
|
+/* This comment applies to both the script and code embedded in the
|
|
|
+ generated output. */'
|
|
|
+# This assumes the Bash shell (name references and {var} redirections are used below)!
|
|
|
+IFS=' '
|
|
|
+# * Script-wide state, shared by the functions below
|
|
|
+# file descriptor number of the temporary file holding gperf's input (opened in prepare)
|
|
|
+tmpgperf=""
|
|
|
+# file descriptor number of the temporary file receiving the post-processed gperf output (opened in prepare)
|
|
|
+gperfout=""
|
|
|
+# file descriptor number of the temporary file with code to write before the generated code
|
|
|
+prefix=""
|
|
|
+# file descriptor number of the temporary file with code to write after the generated code
|
|
|
+postfix=""
|
|
|
+# name of the table, set by prepare; used as a prefix for the generated C identifiers
|
|
|
+table=""
|
|
|
+
|
|
|
+# oops MESSAGE
+#
+# Print MESSAGE to stderr, followed by the exit status of the command that
+# ran just before oops was called, then terminate the script with status 2.
+function oops() {
+	# Grab the status first: any later command would overwrite $?.
+	local status="$?"
+	echo -E "$1 (status code: ${status})" >&2
+	exit 2
+}
|
|
|
+
|
|
|
+function opentmp() {
|
|
|
+ # pass-by-reference
|
|
|
+ local -n fd="$1"
|
|
|
+ local fd="$1"
|
|
|
+ local name="`mktemp`"
|
|
|
+ if [ -z "$name" ]; then
|
|
|
+ oops 'cannot create temporary file';
|
|
|
+ fi
|
|
|
+ # {fd}: allocate a file descriptor
|
|
|
+ exec {fd}<>"${name}" || oops 'cannot open temporary file'
|
|
|
+ # close the temporary file while it is open,
|
|
|
+ # to avoid any trapping
|
|
|
+ #
|
|
|
+ # There is a little race window, but empty files
|
|
|
+ # are small.
|
|
|
+ rm "${name}"
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+function prepare() {
|
|
|
+ table="$1"
|
|
|
+ # TODO: validate ${table}
|
|
|
+ # open a temporary file for gperf's input
|
|
|
+ opentmp tmpgperf
|
|
|
+ opentmp gperfout
|
|
|
+ opentmp prefix
|
|
|
+ opentmp postfix
|
|
|
+ cat >&"${tmpgperf}" <<EOF
|
|
|
+/* don't hide the hash table */
|
|
|
+%global-table
|
|
|
+/* Don't put strings into the wordlist, only offsets -- reduces memory usage */
|
|
|
+%pic
|
|
|
+/* Don't allow lookup tables to be modified -- facilitates static analysis,
|
|
|
+ may reduce memory usage on some shared memory systems
|
|
|
+ and catches accidental writes. */
|
|
|
+%readonly-tables
|
|
|
+%define hash-function-name ${table}_hash
|
|
|
+%define lookup-function-name ${table}_lookup_ignore
|
|
|
+%define string-pool-name ${table}_strings
|
|
|
+/* a hash table from keywords to their value */
|
|
|
+%define word-array-name ${table}_entries
|
|
|
+
|
|
|
+/* gperf requires the string pool offset to be named 'name'. */
|
|
|
+/* This has been duplicated from <sHT/phash.h>. It will be deleted from the
|
|
|
+ output. */
|
|
|
+struct sHT_perfect_entry { uint_least16_t name; uint_least16_t length; uint_least32_t value; };
|
|
|
+
|
|
|
+%struct-type
|
|
|
+%language=ANSI-C
|
|
|
+
|
|
|
+%{
|
|
|
+$c
|
|
|
+/* This is a generated file, do not edit manually! */
|
|
|
+#include <sHT/phash.h>
|
|
|
+/* For size_t, offsetof */
|
|
|
+#include <stddef.h>
|
|
|
+%}
|
|
|
+%%
|
|
|
+EOF
|
|
|
+ [ "$?" -eq 0 ] || oops 'cannot write gperf header'
|
|
|
+}
|
|
|
+
|
|
|
+function entry() {
|
|
|
+ local name="$1"
|
|
|
+ local value="$2"
|
|
|
+ # - TODO: generate verification conditions for the hashing of all
|
|
|
+ # keywords, for formal verification
|
|
|
+ if [ -z "$name" ] || [ -z "$value" ]; then
|
|
|
+ oops 'keywords and values may not be empty'
|
|
|
+ fi
|
|
|
+
|
|
|
+ echo -E "\"${name}\", sizeof(\"${name}\") - 1, $value" >&"${tmpgperf}" || oops 'cannot write hash table entry'
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+# pre CODE
+#
+# Append CODE, verbatim and followed by a newline, to the text that solve
+# prints before the generated code. Calls oops (which exits) on failure.
+function pre() {
+	local code="$1"
+	echo -E "${code}" >&"${prefix}" || oops 'cannot write preprocessor input'
+}
|
|
|
+
|
|
|
+# post CODE
+#
+# Append CODE, verbatim and followed by a newline, to the text that solve
+# prints after the generated code. Calls oops (which exits) on failure.
+function post() {
+	local code="$1"
+	echo -E "${code}" >&"${postfix}" || oops 'cannot write preprocessor input'
+}
|
|
|
+
|
|
|
+# solve
+#
+# Run gperf on the collected input, post-process its output with sed,
+# reject output containing 'switch' statements, append the table
+# description and the exported lookup function, and finally print
+# prefix, generated code and postfix to standard output.
+# Exits with status 2 (directly or through oops) on any failure;
+# all diagnostics go to standard error.
+function solve() {
+	# Generate the hash function and table
+	# /dev/fd is recognised by Bash and exists on my
+	# Linux box -- Debian 9. Bash doesn't have a rewind or seek builtin
+	#
+	# Post-process the output:
+	# - let the 'len' argument of the hash function be a size_t
+	#   instead of unsigned int.
+	#   (Also correct the hash function output type,
+	#   and let it be inline)
+	# - don't allocate memory for a NUL terminator
+	# - don't emit the lookup function (static inline)
+	#   (TODO: remove completely)
+	# - compute string offsets more according to the standard
+	#   -- with offsetof
+	# - make sure string offsets of empty entries are within
+	#   bounds (zero offset and length)
+	#   (also, make the 'unknown' value explicit)
+	#   (this may need to be repeated multiple times in a line)
+	# - remove the reference to strcmp
+	# - remove the definition of sHT_perfect_entry
+	set -o pipefail
+	gperf < "/dev/fd/${tmpgperf}" | \
+		sed -E \
+		-e 's/\bunsigned int len\b/size_t len/' \
+		-e 's/^static unsigned int$/static inline size_t/' \
+		-e 's/(char [a-zA-Z0-9_]+_strings_str[0-9]+\[sizeof\(".*"\))\];/\1 - 1];/' \
+		-e 's/^const struct sHT_perfect_entry \*$/static inline const struct sHT_perfect_entry \*/' \
+		-e 's/\(int\)\(long\)&\(\(struct ([a-zA-Z0-9_]+_strings)_t \*\)0\)->\1_str([0-9]+)\b/offsetof\(struct \1_t, \1_str\2\)/' \
+		-e ':begin s/\{-1\}/{ 0, 0, 0 }/;t begin' \
+		-e 's/!strcmp \(str \+ 1\, s \+ 1\)/0/' \
+		-e 's/^(struct sHT_perfect_entry) \{.*\};$/\1;/' \
+		>&"${gperfout}"
+	# With pipefail set, this is non-zero when either gperf or sed failed.
+	local err="$?"
+	if [ "$err" -ne 0 ]; then
+		# Standard output carries the generated C code, so the
+		# diagnostic must go to standard error like the rest.
+		echo -E "cannot generate hash function and table (status code: $err)" >&2
+		# output the test case, to ease debugging,
+		echo -E 'Begin failed input:' >&2
+		# The input must be rewound!
+		cat >&2 <"/dev/fd/${tmpgperf}"
+		echo -E 'End failed input.' >&2
+		exit 2;
+	fi
+	# Check the output:
+	# (the input must be rewound!)
+	# - for switch statements (unhidden conditionals)
+	#   (these probably can be hidden)
+	grep -E '\bswitch\b' >/dev/null < "/dev/fd/${gperfout}"
+	# grep: 0 = pattern found, 1 = not found, 2 = error
+	case "$?" in
+	0) oops 'TODO: generated code gperf output contains unguarded branches' ;;
+	1) ;;
+	2) oops 'grep does not recognise unhidden conditional pattern' ;;
+	*) oops 'unexpected grep exit status' ;;
+	esac
+	# Generate an exported lookup function
+	# (Why only export a lookup function? For simpler live-patch consistency.)
+	# The input must be rewound!
+	#
+	# The check is attached to cat itself, so that oops reports the
+	# status of cat and not the status of a separate test command.
+	cat >&"${gperfout}" <<EOF || oops 'cannot generate lookup function'
+
+static const struct sHT_perfect_table ${table}_table = {
+ .string_pool = ${table}_strings,
+ .values_length = sizeof(${table}_entries) / sizeof(*${table}_entries),
+ .values = ${table}_entries,
+};
+
+/* Carefully read <sHT/phash.h> before use */
+size_t
+${table}_lookup(const uint8_t string[], size_t length)
+{
+ size_t hash = ${table}_hash(string, length);
+ return sHT_perfect_lookup(&${table}_table, string, length, hash);
+}
+EOF
+	# Emit the final result on standard output, in order.
+	cat < "/dev/fd/${prefix}" || oops 'cannot output prefix'
+	cat < "/dev/fd/${gperfout}" || oops 'cannot output generated code'
+	cat < "/dev/fd/${postfix}" || oops 'cannot output postfix'
+}
|
|
|
+
|
|
|
+case "$1" in
|
|
|
+--help)
|
|
|
+ cat <<EOF
|
|
|
+Generate perfect hash tables.
|
|
|
+How to use:
|
|
|
+bash ./buffer/gen-phash.sh --gen instructions.sh > output.c
|
|
|
+EOF
|
|
|
+ [ "$?" -eq 0 ] || oops 'cannot output help message' ;;
|
|
|
+--gen)
|
|
|
+ source "$2" || oops 'cannot generate hash table' ;;
|
|
|
+*) oops 'unsupported option' ;;
|
|
|
+esac
|