лет назад: 9 · e07f3d5a9e
--- a/triehash/.travis.yml
+++ b/triehash/.travis.yml
@@ -0,0 +1,14 @@
 
				+language: perl
			
 
				+perl:
			
 
				+  - "5.24"
			
 
				+  - "5.22"
			
 
				+  - "5.20"
			
 
				+  - "5.18"
			
 
				+  - "5.16"
			
 
				+  - "5.14"
			
 
				+install:
			
 
				+  - cpanm --quiet --notest --skip-satisfied Devel::Cover Devel::Cover::Report::Codecov
			
 
				+script:
			
 
				+  - ./tests/run-tests.sh
			
 
				+after_script:
			
 
				+  - cover -report codecov
			
--- a/triehash/LICENSE.md
+++ b/triehash/LICENSE.md
@@ -0,0 +1,17 @@
 
				+Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+of this software and associated documentation files (the "Software"), to deal
			
 
				+in the Software without restriction, including without limitation the rights
			
 
				+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+copies of the Software, and to permit persons to whom the Software is
			
 
				+furnished to do so, subject to the following conditions:
			
 
				+
			
 
				+The above copyright notice and this permission notice shall be included in
			
 
				+all copies or substantial portions of the Software.
			
 
				+
			
 
				+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+THE SOFTWARE.
			
--- a/triehash/README.md
+++ b/triehash/README.md
@@ -0,0 +1,58 @@
 
				+# Order-preserving minimal perfect hash function generator
			
 
				+
			
 
				+Build order-preserving minimal perfect hash functions.
			
 
				+
			
 
				+[![codecov](https://codecov.io/gh/julian-klode/triehash/branch/master/graph/badge.svg)](https://codecov.io/gh/julian-klode/triehash)
			
 
				+[![Build Status](https://travis-ci.org/julian-klode/triehash.svg?branch=master)](https://travis-ci.org/julian-klode/triehash)
			
 
				+
			
 
				+## Performance
			
 
				+
			
 
				+Performance was evaluated against other hash functions. As an input set, the
			
 
				+fields of Debian Packages and Sources files was used, and each hash function
			
 
				+was run 1,000,000 times for each word. The byte count of the words were then
			
 
				+summed up and divided by the total number of nanoseconds each function ran, so
			
 
				+all speeds below are given in bytes per nanosecond, AKA gigabyte per second.
			
 
				+
			
 
				+arch/function|jak-x230 (amd64)|backup (amd64)|asachi.d.o (arm64)|asachi.d.o (armel)|asachi.d.o (armhf)|plummer.d.o (ppc64el)|eller.d.o (mipsel)
			
 
				+-------------|----------------|--------------|------------------|------------------|------------------|---------------------|------------------
			
 
				+Trie         |      2.4       |      1.9     |      1.2         |      0.9         |      0.8         |      2.0            |      0.2
			
 
				+Trie (*)     |      2.2       |      1.7     |      0.8         |      0.7         |      0.7         |      1.8            |      0.2
			
 
				+re2c         |      1.7       |      1.3     |      0.9         |      0.9         |      0.7         |      1.6            |      0.2
			
 
				+re2c (*)     |      1.2       |      0.9     |      0.6         |      0.6         |      0.5         |      1.1            |      0.1
			
 
				+gperf (*)    |      0.7       |      0.5     |      0.2         |      0.2         |      0.2         |      0.5            |      0.1
			
 
				+gperf        |      1.3       |      0.9     |      0.3         |      0.3         |      0.2         |      0.4            |      0.1
			
 
				+djb (*)      |      0.7       |      0.5     |      0.3         |      0.3         |      0.3         |      0.5            |      0.1
			
 
				+djb (**)     |      1.0       |      0.7     |      0.4         |      0.5         |      0.5         |      0.6            |      0.2
			
 
				+djb          |      0.9       |      0.7     |      0.5         |      0.5         |      0.5         |      0.7            |      0.2
			
 
				+apt (*)      |      1.2       |      0.9     |      0.7         |      0.7         |      0.7         |      1.1            |      0.2
			
 
				+apt (**)     |      2.3       |      1.7     |      0.7         |      0.9         |      0.8         |      1.9            |      0.2
			
 
				+
			
 
				+And transposed:
			
 
				+
			
 
				+function/arch        |Trie     |Trie (*) |re2c     |re2c (*) |gperf (*)|gperf    |djb (*)  |djb (**) |djb      |apt (*)  |apt (**)
			
 
				+---------------------|---------|---------|---------|---------|---------|---------|---------|---------|---------|---------|---------
			
 
				+jak-x230 (amd64)     |      2.4|      2.2|      1.7|      1.2|      0.7|      1.3|      0.7|      1.0|      0.9|      1.2|      2.3
			
 
				+backup (amd64)       |      1.9|      1.7|      1.3|      0.9|      0.5|      0.9|      0.5|      0.7|      0.7|      0.9|      1.7
			
 
				+asachi.d.o (arm64)   |      1.2|      0.8|      0.9|      0.6|      0.2|      0.3|      0.3|      0.4|      0.5|      0.7|      0.7
			
 
				+asachi.d.o (armel)   |      0.9|      0.7|      0.9|      0.6|      0.2|      0.3|      0.3|      0.5|      0.5|      0.7|      0.9
			
 
				+asachi.d.o (armhf)   |      0.8|      0.7|      0.7|      0.5|      0.2|      0.2|      0.3|      0.5|      0.5|      0.7|      0.8
			
 
				+plummer.d.o (ppc64el)|      2.0|      1.8|      1.6|      1.1|      0.5|      0.4|      0.5|      0.6|      0.7|      1.1|      1.9
			
 
				+eller.d.o (mipsel)   |      0.2|      0.2|      0.2|      0.1|      0.1|      0.1|      0.1|      0.2|      0.2|      0.2|      0.2
			
 
				+
			
 
				+
			
 
				+Legend:
			
 
				+
			
 
				+* The (*) variants are case-insensitive, (**) are more optimised versions
			
 
				+  of the (*) versions.
			
 
				+* DJB (*) is a DJB Hash with naive lowercase conversion, DJB (**) just ORs one
			
 
				+  bit into each value to get alphabetical characters to be lowercase
			
 
				+* APT (*) is the AlphaHash function from APT which hashes the last 8 bytes in a
			
 
				+  word in a case-insensitive manner. APT (**) is the same function unrolled.
			
 
				+* All hosts except the x230 are Debian porterboxes. The x230 has a Core i5-3320M,
			
 
				+  barriere has an Opteron 23xx.
			
 
				+
			
 
				+Notes:
			
 
				+
			
 
				+* The overhead is larger than needed on some platforms due to gcc inserting
			
 
				+  unneeded zero extend instructions, see:
			
 
				+  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77729
			
--- a/triehash/tests/framework.sh
+++ b/triehash/tests/framework.sh
@@ -0,0 +1,84 @@
 
				+#!/bin/sh
			
 
				+# Simple integration test framework
			
 
				+
			
 
				+set -e
			
 
				+
			
 
				+
			
 
				+cleanup() {
			
 
				+    rm -f test.output test.c test.h test.tree
			
 
				+}
			
 
				+
			
 
				+dumpone() {
			
 
				+    if [ -e "$@" ]; then
			
 
				+        echo "Content of $@:"
			
 
				+        cat "$@" | sed "s#^#\t#g"
			
 
				+    fi
			
 
				+
			
 
				+}
			
 
				+
			
 
				+dump() {
			
 
				+    dumpone test.output
			
 
				+    dumpone test.c
			
 
				+    dumpone test.h
			
 
				+    dumpone test.tree
			
 
				+    return 1
			
 
				+}
			
 
				+
			
 
				+testsuccess() {
			
 
				+    [ "$INNER" ] || cleanup
			
 
				+    [ "$INNER" ] || echo "Testing success of $@"
			
 
				+    if ! "$@" > test.output 2>&1; then
			
 
				+        echo "ERROR: Running $@ failed with error $?, messages were:" >&2
			
 
				+        dump
			
 
				+        return 1
			
 
				+    fi
			
 
				+}
			
 
				+
			
 
				+testfailure() {
			
 
				+    [ "$INNER" ] || cleanup
			
 
				+    [ "$INNER" ] || echo "Testing failure of $@"
			
 
				+    if "$@" > test.output 2>&1; then
			
 
				+        echo "ERROR: Running $@ unexpectedly succeeded, messages were:" >&2
			
 
				+        dump
			
 
				+        return 1
			
 
				+    fi
			
 
				+}
			
 
				+
			
 
				+testfileequal() {
			
 
				+    [ "$INNER" ] ||  echo "Testing output of $2"
			
 
				+    printf "%b\n" "$1" > expected
			
 
				+    if ! diff -u "expected" "$2" > test.diff; then
			
 
				+        echo "ERROR: Differences between expected output and and $2:" >&2
			
 
				+        cat test.diff | sed "s#^#\t#g"
			
 
				+        dump
			
 
				+        return 1
			
 
				+    fi
			
 
				+}
			
 
				+
			
 
				+testgrep() {
			
 
				+    [ "$INNER" ] ||  echo "Testing grep $@"
			
 
				+    INNER=1 testsuccess grep "$@"
			
 
				+    unset INNER
			
 
				+}
			
 
				+
			
 
				+testsuccessequal() {
			
 
				+    expect="$1"
			
 
				+    shift
			
 
				+    cleanup
			
 
				+    echo "Testing success and output of $@"
			
 
				+    INNER=1 testsuccess "$@"
			
 
				+    INNER=1 testfileequal "$expect" test.output
			
 
				+    unset INNER
			
 
				+}
			
 
				+
			
 
				+
			
 
				+WORDS="Word-_0
			
 
				+Word = 42
			
 
				+VeryLongWord
			
 
				+Label ~ Word2
			
 
				+= -9"
			
 
				+
			
 
				+triehash() {
			
 
				+    printf "%b\n" "$WORDS" | perl -MDevel::Cover=-summary,0,-silent,1 $(dirname $(dirname $(readlink -f $0)))/triehash.pl "$@" || return $?
			
 
				+    return $?
			
 
				+}
			
--- a/triehash/tests/run-tests.sh
+++ b/triehash/tests/run-tests.sh
@@ -0,0 +1,22 @@
 
				+#!/bin/sh
			
 
				+DIR=$(dirname $(readlink -f $0))
			
 
				+
			
 
				+# Let's go into triehash.pl's dir
			
 
				+cd $(dirname "$DIR")
			
 
				+
			
 
				+rm -rf cover_db
			
 
				+
			
 
				+count=$(cd "$DIR" && echo test-* | wc -w)
			
 
				+i=1
			
 
				+
			
 
				+for test in $DIR/test-*; do
			
 
				+    echo "[$i/$count] Running testcase $test"
			
 
				+    if ! $test > test.summary 2>&1; then
			
 
				+        cat test.summary
			
 
				+        exit 1
			
 
				+    fi
			
 
				+    i=$((i + 1))
			
 
				+done
			
 
				+
			
 
				+
			
 
				+cover
			
--- a/triehash/tests/test-basic
+++ b/triehash/tests/test-basic
@@ -0,0 +1,245 @@
 
				+#!/bin/sh
			
 
				+. $(dirname $(readlink -f $0))/framework.sh
			
 
				+
			
 
				+# Check for non-existing files
			
 
				+testfailure triehash -C /does/not/exist1 -H /does/not/exist1 /does/not/exist/input
			
 
				+
			
 
				+# Check that we can specify - for -C and -H
			
 
				+testsuccessequal "#ifndef TRIE_HASH_PerfectHash
			
 
				+#define TRIE_HASH_PerfectHash
			
 
				+#include <stddef.h>
			
 
				+#include <stdint.h>
			
 
				+enum PerfectKey {
			
 
				+    Unknown = -1,
			
 
				+};
			
 
				+static enum PerfectKey PerfectHash(const char *string, size_t length);
			
 
				+static enum PerfectKey PerfectHash(const char *string, size_t length)
			
 
				+{
			
 
				+    switch (length) {
			
 
				+    default:
			
 
				+        return Unknown;
			
 
				+    }
			
 
				+}
			
 
				+#endif                       /* TRIE_HASH_PerfectHash */" triehash --multi-byte=0 -C - -H -
			
 
				+
			
 
				+# Check that split files work
			
 
				+testsuccess triehash -C test.c -H test.h --multi-byte=0
			
 
				+testfileequal "#include \"test.h\"
			
 
				+ enum PerfectKey PerfectHash(const char *string, size_t length)
			
 
				+{
			
 
				+    switch (length) {
			
 
				+    default:
			
 
				+        return Unknown;
			
 
				+    }
			
 
				+}" test.c
			
 
				+testfileequal "#ifndef TRIE_HASH_PerfectHash
			
 
				+#define TRIE_HASH_PerfectHash
			
 
				+#include <stddef.h>
			
 
				+#include <stdint.h>
			
 
				+enum PerfectKey {
			
 
				+    Unknown = -1,
			
 
				+};
			
 
				+ enum PerfectKey PerfectHash(const char *string, size_t length);
			
 
				+#endif                       /* TRIE_HASH_PerfectHash */" test.h
			
 
				+
			
 
				+
			
 
				+# Check the C code generator
			
 
				+testsuccess triehash -C test.c -H test.c /dev/stdin
			
 
				+testfileequal "#ifndef TRIE_HASH_PerfectHash
			
 
				+#define TRIE_HASH_PerfectHash
			
 
				+#include <stddef.h>
			
 
				+#include <stdint.h>
			
 
				+enum PerfectKey {
			
 
				+    VeryLongWord = 43,
			
 
				+    Word = 42,
			
 
				+    Word___0 = 0,
			
 
				+    Label = 44,
			
 
				+    Unknown = -9,
			
 
				+};
			
 
				+static enum PerfectKey PerfectHash(const char *string, size_t length);
			
 
				+#ifdef __GNUC__
			
 
				+typedef uint16_t __attribute__((aligned (1))) triehash_uu16;
			
 
				+typedef char static_assert16[__alignof__(triehash_uu16) == 1 ? 1 : -1];
			
 
				+typedef uint32_t __attribute__((aligned (1))) triehash_uu32;
			
 
				+typedef char static_assert32[__alignof__(triehash_uu32) == 1 ? 1 : -1];
			
 
				+typedef uint64_t __attribute__((aligned (1))) triehash_uu64;
			
 
				+typedef char static_assert64[__alignof__(triehash_uu64) == 1 ? 1 : -1];
			
 
				+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
			
 
				+#define onechar(c, s, l) (((uint64_t)(c)) << (s))
			
 
				+#else
			
 
				+#define onechar(c, s, l) (((uint64_t)(c)) << (l-8-s))
			
 
				+#endif
			
 
				+#if (!defined(__ARM_ARCH) || defined(__ARM_FEATURE_UNALIGNED)) && !defined(TRIE_HASH_NO_MULTI_BYTE)
			
 
				+#define TRIE_HASH_MULTI_BYTE
			
 
				+#endif
			
 
				+#endif /*GNUC */
			
 
				+#ifdef TRIE_HASH_MULTI_BYTE
			
 
				+static enum PerfectKey PerfectHash4(const char *string)
			
 
				+{
			
 
				+    switch(*((triehash_uu32*) &string[0])) {
			
 
				+    case 0| onechar('W', 0, 32)| onechar('o', 8, 32)| onechar('r', 16, 32)| onechar('d', 24, 32):
			
 
				+        return Word;
			
 
				+    }
			
 
				+    return Unknown;
			
 
				+}
			
 
				+static enum PerfectKey PerfectHash5(const char *string)
			
 
				+{
			
 
				+    switch(*((triehash_uu32*) &string[0])) {
			
 
				+    case 0| onechar('W', 0, 32)| onechar('o', 8, 32)| onechar('r', 16, 32)| onechar('d', 24, 32):
			
 
				+        switch(string[4]) {
			
 
				+        case 0| onechar('2', 0, 8):
			
 
				+            return Label;
			
 
				+        }
			
 
				+    }
			
 
				+    return Unknown;
			
 
				+}
			
 
				+static enum PerfectKey PerfectHash7(const char *string)
			
 
				+{
			
 
				+    switch(*((triehash_uu32*) &string[0])) {
			
 
				+    case 0| onechar('W', 0, 32)| onechar('o', 8, 32)| onechar('r', 16, 32)| onechar('d', 24, 32):
			
 
				+        switch(string[4]) {
			
 
				+        case 0| onechar('-', 0, 8):
			
 
				+            switch(string[5]) {
			
 
				+            case 0| onechar('_', 0, 8):
			
 
				+                switch(string[6]) {
			
 
				+                case 0| onechar('0', 0, 8):
			
 
				+                    return Word___0;
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+    return Unknown;
			
 
				+}
			
 
				+static enum PerfectKey PerfectHash12(const char *string)
			
 
				+{
			
 
				+    switch(*((triehash_uu64*) &string[0])) {
			
 
				+    case 0| onechar('V', 0, 64)| onechar('e', 8, 64)| onechar('r', 16, 64)| onechar('y', 24, 64)| onechar('L', 32, 64)| onechar('o', 40, 64)| onechar('n', 48, 64)| onechar('g', 56, 64):
			
 
				+        switch(*((triehash_uu32*) &string[8])) {
			
 
				+        case 0| onechar('W', 0, 32)| onechar('o', 8, 32)| onechar('r', 16, 32)| onechar('d', 24, 32):
			
 
				+            return VeryLongWord;
			
 
				+        }
			
 
				+    }
			
 
				+    return Unknown;
			
 
				+}
			
 
				+#else
			
 
				+static enum PerfectKey PerfectHash4(const char *string)
			
 
				+{
			
 
				+    switch(string[0]) {
			
 
				+    case 'W':
			
 
				+        switch(string[1]) {
			
 
				+        case 'o':
			
 
				+            switch(string[2]) {
			
 
				+            case 'r':
			
 
				+                switch(string[3]) {
			
 
				+                case 'd':
			
 
				+                    return Word;
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+    return Unknown;
			
 
				+}
			
 
				+static enum PerfectKey PerfectHash5(const char *string)
			
 
				+{
			
 
				+    switch(string[0]) {
			
 
				+    case 'W':
			
 
				+        switch(string[1]) {
			
 
				+        case 'o':
			
 
				+            switch(string[2]) {
			
 
				+            case 'r':
			
 
				+                switch(string[3]) {
			
 
				+                case 'd':
			
 
				+                    switch(string[4]) {
			
 
				+                    case '2':
			
 
				+                        return Label;
			
 
				+                    }
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+    return Unknown;
			
 
				+}
			
 
				+static enum PerfectKey PerfectHash7(const char *string)
			
 
				+{
			
 
				+    switch(string[0]) {
			
 
				+    case 'W':
			
 
				+        switch(string[1]) {
			
 
				+        case 'o':
			
 
				+            switch(string[2]) {
			
 
				+            case 'r':
			
 
				+                switch(string[3]) {
			
 
				+                case 'd':
			
 
				+                    switch(string[4]) {
			
 
				+                    case '-':
			
 
				+                        switch(string[5]) {
			
 
				+                        case '_':
			
 
				+                            switch(string[6]) {
			
 
				+                            case '0':
			
 
				+                                return Word___0;
			
 
				+                            }
			
 
				+                        }
			
 
				+                    }
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+    return Unknown;
			
 
				+}
			
 
				+static enum PerfectKey PerfectHash12(const char *string)
			
 
				+{
			
 
				+    switch(string[0]) {
			
 
				+    case 'V':
			
 
				+        switch(string[1]) {
			
 
				+        case 'e':
			
 
				+            switch(string[2]) {
			
 
				+            case 'r':
			
 
				+                switch(string[3]) {
			
 
				+                case 'y':
			
 
				+                    switch(string[4]) {
			
 
				+                    case 'L':
			
 
				+                        switch(string[5]) {
			
 
				+                        case 'o':
			
 
				+                            switch(string[6]) {
			
 
				+                            case 'n':
			
 
				+                                switch(string[7]) {
			
 
				+                                case 'g':
			
 
				+                                    switch(string[8]) {
			
 
				+                                    case 'W':
			
 
				+                                        switch(string[9]) {
			
 
				+                                        case 'o':
			
 
				+                                            switch(string[10]) {
			
 
				+                                            case 'r':
			
 
				+                                                switch(string[11]) {
			
 
				+                                                case 'd':
			
 
				+                                                    return VeryLongWord;
			
 
				+                                                }
			
 
				+                                            }
			
 
				+                                        }
			
 
				+                                    }
			
 
				+                                }
			
 
				+                            }
			
 
				+                        }
			
 
				+                    }
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+    return Unknown;
			
 
				+}
			
 
				+#endif /* TRIE_HASH_MULTI_BYTE */
			
 
				+static enum PerfectKey PerfectHash(const char *string, size_t length)
			
 
				+{
			
 
				+    switch (length) {
			
 
				+    case 4:
			
 
				+        return PerfectHash4(string);
			
 
				+    case 5:
			
 
				+        return PerfectHash5(string);
			
 
				+    case 7:
			
 
				+        return PerfectHash7(string);
			
 
				+    case 12:
			
 
				+        return PerfectHash12(string);
			
 
				+    default:
			
 
				+        return Unknown;
			
 
				+    }
			
 
				+}
			
 
				+#endif                       /* TRIE_HASH_PerfectHash */"  test.c
			
--- a/triehash/tests/test-case-insensitive
+++ b/triehash/tests/test-case-insensitive
@@ -0,0 +1,109 @@
 
				+#!/bin/sh
			
 
				+. $(dirname $(readlink -f $0))/framework.sh
			
 
				+
			
 
				+WORDS="Halllo\nH-lllo\nHalll1"
			
 
				+
			
 
				+# Case-insensitive test
			
 
				+testsuccessequal "#include \"/dev/null\"
			
 
				+#ifdef __GNUC__
			
 
				+typedef uint16_t __attribute__((aligned (1))) triehash_uu16;
			
 
				+typedef char static_assert16[__alignof__(triehash_uu16) == 1 ? 1 : -1];
			
 
				+typedef uint32_t __attribute__((aligned (1))) triehash_uu32;
			
 
				+typedef char static_assert32[__alignof__(triehash_uu32) == 1 ? 1 : -1];
			
 
				+typedef uint64_t __attribute__((aligned (1))) triehash_uu64;
			
 
				+typedef char static_assert64[__alignof__(triehash_uu64) == 1 ? 1 : -1];
			
 
				+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
			
 
				+#define onechar(c, s, l) (((uint64_t)(c)) << (s))
			
 
				+#else
			
 
				+#define onechar(c, s, l) (((uint64_t)(c)) << (l-8-s))
			
 
				+#endif
			
 
				+#if (!defined(__ARM_ARCH) || defined(__ARM_FEATURE_UNALIGNED)) && !defined(TRIE_HASH_NO_MULTI_BYTE)
			
 
				+#define TRIE_HASH_MULTI_BYTE
			
 
				+#endif
			
 
				+#endif /*GNUC */
			
 
				+#ifdef TRIE_HASH_MULTI_BYTE
			
 
				+static enum PerfectKey PerfectHash6(const char *string)
			
 
				+{
			
 
				+    switch(string[0] | 0x20) {
			
 
				+    case 0| onechar('h', 0, 8):
			
 
				+        switch(string[1]) {
			
 
				+        case 0| onechar('-', 0, 8):
			
 
				+            switch(*((triehash_uu32*) &string[2]) | 0x20202020) {
			
 
				+            case 0| onechar('l', 0, 32)| onechar('l', 8, 32)| onechar('l', 16, 32)| onechar('o', 24, 32):
			
 
				+                return H_lllo;
			
 
				+            }
			
 
				+            break;
			
 
				+        case 0| onechar('a', 0, 8):
			
 
				+        case 0| onechar('A', 0, 8):
			
 
				+            switch(*((triehash_uu16*) &string[2]) | 0x2020) {
			
 
				+            case 0| onechar('l', 0, 16)| onechar('l', 8, 16):
			
 
				+                switch(string[4] | 0x20) {
			
 
				+                case 0| onechar('l', 0, 8):
			
 
				+                    switch(string[5]) {
			
 
				+                    case 0| onechar('1', 0, 8):
			
 
				+                        return Halll1;
			
 
				+                        break;
			
 
				+                    case 0| onechar('o', 0, 8):
			
 
				+                    case 0| onechar('O', 0, 8):
			
 
				+                        return Halllo;
			
 
				+                    }
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+    return Unknown;
			
 
				+}
			
 
				+#else
			
 
				+static enum PerfectKey PerfectHash6(const char *string)
			
 
				+{
			
 
				+    switch(string[0] | 0x20) {
			
 
				+    case 'h':
			
 
				+        switch(string[1]) {
			
 
				+        case '-':
			
 
				+            switch(string[2] | 0x20) {
			
 
				+            case 'l':
			
 
				+                switch(string[3] | 0x20) {
			
 
				+                case 'l':
			
 
				+                    switch(string[4] | 0x20) {
			
 
				+                    case 'l':
			
 
				+                        switch(string[5] | 0x20) {
			
 
				+                        case 'o':
			
 
				+                            return H_lllo;
			
 
				+                        }
			
 
				+                    }
			
 
				+                }
			
 
				+            }
			
 
				+            break;
			
 
				+        case 'a':
			
 
				+        case 'A':
			
 
				+            switch(string[2] | 0x20) {
			
 
				+            case 'l':
			
 
				+                switch(string[3] | 0x20) {
			
 
				+                case 'l':
			
 
				+                    switch(string[4] | 0x20) {
			
 
				+                    case 'l':
			
 
				+                        switch(string[5]) {
			
 
				+                        case '1':
			
 
				+                            return Halll1;
			
 
				+                            break;
			
 
				+                        case 'o':
			
 
				+                        case 'O':
			
 
				+                            return Halllo;
			
 
				+                        }
			
 
				+                    }
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+    return Unknown;
			
 
				+}
			
 
				+#endif /* TRIE_HASH_MULTI_BYTE */
			
 
				+ enum PerfectKey PerfectHash(const char *string, size_t length)
			
 
				+{
			
 
				+    switch (length) {
			
 
				+    case 6:
			
 
				+        return PerfectHash6(string);
			
 
				+    default:
			
 
				+        return Unknown;
			
 
				+    }
			
 
				+}" triehash --multi-byte=3210 --ignore-case -H /dev/null /dev/stdin
			
--- a/triehash/tests/test-enum-include-name-flags
+++ b/triehash/tests/test-enum-include-name-flags
@@ -0,0 +1,129 @@
 
				+#!/bin/sh
			
 
				+. $(dirname $(readlink -f $0))/framework.sh
			
 
				+
			
 
				+# Need a short word, we really just need to check if the labels work
			
 
				+WORDS=w
			
 
				+
			
 
				+testsuccessequal "\
			
 
				+#ifndef TRIE_HASH_PerfectHash
			
 
				+#define TRIE_HASH_PerfectHash
			
 
				+#include <stddef.h>
			
 
				+#include <stdint.h>
			
 
				+#include <foo.h>
			
 
				+enum PerfectKey {
			
 
				+    w = 0,
			
 
				+    Unknown = -1,
			
 
				+};
			
 
				+ enum PerfectKey PerfectHash(const char *string, size_t length);
			
 
				+#endif                       /* TRIE_HASH_PerfectHash */" triehash --multi-byte=0 -C /dev/null --include="<foo.h>" /dev/stdin
			
 
				+
			
 
				+# Check for --enum-class support
			
 
				+testsuccessequal "\
			
 
				+#ifndef TRIE_HASH_PerfectHash
			
 
				+#define TRIE_HASH_PerfectHash
			
 
				+#include <stddef.h>
			
 
				+#include <stdint.h>
			
 
				+enum class PerfectKey {
			
 
				+    w = 0,
			
 
				+    Unknown = -1,
			
 
				+};
			
 
				+static enum PerfectKey PerfectHash(const char *string, size_t length);
			
 
				+static enum PerfectKey PerfectHash1(const char *string)
			
 
				+{
			
 
				+    switch(string[0]) {
			
 
				+    case 'w':
			
 
				+        return PerfectKey::w;
			
 
				+    }
			
 
				+    return PerfectKey::Unknown;
			
 
				+}
			
 
				+static enum PerfectKey PerfectHash(const char *string, size_t length)
			
 
				+{
			
 
				+    switch (length) {
			
 
				+    case 1:
			
 
				+        return PerfectHash1(string);
			
 
				+    default:
			
 
				+        return PerfectKey::Unknown;
			
 
				+    }
			
 
				+}
			
 
				+#endif                       /* TRIE_HASH_PerfectHash */" triehash --multi-byte=0 --enum-class /dev/stdin
			
 
				+
			
 
				+# Check for --enum-name support
			
 
				+testsuccessequal "\
			
 
				+#ifndef TRIE_HASH_PerfectHash
			
 
				+#define TRIE_HASH_PerfectHash
			
 
				+#include <stddef.h>
			
 
				+#include <stdint.h>
			
 
				+enum Foo {
			
 
				+    Unknown = -1,
			
 
				+};
			
 
				+static enum Foo PerfectHash(const char *string, size_t length);
			
 
				+static enum Foo PerfectHash(const char *string, size_t length)
			
 
				+{
			
 
				+    switch (length) {
			
 
				+    default:
			
 
				+        return Unknown;
			
 
				+    }
			
 
				+}
			
 
				+#endif                       /* TRIE_HASH_PerfectHash */\
			
 
				+" triehash --multi-byte=0 --enum-name="Foo"
			
 
				+
			
 
				+# Check for --enum-class support
			
 
				+testsuccessequal "\
			
 
				+#ifndef TRIE_HASH_PerfectHash
			
 
				+#define TRIE_HASH_PerfectHash
			
 
				+#include <stddef.h>
			
 
				+#include <stdint.h>
			
 
				+enum class Foo::Bar {
			
 
				+    Unknown = -1,
			
 
				+};
			
 
				+static enum Foo::Bar PerfectHash(const char *string, size_t length);
			
 
				+static enum Foo::Bar PerfectHash(const char *string, size_t length)
			
 
				+{
			
 
				+    switch (length) {
			
 
				+    default:
			
 
				+        return Foo::Bar::Unknown;
			
 
				+    }
			
 
				+}
			
 
				+#endif                       /* TRIE_HASH_PerfectHash */\
			
 
				+" triehash --multi-byte=0 --enum-class --enum-name="Foo::Bar"
			
 
				+
			
 
				+# Check for --function-name support
			
 
				+testsuccessequal "\
			
 
				+#ifndef TRIE_HASH_NonSense
			
 
				+#define TRIE_HASH_NonSense
			
 
				+#include <stddef.h>
			
 
				+#include <stdint.h>
			
 
				+enum PerfectKey {
			
 
				+    Unknown = -1,
			
 
				+};
			
 
				+static enum PerfectKey NonSense(const char *string, size_t length);
			
 
				+static enum PerfectKey NonSense(const char *string, size_t length)
			
 
				+{
			
 
				+    switch (length) {
			
 
				+    default:
			
 
				+        return Unknown;
			
 
				+    }
			
 
				+}
			
 
				+#endif                       /* TRIE_HASH_NonSense */\
			
 
				+" triehash --multi-byte=0 --function-name="NonSense"
			
 
				+
			
 
				+# Check for --counter-name support
			
 
				+testsuccessequal "\
			
 
				+#ifndef TRIE_HASH_PerfectHash
			
 
				+#define TRIE_HASH_PerfectHash
			
 
				+#include <stddef.h>
			
 
				+#include <stdint.h>
			
 
				+enum { MyCounter = 0 };
			
 
				+enum PerfectKey {
			
 
				+    Unknown = -1,
			
 
				+};
			
 
				+static enum PerfectKey PerfectHash(const char *string, size_t length);
			
 
				+static enum PerfectKey PerfectHash(const char *string, size_t length)
			
 
				+{
			
 
				+    switch (length) {
			
 
				+    default:
			
 
				+        return Unknown;
			
 
				+    }
			
 
				+}
			
 
				+#endif                       /* TRIE_HASH_PerfectHash */\
			
 
				+" triehash --multi-byte=0 --counter-name="MyCounter"
			
--- a/triehash/tests/test-multi-byte-level
+++ b/triehash/tests/test-multi-byte-level
@@ -0,0 +1,427 @@
 
				+#!/bin/sh
			
 
				+. $(dirname $(readlink -f $0))/framework.sh
			
 
				+
			
 
				+# Check that building a single-byte trie works
			
 
				+testsuccessequal "\
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│                   Initial trie                     │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── V
			
 
				+│   ├── e
			
 
				+│   │   ├── r
			
 
				+│   │   │   ├── y
			
 
				+│   │   │   │   ├── L
			
 
				+│   │   │   │   │   ├── o
			
 
				+│   │   │   │   │   │   ├── n
			
 
				+│   │   │   │   │   │   │   ├── g
			
 
				+│   │   │   │   │   │   │   │   ├── W
			
 
				+│   │   │   │   │   │   │   │   │   ├── o
			
 
				+│   │   │   │   │   │   │   │   │   │   ├── r
			
 
				+│   │   │   │   │   │   │   │   │   │   │   ├── d → VeryLongWord
			
 
				+├── W
			
 
				+│   ├── o
			
 
				+│   │   ├── r
			
 
				+│   │   │   ├── d → Word
			
 
				+│   │   │   │   ├── -
			
 
				+│   │   │   │   │   ├── _
			
 
				+│   │   │   │   │   │   ├── 0 → Word-_0
			
 
				+│   │   │   │   ├── 2 → Label
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│                   Rebuilt trie                     │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── V
			
 
				+│   ├── e
			
 
				+│   │   ├── r
			
 
				+│   │   │   ├── y
			
 
				+│   │   │   │   ├── L
			
 
				+│   │   │   │   │   ├── o
			
 
				+│   │   │   │   │   │   ├── n
			
 
				+│   │   │   │   │   │   │   ├── g
			
 
				+│   │   │   │   │   │   │   │   ├── W
			
 
				+│   │   │   │   │   │   │   │   │   ├── o
			
 
				+│   │   │   │   │   │   │   │   │   │   ├── r
			
 
				+│   │   │   │   │   │   │   │   │   │   │   ├── d → VeryLongWord
			
 
				+├── W
			
 
				+│   ├── o
			
 
				+│   │   ├── r
			
 
				+│   │   │   ├── d → Word
			
 
				+│   │   │   │   ├── -
			
 
				+│   │   │   │   │   ├── _
			
 
				+│   │   │   │   │   │   ├── 0 → Word-_0
			
 
				+│   │   │   │   ├── 2 → Label
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│              Trie for words of length 4            │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── W
			
 
				+│   ├── o
			
 
				+│   │   ├── r
			
 
				+│   │   │   ├── d → Word
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│              Trie for words of length 5            │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── W
			
 
				+│   ├── o
			
 
				+│   │   ├── r
			
 
				+│   │   │   ├── d
			
 
				+│   │   │   │   ├── 2 → Label
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│              Trie for words of length 7            │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── W
			
 
				+│   ├── o
			
 
				+│   │   ├── r
			
 
				+│   │   │   ├── d
			
 
				+│   │   │   │   ├── -
			
 
				+│   │   │   │   │   ├── _
			
 
				+│   │   │   │   │   │   ├── 0 → Word-_0
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│              Trie for words of length 12           │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── V
			
 
				+│   ├── e
			
 
				+│   │   ├── r
			
 
				+│   │   │   ├── y
			
 
				+│   │   │   │   ├── L
			
 
				+│   │   │   │   │   ├── o
			
 
				+│   │   │   │   │   │   ├── n
			
 
				+│   │   │   │   │   │   │   ├── g
			
 
				+│   │   │   │   │   │   │   │   ├── W
			
 
				+│   │   │   │   │   │   │   │   │   ├── o
			
 
				+│   │   │   │   │   │   │   │   │   │   ├── r
			
 
				+│   │   │   │   │   │   │   │   │   │   │   ├── d → VeryLongWord" triehash --multi-byte=0 -l tree /dev/stdin
			
 
				+
			
 
				+# Two byte optimization
			
 
				+testsuccessequal "\
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│                   Initial trie                     │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── Ve
			
 
				+│   ├── ry
			
 
				+│   │   ├── Lo
			
 
				+│   │   │   ├── ng
			
 
				+│   │   │   │   ├── Wo
			
 
				+│   │   │   │   │   ├── rd → VeryLongWord
			
 
				+├── Wo
			
 
				+│   ├── rd → Word
			
 
				+│   │   ├── -_
			
 
				+│   │   │   ├── 0 → Word-_0
			
 
				+│   │   ├── 2 → Label
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│                   Rebuilt trie                     │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── Ve
			
 
				+│   ├── ry
			
 
				+│   │   ├── Lo
			
 
				+│   │   │   ├── ng
			
 
				+│   │   │   │   ├── Wo
			
 
				+│   │   │   │   │   ├── rd → VeryLongWord
			
 
				+├── Wo
			
 
				+│   ├── rd → Word
			
 
				+│   │   ├── -
			
 
				+│   │   │   ├── _0 → Word-_0
			
 
				+│   │   ├── 2 → Label
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│              Trie for words of length 4            │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── Wo
			
 
				+│   ├── rd → Word
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│              Trie for words of length 5            │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── Wo
			
 
				+│   ├── rd
			
 
				+│   │   ├── 2 → Label
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│              Trie for words of length 7            │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── Wo
			
 
				+│   ├── rd
			
 
				+│   │   ├── -_
			
 
				+│   │   │   ├── 0 → Word-_0
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│              Trie for words of length 12           │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── Ve
			
 
				+│   ├── ry
			
 
				+│   │   ├── Lo
			
 
				+│   │   │   ├── ng
			
 
				+│   │   │   │   ├── Wo
			
 
				+│   │   │   │   │   ├── rd → VeryLongWord" triehash --multi-byte=1 -l tree /dev/stdin
			
 
				+# Four byte optimization
			
 
				+testsuccessequal "\
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│                   Initial trie                     │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── Very
			
 
				+│   ├── Long
			
 
				+│   │   ├── Word → VeryLongWord
			
 
				+├── Word → Word
			
 
				+│   ├── -
			
 
				+│   │   ├── _
			
 
				+│   │   │   ├── 0 → Word-_0
			
 
				+│   ├── 2 → Label
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│                   Rebuilt trie                     │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── Very
			
 
				+│   ├── Long
			
 
				+│   │   ├── Word → VeryLongWord
			
 
				+├── Word → Word
			
 
				+│   ├── -
			
 
				+│   │   ├── _
			
 
				+│   │   │   ├── 0 → Word-_0
			
 
				+│   ├── 2 → Label
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│              Trie for words of length 4            │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── Word → Word
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│              Trie for words of length 5            │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── Word
			
 
				+│   ├── 2 → Label
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│              Trie for words of length 7            │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── Word
			
 
				+│   ├── -
			
 
				+│   │   ├── _
			
 
				+│   │   │   ├── 0 → Word-_0
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│              Trie for words of length 12           │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── Very
			
 
				+│   ├── Long
			
 
				+│   │   ├── Word → VeryLongWord" triehash --multi-byte=2 -l tree /dev/stdin
			
 
				+# Eigh byte optimization
			
 
				+testsuccessequal "\
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│                   Initial trie                     │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── VeryLong
			
 
				+│   ├── W
			
 
				+│   │   ├── o
			
 
				+│   │   │   ├── r
			
 
				+│   │   │   │   ├── d → VeryLongWord
			
 
				+├── W
			
 
				+│   ├── o
			
 
				+│   │   ├── r
			
 
				+│   │   │   ├── d → Word
			
 
				+│   │   │   │   ├── -
			
 
				+│   │   │   │   │   ├── _
			
 
				+│   │   │   │   │   │   ├── 0 → Word-_0
			
 
				+│   │   │   │   ├── 2 → Label
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│                   Rebuilt trie                     │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── V
			
 
				+│   ├── eryLongW
			
 
				+│   │   ├── o
			
 
				+│   │   │   ├── r
			
 
				+│   │   │   │   ├── d → VeryLongWord
			
 
				+├── W
			
 
				+│   ├── o
			
 
				+│   │   ├── r
			
 
				+│   │   │   ├── d → Word
			
 
				+│   │   │   │   ├── -
			
 
				+│   │   │   │   │   ├── _
			
 
				+│   │   │   │   │   │   ├── 0 → Word-_0
			
 
				+│   │   │   │   ├── 2 → Label
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│              Trie for words of length 4            │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── W
			
 
				+│   ├── o
			
 
				+│   │   ├── r
			
 
				+│   │   │   ├── d → Word
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│              Trie for words of length 5            │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── W
			
 
				+│   ├── o
			
 
				+│   │   ├── r
			
 
				+│   │   │   ├── d
			
 
				+│   │   │   │   ├── 2 → Label
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│              Trie for words of length 7            │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── W
			
 
				+│   ├── o
			
 
				+│   │   ├── r
			
 
				+│   │   │   ├── d
			
 
				+│   │   │   │   ├── -
			
 
				+│   │   │   │   │   ├── _
			
 
				+│   │   │   │   │   │   ├── 0 → Word-_0
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│              Trie for words of length 12           │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── VeryLong
			
 
				+│   ├── W
			
 
				+│   │   ├── o
			
 
				+│   │   │   ├── r
			
 
				+│   │   │   │   ├── d → VeryLongWord" triehash --multi-byte=3 -l tree /dev/stdin
			
 
				+
			
 
				+
			
 
				+# Check that building a multi-byte trie works
			
 
				+testsuccessequal "\
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│                   Initial trie                     │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── VeryLong
			
 
				+│   ├── Word → VeryLongWord
			
 
				+├── Word → Word
			
 
				+│   ├── -
			
 
				+│   │   ├── _
			
 
				+│   │   │   ├── 0 → Word-_0
			
 
				+│   ├── 2 → Label
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│                   Rebuilt trie                     │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── Very
			
 
				+│   ├── LongWord → VeryLongWord
			
 
				+├── Word → Word
			
 
				+│   ├── -
			
 
				+│   │   ├── _
			
 
				+│   │   │   ├── 0 → Word-_0
			
 
				+│   ├── 2 → Label
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│              Trie for words of length 4            │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── Word → Word
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│              Trie for words of length 5            │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── Word
			
 
				+│   ├── 2 → Label
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│              Trie for words of length 7            │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── Word
			
 
				+│   ├── -
			
 
				+│   │   ├── _
			
 
				+│   │   │   ├── 0 → Word-_0
			
 
				+┌────────────────────────────────────────────────────┐
			
 
				+│              Trie for words of length 12           │
			
 
				+└────────────────────────────────────────────────────┘
			
 
				+
			
 
				+├── VeryLong
			
 
				+│   ├── Word → VeryLongWord" triehash -l tree /dev/stdin
			
 
				+
			
 
				+
			
 
				+###### CHANGE THE WORDS FOR THE FOLLOWING TESTS #######
			
 
				+WORDS="Word"
			
 
				+
			
 
				+# Check that we are generating the proper multi-byte and fallback sessions
			
 
				+testsuccessequal "#include \"/dev/null\"
			
 
				+#ifdef __GNUC__
			
 
				+typedef uint16_t __attribute__((aligned (1))) triehash_uu16;
			
 
				+typedef char static_assert16[__alignof__(triehash_uu16) == 1 ? 1 : -1];
			
 
				+typedef uint32_t __attribute__((aligned (1))) triehash_uu32;
			
 
				+typedef char static_assert32[__alignof__(triehash_uu32) == 1 ? 1 : -1];
			
 
				+typedef uint64_t __attribute__((aligned (1))) triehash_uu64;
			
 
				+typedef char static_assert64[__alignof__(triehash_uu64) == 1 ? 1 : -1];
			
 
				+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
			
 
				+#define onechar(c, s, l) (((uint64_t)(c)) << (s))
			
 
				+#else
			
 
				+#define onechar(c, s, l) (((uint64_t)(c)) << (l-8-s))
			
 
				+#endif
			
 
				+#if (!defined(__ARM_ARCH) || defined(__ARM_FEATURE_UNALIGNED)) && !defined(TRIE_HASH_NO_MULTI_BYTE)
			
 
				+#define TRIE_HASH_MULTI_BYTE
			
 
				+#endif
			
 
				+#endif /*GNUC */
			
 
				+#ifdef TRIE_HASH_MULTI_BYTE
			
 
				+static enum PerfectKey PerfectHash4(const char *string)
			
 
				+{
			
 
				+    switch(*((triehash_uu32*) &string[0])) {
			
 
				+    case 0| onechar('W', 0, 32)| onechar('o', 8, 32)| onechar('r', 16, 32)| onechar('d', 24, 32):
			
 
				+        return Word;
			
 
				+    }
			
 
				+    return Unknown;
			
 
				+}
			
 
				+#else
			
 
				+static enum PerfectKey PerfectHash4(const char *string)
			
 
				+{
			
 
				+    switch(string[0]) {
			
 
				+    case 'W':
			
 
				+        switch(string[1]) {
			
 
				+        case 'o':
			
 
				+            switch(string[2]) {
			
 
				+            case 'r':
			
 
				+                switch(string[3]) {
			
 
				+                case 'd':
			
 
				+                    return Word;
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+    return Unknown;
			
 
				+}
			
 
				+#endif /* TRIE_HASH_MULTI_BYTE */
			
 
				+ enum PerfectKey PerfectHash(const char *string, size_t length)
			
 
				+{
			
 
				+    switch (length) {
			
 
				+    case 4:
			
 
				+        return PerfectHash4(string);
			
 
				+    default:
			
 
				+        return Unknown;
			
 
				+    }
			
 
				+}" triehash -H /dev/null /dev/stdin
			
 
				+
			
 
				+
			
 
				+# Check that we are generating no multi-byte session
			
 
				+testsuccessequal "#include \"/dev/null\"
			
 
				+static enum PerfectKey PerfectHash4(const char *string)
			
 
				+{
			
 
				+    switch(string[0]) {
			
 
				+    case 'W':
			
 
				+        switch(string[1]) {
			
 
				+        case 'o':
			
 
				+            switch(string[2]) {
			
 
				+            case 'r':
			
 
				+                switch(string[3]) {
			
 
				+                case 'd':
			
 
				+                    return Word;
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+    return Unknown;
			
 
				+}
			
 
				+ enum PerfectKey PerfectHash(const char *string, size_t length)
			
 
				+{
			
 
				+    switch (length) {
			
 
				+    case 4:
			
 
				+        return PerfectHash4(string);
			
 
				+    default:
			
 
				+        return Unknown;
			
 
				+    }
			
 
				+}" triehash --multi-byte=0 -H /dev/null /dev/stdin
			
--- a/triehash/triehash.pl
+++ b/triehash/triehash.pl
@@ -0,0 +1,660 @@
 
				+#!/usr/bin/perl -w
			
 
				+#
			
 
				+# Copyright (C) 2016 Julian Andres Klode <jak@jak-linux.org>
			
 
				+#
			
 
				+# Permission is hereby granted, free of charge, to any person obtaining a copy
			
 
				+# of this software and associated documentation files (the "Software"), to deal
			
 
				+# in the Software without restriction, including without limitation the rights
			
 
				+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
			
 
				+# copies of the Software, and to permit persons to whom the Software is
			
 
				+# furnished to do so, subject to the following conditions:
			
 
				+#
			
 
				+# The above copyright notice and this permission notice shall be included in
			
 
				+# all copies or substantial portions of the Software.
			
 
				+#
			
 
				+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
			
 
				+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
			
 
				+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
			
 
				+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
			
 
				+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
			
 
				+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
			
 
				+# THE SOFTWARE.
			
 
				+
			
 
				+
			
 
				+=head1 NAME
			
 
				+
			
 
				+triehash - Generate a perfect hash function derived from a trie.
			
 
				+
			
 
				+=cut
			
 
				+
			
 
				+use strict;
			
 
				+use warnings;
			
 
				+use Getopt::Long;
			
 
				+
			
 
				+=head1 SYNOPSIS
			
 
				+
			
 
				+B<triehash> [S<I<option>>] [S<I<input file>>]
			
 
				+
			
 
				+=head1 DESCRIPTION
			
 
				+
			
 
				+triehash takes a list of words in input file and generates a function and
			
 
				+an enumeration to describe the word
			
 
				+
			
 
				+=head1 INPUT FILE FORMAT
			
 
				+
			
 
				+The file consists of multiple lines of the form:
			
 
				+
			
 
				+    [label ~ ] word [= value]
			
 
				+
			
 
				+This maps word to value, and generates an enumeration with entries of the form:
			
 
				+
			
 
				+    label = value
			
 
				+
			
 
				+If I<label> is undefined, the word will be used, the minus character will be
			
 
				+replaced by an underscore. If value is undefined it is counted upwards from
			
 
				+the last value.
			
 
				+
			
 
				+There may also be one line of the format
			
 
				+
			
 
				+    [ label ~] = value
			
 
				+
			
 
				+Which defines the value to be used for non-existing keys. Note that this also
			
 
				+changes default value for other keys, as for normal entries. So if you place
			
 
				+
			
 
				+    = 0
			
 
				+
			
 
				+at the beginning of the file, unknown strings map to 0, and the other strings
			
 
				+map to values starting with 1. If label is not specified, the default is
			
 
				+I<Unknown>.
			
 
				+
			
 
				+=head1 OPTIONS
			
 
				+
			
 
				+=over 4
			
 
				+
			
 
				+=item B<-C>I<.c file> B<--code>=I<.c file>
			
 
				+
			
 
				+Generate code in the given file.
			
 
				+
			
 
				+=item B<-H>I<header file> B<--header>=I<header file>
			
 
				+
			
 
				+Generate a header in the given file, containing a declaration of the hash
			
 
				+function and an enumeration.
			
 
				+
			
 
				+=item B<--enum-name=>I<word>
			
 
				+
			
 
				+The name of the enumeration.
			
 
				+
			
 
				+=item B<--function-name=>I<word>
			
 
				+
			
 
				+The name of the function.
			
 
				+
			
 
				+=item B<--namespace=>I<name>
			
 
				+
			
 
				+Put the function and enum into a namespace (C++)
			
 
				+
			
 
				+=item B<--class=>I<name>
			
 
				+
			
 
				+Put the function and enum into a class (C++)
			
 
				+
			
 
				+=item B<--enum-class>
			
 
				+
			
 
				+Generate an enum class instead of an enum (C++)
			
 
				+
			
 
				+=item B<--counter-name=>I<name>
			
 
				+
			
 
				+Use I<name> for a counter that is set to the latest entry in the enumeration
			
 
				++ 1. This can be useful for defining array sizes.
			
 
				+
			
 
				+=item B<--extern-c>
			
 
				+
			
 
				+Wrap everything into an extern "C" block. Not compatible with the C++
			
 
				+options, as a header with namespaces, classes, or enum classes is not
			
 
				+valid C.
			
 
				+
			
 
				+=item B<--multi-byte>=I<value>
			
 
				+
			
 
				+Generate code reading multiple bytes at once. The value is a string of power
			
 
				+of twos to enable. The default value is 320 meaning that 8, 4, and single byte
			
 
				+reads are enabled. Specify 0 to disable multi-byte completely, or add 2 if you
			
 
				+also want to allow 2-byte reads. 2-byte reads are disabled by default because
			
 
				+they negatively affect performance on older Intel architectures.
			
 
				+
			
 
				+This generates code for both multiple bytes and single byte reads, but only
			
 
				+enables the multiple byte reads of GNU C compatible compilers, as the following
			
 
				+extensions are used:
			
 
				+
			
 
				+=over 8
			
 
				+
			
 
				+=item Byte-aligned integers
			
 
				+
			
 
				+We must be able to generate integers that are aligned to a single byte using:
			
 
				+
			
 
				+    typedef uint64_t __attribute__((aligned (1))) triehash_uu64;
			
 
				+
			
 
				+=item Byte-order
			
 
				+
			
 
				+The macros __BYTE_ORDER__ and __ORDER_LITTLE_ENDIAN__ must be defined.
			
 
				+
			
 
				+=back
			
 
				+
			
 
				+We forcefully disable multi-byte reads on platforms where the variable
			
 
				+I<__ARM_ARCH> is defined and I<__ARM_FEATURE_UNALIGNED> is not defined,
			
 
				+as there is a measurable overhead from emulating the unaligned reads on
			
 
				+ARM.
			
 
				+
			
 
				+=item B<--language=>I<language>
			
 
				+
			
 
				+Generate a file in the specified language. Currently known are 'C' and 'tree',
			
 
				+the latter generating a tree.
			
 
				+
			
 
				+=item B<--include=>I<header>
			
 
				+
			
 
				+Add the header to the include statements of the header file. The value must
			
 
				+be surrounded by quotes or angle brackets for C code. May be specified multiple
			
 
				+times.
			
 
				+
			
 
				+=back
			
 
				+
			
 
				+=cut
			
 
				+
			
 
				+my $unknown = -1;
			
 
				+my $unknown_label = "Unknown";
			
 
				+my $counter_start = 0;
			
 
				+my $enum_name = "PerfectKey";
			
 
				+my $function_name = "PerfectHash";
			
 
				+my $enum_class = 0;
			
 
				+
			
 
				+my $code_name = "-";
			
 
				+my $header_name = "-";
			
 
				+my $code;
			
 
				+my $header;
			
 
				+my $ignore_case = 0;
			
 
				+my $multi_byte = "320";
			
 
				+my $language = 'C';
			
 
				+my $counter_name = undef;
			
 
				+my @includes = ();
			
 
				+
			
 
				+
			
 
				+Getopt::Long::config('default',
			
 
				+                     'bundling',
			
 
				+                     'no_getopt_compat',
			
 
				+                     'no_auto_abbrev',
			
 
				+                     'permute',
			
 
				+                     'auto_help');
			
 
				+
			
 
				+GetOptions ("code|C=s" => \$code_name,
			
 
				+            "header|H=s"   => \$header_name,
			
 
				+            "function-name=s" => \$function_name,
			
 
				+            "ignore-case" => \$ignore_case,
			
 
				+            "enum-name=s" => \$enum_name,
			
 
				+            "language|l=s" => \$language,
			
 
				+            "multi-byte=s" => \$multi_byte,
			
 
				+            "enum-class" => \$enum_class,
			
 
				+            "include=s" => \@includes,
			
 
				+            "counter-name=s" => \$counter_name)
			
 
				+    or die("Could not parse options!");
			
 
				+
			
 
				+
			
 
				+package Trie {
			
 
				+
			
 
				+    sub new {
			
 
				+        my $class = shift;
			
 
				+        my $self = {};
			
 
				+        bless $self, $class;
			
 
				+
			
 
				+        $self->{children} = {};
			
 
				+        $self->{value} = undef;
			
 
				+        $self->{label} = undef;
			
 
				+
			
 
				+        return $self;
			
 
				+    }
			
 
				+
			
 
				+    # Return the largest power of 2 smaller or equal to the argument
			
 
				+    sub alignpower2 {
			
 
				+        my ($self, $length) = @_;
			
 
				+
			
 
				+        return 8 if ($length >= 8 && $multi_byte =~ /3/);
			
 
				+        return 4 if ($length >= 4 && $multi_byte =~ /2/);
			
 
				+        return 2 if ($length >= 2 && $multi_byte =~ /1/);
			
 
				+
			
 
				+        return 1;
			
 
				+    }
			
 
				+
			
 
				+    # Split the key into a head block and a tail
			
 
				+    sub split_key {
			
 
				+        my ($self, $key) = @_;
			
 
				+        my $length = length $key;
			
 
				+        my $split = $self->alignpower2($length);
			
 
				+
			
 
				+        return (substr($key, 0, $split), substr($key, $split));
			
 
				+    }
			
 
				+
			
 
				+    sub insert {
			
 
				+        my ($self, $key, $label, $value) = @_;
			
 
				+
			
 
				+        if (length($key) == 0) {
			
 
				+            $self->{label} = $label;
			
 
				+            $self->{value} = $value;
			
 
				+            return;
			
 
				+        }
			
 
				+
			
 
				+        my ($child, $tail) = $self->split_key($key);
			
 
				+
			
 
				+        $self->{children}{$child} = Trie->new if (!defined($self->{children}{$child}));
			
 
				+
			
 
				+        $self->{children}{$child}->insert($tail, $label, $value);
			
 
				+    }
			
 
				+
			
 
				+    sub filter_depth {
			
 
				+        my ($self, $togo) = @_;
			
 
				+
			
 
				+        my $new = Trie->new;
			
 
				+
			
 
				+        if ($togo != 0) {
			
 
				+            my $found = 0;
			
 
				+            foreach my $key (sort keys %{$self->{children}}) {
			
 
				+                if ($togo > length($key) || defined $self->{children}{$key}->{value}) {
			
 
				+                    my $child = $self->{children}{$key}->filter_depth($togo - length($key));
			
 
				+
			
 
				+                    $new->{children}{$key}= $child if defined $child;
			
 
				+                    $found = 1 if defined $child;
			
 
				+                }
			
 
				+            }
			
 
				+            return undef if (!$found);
			
 
				+        } else {
			
 
				+            $new->{value} = $self->{value};
			
 
				+            $new->{label} = $self->{label};
			
 
				+        }
			
 
				+
			
 
				+        return $new;
			
 
				+    }
			
 
				+
			
 
				+    # Reinsert all value nodes into the specified $trie, prepending $prefix
			
 
				+    # to their $paths.
			
 
				+    sub reinsert_value_nodes_into {
			
 
				+        my ($self, $trie, $prefix) = @_;
			
 
				+
			
 
				+        $trie->insert($prefix, $self->{label}, $self->{value}) if (defined $self->{value});
			
 
				+
			
 
				+        foreach my $key (sort keys %{$self->{children}}) {
			
 
				+            $self->{children}{$key}->reinsert_value_nodes_into($trie, $prefix . $key);
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    # Find an earlier split due a an ambiguous character
			
 
				+    sub find_ealier_split {
			
 
				+        my ($self, $key) = @_;
			
 
				+
			
 
				+        if ($ignore_case) {
			
 
				+            for my $i (0..length($key)-1) {
			
 
				+                # If the key starts with an ambiguous character, we need to
			
 
				+                # take only it. Otherwise, we need to take everything
			
 
				+                # before the character.
			
 
				+                return $self->alignpower2($i || 1) if (main::ambiguous(substr($key, $i, 1)));
			
 
				+            }
			
 
				+        }
			
 
				+        return $self->alignpower2(length $key);
			
 
				+    }
			
 
				+
			
 
				+    # Rebuild the trie, splitting at ambigous chars, and unifying key lengths
			
 
				+    sub rebuild_tree {
			
 
				+        my $self = shift;
			
 
				+        # Determine if/where we need to split before an ambiguous character
			
 
				+        my $new_split = 99999999999999999;
			
 
				+        foreach my $key (sort keys %{$self->{children}}) {
			
 
				+            my $special_length = $self->find_ealier_split($key);
			
 
				+            $new_split = $special_length if ($special_length < $new_split);
			
 
				+        }
			
 
				+
			
 
				+        # Start building a new uniform trie
			
 
				+        my $newself = Trie->new;
			
 
				+        $newself->{label} = $self->{label};
			
 
				+        $newself->{value} = $self->{value};
			
 
				+        $newself->{children} = {};
			
 
				+
			
 
				+        foreach my $key (sort keys %{$self->{children}}) {
			
 
				+            my $head = substr($key, 0, $new_split);
			
 
				+            my $tail = substr($key, $new_split);
			
 
				+            # Rebuild the child node at $head, pushing $tail downwards
			
 
				+            $newself->{children}{$head} //= Trie->new;
			
 
				+            $self->{children}{$key}->reinsert_value_nodes_into($newself->{children}{$head}, $tail);
			
 
				+            # We took up to one special character of each key label. There might
			
 
				+            # be more, so we need to rebuild recursively.
			
 
				+            $newself->{children}{$head} = $newself->{children}{$head}->rebuild_tree();
			
 
				+        }
			
 
				+
			
 
				+        return $newself;
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+# Code generator for C and C++
			
 
				+package CCodeGen {
			
 
				+    my $static = ($code_name eq $header_name) ? "static" : "";
			
 
				+    my $enum_specifier = $enum_class ? "enum class" : "enum";
			
 
				+
			
 
				+    sub new {
			
 
				+        my $class = shift;
			
 
				+        my $self = {};
			
 
				+        bless $self, $class;
			
 
				+
			
 
				+        return $self;
			
 
				+    }
			
 
				+
			
 
				+    sub open_output {
			
 
				+        my $self = shift;
			
 
				+        if ($code_name ne "-") {
			
 
				+            open($code, '>', $code_name) or die "Cannot open $code_name: $!" ;
			
 
				+        } else {
			
 
				+            $code = *STDOUT;
			
 
				+        }
			
 
				+        if($code_name eq $header_name) {
			
 
				+            $header = $code;
			
 
				+        } elsif ($header_name ne "-") {
			
 
				+            open($header, '>', $header_name) or die "Cannot open $header_name: $!" ;
			
 
				+        } else {
			
 
				+            $header = *STDOUT;
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    sub word_to_label {
			
 
				+        my ($class, $word) = @_;
			
 
				+
			
 
				+        $word =~ s/_/__/g;
			
 
				+        $word =~ s/-/_/g;
			
 
				+        return $word;
			
 
				+    }
			
 
				+
			
 
				+    # Return a case label, by shifting and or-ing bytes in the word
			
 
				+    sub case_label {
			
 
				+        my ($self, $key) = @_;
			
 
				+
			
 
				+        return sprintf("'%s'", substr($key, 0, 1)) if not $multi_byte;
			
 
				+
			
 
				+        my $output = '0';
			
 
				+
			
 
				+        for my $i (0..length($key)-1) {
			
 
				+            $output .= sprintf("| onechar('%s', %d, %d)", substr($key, $i, 1), 8 * $i, 8*length($key));
			
 
				+        }
			
 
				+
			
 
				+        return $output;
			
 
				+    }
			
 
				+
			
 
				+    # Return an appropriate read instruction for $length bytes from $offset
			
 
				+    sub switch_key {
			
 
				+        my ($self, $offset, $length) = @_;
			
 
				+
			
 
				+        return "string[$offset]" if $length == 1;
			
 
				+        return sprintf("*((triehash_uu%s*) &string[$offset])", $length * 8);
			
 
				+    }
			
 
				+
			
 
				+    sub print_table {
			
 
				+        my ($self, $trie, $fh, $indent, $index) = @_;
			
 
				+        $indent //= 0;
			
 
				+        $index //= 0;
			
 
				+
			
 
				+        if (defined $trie->{value}) {
			
 
				+            printf $fh ("    " x $indent . "return %s;\n", ($enum_class ? "${enum_name}::" : "").$trie->{label});
			
 
				+            return;
			
 
				+        }
			
 
				+
			
 
				+        # The difference between lowercase and uppercase alphabetical characters
			
 
				+        # is that they have one bit flipped. If we have alphabetical characters
			
 
				+        # in the search space, and the entire search space works fine if we
			
 
				+        # always turn on the flip, just OR the character we are switching over
			
 
				+        # with the bit.
			
 
				+        my $want_use_bit = 0;
			
 
				+        my $can_use_bit = 1;
			
 
				+        my $key_length = 0;
			
 
				+        foreach my $key (sort keys %{$trie->{children}}) {
			
 
				+            $can_use_bit &= not main::ambiguous($key);
			
 
				+            $want_use_bit |= ($key =~ /^[a-zA-Z]+$/);
			
 
				+            $key_length = length($key);
			
 
				+        }
			
 
				+
			
 
				+        if ($ignore_case && $can_use_bit && $want_use_bit) {
			
 
				+            printf $fh (("    " x $indent) . "switch(%s | 0x%s) {\n", $self->switch_key($index, $key_length), "20" x $key_length);
			
 
				+        } else {
			
 
				+            printf $fh (("    " x $indent) . "switch(%s) {\n", $self->switch_key($index, $key_length));
			
 
				+        }
			
 
				+
			
 
				+        my $notfirst = 0;
			
 
				+        foreach my $key (sort keys %{$trie->{children}}) {
			
 
				+            if ($notfirst) {
			
 
				+                printf $fh ("    " x $indent . "    break;\n");
			
 
				+            }
			
 
				+            if ($ignore_case) {
			
 
				+                printf $fh ("    " x $indent . "case %s:\n", $self->case_label(lc($key)));
			
 
				+                printf $fh ("    " x $indent . "case %s:\n", $self->case_label(uc($key))) if lc($key) ne uc($key) && !($can_use_bit && $want_use_bit);
			
 
				+            } else {
			
 
				+                printf $fh ("    " x $indent . "case %s:\n", $self->case_label($key));
			
 
				+            }
			
 
				+
			
 
				+            $self->print_table($trie->{children}{$key}, $fh, $indent + 1, $index + length($key));
			
 
				+
			
 
				+            $notfirst=1;
			
 
				+        }
			
 
				+
			
 
				+        printf $fh ("    " x $indent . "}\n");
			
 
				+    }
			
 
				+
			
 
				+    sub print_words {
			
 
				+        my ($self, $trie, $fh, $indent, $sofar) = @_;
			
 
				+
			
 
				+        $indent //= 0;
			
 
				+        $sofar //= "";
			
 
				+
			
 
				+
			
 
				+        printf $fh ("    " x $indent."%s = %s,\n", $trie->{label}, $trie->{value}) if defined $trie->{value};
			
 
				+
			
 
				+        foreach my $key (sort keys %{$trie->{children}}) {
			
 
				+            $self->print_words($trie->{children}{$key}, $fh, $indent, $sofar . $key);
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    sub print_functions {
			
 
				+        my ($self, $trie, %lengths) = @_;
			
 
				+        foreach my $local_length (sort { $a <=> $b } (keys %lengths)) {
			
 
				+            print $code ("static enum ${enum_name} ${function_name}${local_length}(const char *string)\n");
			
 
				+            print $code ("{\n");
			
 
				+            $self->print_table($trie->filter_depth($local_length)->rebuild_tree(), $code, 1);
			
 
				+            printf $code ("    return %s$unknown_label;\n", ($enum_class ? "${enum_name}::" : ""));
			
 
				+            print $code ("}\n");
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    sub main {
			
 
				+        my ($self, $trie, $num_values, %lengths) = @_;
			
 
				+        print $header ("#ifndef TRIE_HASH_${function_name}\n");
			
 
				+        print $header ("#define TRIE_HASH_${function_name}\n");
			
 
				+        print $header ("#include <stddef.h>\n");
			
 
				+        print $header ("#include <stdint.h>\n");
			
 
				+        foreach my $include (@includes) {
			
 
				+            print $header ("#include $include\n");
			
 
				+        }
			
 
				+        printf $header ("enum { $counter_name = $num_values };\n") if (defined($counter_name));
			
 
				+        print $header ("${enum_specifier} ${enum_name} {\n");
			
 
				+        $self->print_words($trie, $header, 1);
			
 
				+        printf $header ("    $unknown_label = $unknown,\n");
			
 
				+        print $header ("};\n");
			
 
				+        print $header ("$static enum ${enum_name} ${function_name}(const char *string, size_t length);\n");
			
 
				+
			
 
				+        print $code ("#include \"$header_name\"\n") if ($header_name ne $code_name);
			
 
				+
			
 
				+        if ($multi_byte) {
			
 
				+            print $code ("#ifdef __GNUC__\n");
			
 
				+            for (my $i=16; $i <= 64; $i *= 2) {
			
 
				+                print $code ("typedef uint${i}_t __attribute__((aligned (1))) triehash_uu${i};\n");
			
 
				+                print $code ("typedef char static_assert${i}[__alignof__(triehash_uu${i}) == 1 ? 1 : -1];\n");
			
 
				+            }
			
 
				+
			
 
				+            print $code ("#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__\n");
			
 
				+            print $code ("#define onechar(c, s, l) (((uint64_t)(c)) << (s))\n");
			
 
				+            print $code ("#else\n");
			
 
				+            print $code ("#define onechar(c, s, l) (((uint64_t)(c)) << (l-8-s))\n");
			
 
				+            print $code ("#endif\n");
			
 
				+            print $code ("#if (!defined(__ARM_ARCH) || defined(__ARM_FEATURE_UNALIGNED)) && !defined(TRIE_HASH_NO_MULTI_BYTE)\n");
			
 
				+            print $code ("#define TRIE_HASH_MULTI_BYTE\n");
			
 
				+            print $code ("#endif\n");
			
 
				+            print $code ("#endif /*GNUC */\n");
			
 
				+
			
 
				+            print $code ("#ifdef TRIE_HASH_MULTI_BYTE\n");
			
 
				+            $self->print_functions($trie, %lengths);
			
 
				+            $multi_byte = 0;
			
 
				+            print $code ("#else\n");
			
 
				+            $self->print_functions($trie, %lengths);
			
 
				+            print $code ("#endif /* TRIE_HASH_MULTI_BYTE */\n");
			
 
				+        } else {
			
 
				+            $self->print_functions($trie, %lengths);
			
 
				+        }
			
 
				+
			
 
				+        print $code ("$static enum ${enum_name} ${function_name}(const char *string, size_t length)\n");
			
 
				+        print $code ("{\n");
			
 
				+        print $code ("    switch (length) {\n");
			
 
				+        foreach my $local_length (sort { $a <=> $b } (keys %lengths)) {
			
 
				+            print $code ("    case $local_length:\n");
			
 
				+            print $code ("        return ${function_name}${local_length}(string);\n");
			
 
				+        }
			
 
				+        print $code ("    default:\n");
			
 
				+        printf $code ("        return %s$unknown_label;\n", ($enum_class ? "${enum_name}::" : ""));
			
 
				+        print $code ("    }\n");
			
 
				+        print $code ("}\n");
			
 
				+
			
 
				+        # Print end of header here, in case header and code point to the same file
			
 
				+        print $header ("#endif                       /* TRIE_HASH_${function_name} */\n");
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+# Check if the word can be reached by exactly one word in (alphabet OR 0x20).
			
 
				+sub ambiguous {
			
 
				+    my $word = shift;
			
 
				+
			
 
				+    foreach my $char (split //, $word) {
			
 
				+        # Setting the lowercase flag in the character produces a different
			
 
				+        # character, the character would thus not be matched.
			
 
				+        return 1 if ((ord($char) | 0x20) != ord(lc($char)));
			
 
				+
			
 
				+        # A word is also ambiguous if any character in lowercase can be reached
			
 
				+        # by ORing 0x20 from another character in the charset that is not a
			
 
				+        # lowercase character of the current character.
			
 
				+        # Assume that we have UTF-8 and the most significant bit can be set
			
 
				+        for my $i (0..255) {
			
 
				+            return 1 if (($i | 0x20) == ord(lc($char)) && lc(chr($i)) ne lc($char));
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    return 0;
			
 
				+}
			
 
				+
			
 
				+sub build_trie {
			
 
				+    my $codegen = shift;
			
 
				+    my $trie = Trie->new;
			
 
				+
			
 
				+    my $counter = $counter_start;
			
 
				+    my %lengths;
			
 
				+
			
 
				+    open(my $input, '<', $ARGV[0]) or die "Cannot open ".$ARGV[0].": $!";
			
 
				+    while (my $line = <$input>) {
			
 
				+        my ($label, $word, $value) = $line =~/\s*(?:([^~\s]+)\s*~)?(?:\s*([^~=\s]+)\s*)?(?:=\s*([^\s]+)\s+)?\s*/;
			
 
				+
			
 
				+        if (defined $word) {
			
 
				+            $counter = $value if defined($value);
			
 
				+            $label //= $codegen->word_to_label($word);
			
 
				+
			
 
				+            $trie->insert($word, $label, $counter);
			
 
				+            $lengths{length($word)} = 1;
			
 
				+            $counter++;
			
 
				+        } elsif (defined $value) {
			
 
				+            $unknown = $value;
			
 
				+            $unknown_label = $label if defined($label);
			
 
				+            $counter = $value + 1;
			
 
				+        } else {
			
 
				+            die "Invalid line: $line";
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    return ($trie, $counter, %lengths);
			
 
				+}
			
 
				+
			
 
				+# Generates an ASCII art tree
			
 
				+package TreeCodeGen {
			
 
				+
			
 
				+    sub new {
			
 
				+        my $class = shift;
			
 
				+        my $self = {};
			
 
				+        bless $self, $class;
			
 
				+
			
 
				+        return $self;
			
 
				+    }
			
 
				+
			
 
				+    sub word_to_label {
			
 
				+        my ($self, $word) = @_;
			
 
				+        return $word;
			
 
				+    }
			
 
				+
			
 
				+    sub main {
			
 
				+        my ($self, $trie, $counter, %lengths) = @_;
			
 
				+        printf $code ("┌────────────────────────────────────────────────────┐\n");
			
 
				+        printf $code ("│                   Initial trie                     │\n");
			
 
				+        printf $code ("└────────────────────────────────────────────────────┘\n");
			
 
				+        $self->print($trie);
			
 
				+        printf $code ("┌────────────────────────────────────────────────────┐\n");
			
 
				+        printf $code ("│                   Rebuilt trie                     │\n");
			
 
				+        printf $code ("└────────────────────────────────────────────────────┘\n");
			
 
				+        $self->print($trie->rebuild_tree());
			
 
				+
			
 
				+        foreach my $local_length (sort { $a <=> $b } (keys %lengths)) {
			
 
				+            printf $code ("┌────────────────────────────────────────────────────┐\n");
			
 
				+            printf $code ("│              Trie for words of length %-4d         │\n", $local_length);
			
 
				+            printf $code ("└────────────────────────────────────────────────────┘\n");
			
 
				+            $self->print($trie->filter_depth($local_length)->rebuild_tree());
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    sub open_output {
			
 
				+        my $self = shift;
			
 
				+        if ($code_name ne "-") {
			
 
				+            open($code, '>', $code_name) or die "Cannot open ".$ARGV[0].": $!" ;
			
 
				+        } else {
			
 
				+            $code = *STDOUT;
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    # Print a trie
			
 
				+    sub print {
			
 
				+        my ($self, $trie, $depth) = @_;
			
 
				+        $depth //= 0;
			
 
				+
			
 
				+        print $code (" → ") if defined($trie->{label});
			
 
				+        print $code ($trie->{label} // "", "\n");
			
 
				+        foreach my $key (sort keys %{$trie->{children}}) {
			
 
				+            print $code ("│   " x ($depth), "├── $key");
			
 
				+            $self->print($trie->{children}{$key}, $depth + 1);
			
 
				+        }
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+my %codegens = (
			
 
				+    C => "CCodeGen",
			
 
				+    tree => "TreeCodeGen",
			
 
				+);
			
 
				+
			
 
				+
			
 
				+defined($codegens{$language}) or die "Unknown language $language. Valid choices: ", join(", ", keys %codegens);
			
 
				+my $codegen = $codegens{$language}->new();
			
 
				+my ($trie, $counter, %lengths) = build_trie($codegen);
			
 
				+
			
 
				+$codegen->open_output();
			
 
				+$codegen->main($trie, $counter, %lengths);
			
 
				+
			
 
				+
			
 
				+=head1 LICENSE
			
 
				+
			
 
				+triehash is available under the MIT/Expat license, see the source code
			
 
				+for more information.
			
 
				+
			
 
				+=head1 AUTHOR
			
 
				+
			
 
				+Julian Andres Klode <jak@jak-linux.org>
			
 
				+
			
 
				+=cut
			
 
				+