#!/bin/bash
#
# Generates ${dest}: a C header defining decode_entity(), which maps the hash
# of an HTML character entity name (together with the name length) to the
# entity's numeric character code.
#
# The entity declarations are read from ${src} when that file exists in the
# current directory; otherwise they are downloaded from ${url} with the GET
# command. ${dest} is only written once generation has fully succeeded.
#
# Usage: run without arguments from the directory that should receive ${dest}.

# A failed download must fail the whole fetch|extract|emit pipeline.
set -o pipefail

src=html40.txt
url=http://www.w3.org/TR/1998/REC-html40-19980424/html40.txt
dest=htsentities.h

#######################################
# Prints the hash of an entity name.
# Starting from 0, each character c of the name updates the hash with
#   hash = ((hash * 1664525) mod 2^32 + c + 1013904223) mod 2^32
# Arguments: $1 - entity name (expected to be plain ASCII)
# Outputs:   the hash as an unsigned decimal number on stdout
#######################################
entity_hash() {
  local name=$1
  local -r mul=1664525 inc=1013904223 mod=$((1 << 32))
  local hash=0 i byte
  for ((i = 0; i < ${#name}; i++)); do
    # A leading quote makes printf yield the numeric code of the character.
    printf -v byte '%d' "'${name:i:1}"
    # hash < 2^32 and mul < 2^21, so the product fits in 64-bit arithmetic.
    hash=$(( ((hash * mul) % mod + byte + inc) % mod ))
  done
  printf '%s\n' "${hash}"
}

#######################################
# Writes the specification text to stdout.
# Globals:   src (read), url (read)
# Returns:   non-zero when neither the local copy nor GET is available,
#            or when the download fails
#######################################
fetch_spec() {
  if [[ -f "${src}" ]]; then
    cat -- "${src}"
  elif command -v GET >/dev/null 2>&1; then
    GET "${url}"
  else
    printf '%s: %s not found and the GET command is not installed\n' \
      "$0" "${src}" >&2
    return 1
  fi
}

#######################################
# Filters the specification (stdin) down to one line per character entity:
#   <!ENTITY name CDATA "&#code;" -- description [-->]
# becomes
#   name code description
# Runs of whitespace are squeezed to one space; every other line is dropped
# (including parameter entities such as "<!ENTITY % ...").
#######################################
extract_entities() {
  LC_ALL=C sed -n -E \
    -e 's/[[:space:]]+/ /g' \
    -e 's/ ?--> ?$//' \
    -e 's/^<!ENTITY ([A-Za-z0-9]+) CDATA "&#([0-9]+);" -- (.*)$/\1 \2 \3/p'
}

#######################################
# Reads "name code description" lines on stdin and prints one C `case`
# per entity, keyed by entity_hash of the name.
# Returns:   non-zero when no entity line was read
#######################################
emit_cases() {
  local line ent code hash
  local -i count=0
  while read -r line; do
    [[ -n "${line}" ]] || continue
    read -r ent code _ <<<"${line}"
    hash=$(entity_hash "${ent}")
    printf ' /* %s */\n' "${line}"
    printf ' case %su:\n' "${hash}"
    printf ' if (len == %d /* && strncmp(ent, "%s") == 0 */) {\n' \
      "${#ent}" "${ent}"
    printf ' return %s;\n' "${code}"
    printf ' }\n'
    printf ' break;\n'
    count+=1
  done
  if (( count == 0 )); then
    printf '%s: no entity declaration found in the input\n' "$0" >&2
    return 1
  fi
}

#######################################
# Prints the banner comment and the opening of decode_entity().
# The here-document is unquoted on purpose: $0 expands to this script's name.
# NOTE(review): the banner lines before "for each entity" were restored after
# the here-document opener was found corrupted; confirm the wording against a
# previously generated header.
#######################################
emit_prologue() {
  cat <<EOF
/*
We compute the LCG hash
(see <https://en.wikipedia.org/wiki/Linear_congruential_generator>)
for each entity. We should in theory check using strncmp() that we
actually have the correct entity, but this is actually statistically
not needed.
We may want to do better, but we expect the hash function to be uniform, and
let the compiler be smart enough to optimize the switch (for example by
checking in log2() intervals)
This code has been generated using the evil $0 script.
*/
static int decode_entity(const unsigned int hash, const size_t len) {
switch(hash) {
EOF
}

#######################################
# Prints the end of the switch and of decode_entity().
# NOTE(review): the footer was lost; -1 as the "unknown entity" result is a
# reconstruction - confirm against the callers of decode_entity().
#######################################
emit_epilogue() {
  cat <<EOF
}
/* unknown */
return -1;
}
EOF
}

#######################################
# Prints the complete header on stdout.
# Returns:   non-zero as soon as any stage fails
#######################################
generate_header() {
  emit_prologue || return
  fetch_spec | extract_entities | emit_cases || return
  emit_epilogue
}

#######################################
# Generates the header in memory, then writes it to ${dest}.
# Globals:   dest (read)
# Returns:   non-zero (leaving ${dest} untouched) when generation fails
#######################################
main() {
  local header
  if ! header=$(generate_header); then
    printf '%s: generation failed, %s left untouched\n' "$0" "${dest}" >&2
    return 1
  fi
  # Command substitution strips the trailing newline; put one back.
  printf '%s\n' "${header}" > "${dest}"
}

main "$@"