diff options
author | Xavier Roche <xroche@users.noreply.github.com> | 2013-07-11 17:48:44 +0000 |
---|---|---|
committer | Xavier Roche <xroche@users.noreply.github.com> | 2013-07-11 17:48:44 +0000 |
commit | 931062d509cdb83612936a977972be0a99913688 (patch) | |
tree | ed6a0ed0e7cf52ed0c6e78e3c406192b37037472 /src/htsbasiccharsets.sh | |
parent | 942bebf4b29cbae800614df17d9d4af650a8d3ba (diff) |
Added basic iconv-like primitives (for 8-bit non-MBCS charsets) for operating systems like Android, where iconv is very unfortunately missing.
Diffstat (limited to 'src/htsbasiccharsets.sh')
-rwxr-xr-x | src/htsbasiccharsets.sh | 59 |
1 files changed, 59 insertions, 0 deletions
#!/bin/bash
#
# htsbasiccharsets.sh - generate C lookup tables for 8-bit charsets.
#
# Reads every *.TXT mapping file found in the current directory and writes
# C source to stdout:
#   - one "static const hts_UCS4 table_<name>[256]" array per mapping file,
#   - a NULL-terminated "table_mappings" index of { name, table } pairs.
# Only lines starting with "0x" are used: the first whitespace-separated
# field is the table index (must be < 256), the second is the stored value.
# Progress and error messages go to stderr; any error aborts the script with
# exit status 1.

# Derive the C identifier suffix for a mapping file name ($1):
# lower-case it, turn '-' into '_', drop ".txt", and rewrite "8859" as
# "iso_8859".
table_name() {
  echo "$1" | tr 'A-Z' 'a-z' | tr '-' '_' | sed -e 's/\.txt//' -e 's/8859/iso_8859/'
}

# Change "false" to "true" to download the mapping files with lftp.
# The four CP9xx files are deleted after the download (presumably because
# they are multi-byte code pages that a 256-entry table cannot hold).
if false; then
echo "mget ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-*.TXT" | lftp
echo "mget ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP*.TXT" | lftp
echo "mget ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP*.TXT" | lftp
echo "mget ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP*.TXT" | lftp
echo "mget ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/CP*.TXT" | lftp
echo "mget ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8*.TXT" | lftp
rm -f CP932.TXT CP936.TXT CP949.TXT CP950.TXT
fi

# Collect the mapping files once. Without nullglob an unmatched pattern
# would be kept literally and processed as if it were a file name.
shopt -s nullglob
files=(*.TXT)
shopt -u nullglob
if test "${#files[@]}" -eq 0; then
  echo "no *.TXT mapping files found in the current directory" >&2
  exit 1
fi

# Produce code
# File names and $0 are passed as printf arguments (not embedded in the
# format) so that '%' or '\' in a name cannot alter the output.
printf '/** GENERATED FILE (%s), DO NOT EDIT **/\n\n' "$0"
for file in "${files[@]}"; do
  echo "processing $file" >&2
  # Keep only "0x..." data lines, normalize every whitespace character to a
  # single space, and retain the first two space-separated fields.
  grep -vE "^(#|$)" "$file" | grep -E "^0x" | sed -e 's/[[:space:]]/ /g' | cut -f1,2 -d' ' | \
  (
    unset arr
    while read -r LINE ; do
      from_hex=$(echo "$LINE" | cut -f1 -d' ')
      # Validate before evaluating: an arithmetic error would otherwise
      # abort the assignment and silently leave the previous value in place.
      if ! [[ $from_hex =~ ^0x[0-9A-Fa-f]+$ ]]; then
        echo "error with $file" >&2
        exit 1
      fi
      from=$((from_hex))
      if test "$from" -lt 0 || test "$from" -ge 256; then
        echo "out-of-range ($LINE) with $file" >&2
        exit 1
      fi
      # NOTE(review): when a line has no second field, cut finds no
      # delimiter and prints the whole line, so the entry is stored as its
      # own index value. Behavior kept as-is - confirm this is intended.
      to=$(echo "$LINE" | cut -f2 -d' ')
      arr[$from]=$to
    done
    name=$(table_name "$file")
    printf '/* Table for %s */\nstatic const hts_UCS4 table_%s[256] = {\n ' "$file" "$name"
    # Emit all 256 entries, eight per output line; unset entries become 0.
    idx=0
    while test "$idx" -lt 256; do
      if test "$idx" -gt 0; then
        printf ", "
        if test $((idx % 8)) -eq 0; then
          printf "\n "
        fi
      fi
      value=${arr[$idx]:-0}
      printf "0x%04x" "$value"
      idx=$((idx + 1))
    done
    printf " };\n\n"
  ) || exit 1   # "exit 1" above only leaves the subshell; propagate it here
  echo "processed $file" >&2
done

# Indexes
# The lookup name is the table name with underscores removed.
printf "static const struct {\n const char *name;\n const hts_UCS4 *table;\n} table_mappings[] = {\n"
for file in "${files[@]}"; do
  name=$(table_name "$file")
  printf ' { "%s", table_%s },\n' "$(echo "$name" | tr -d '_')" "$name"
done
printf " { NULL, NULL }\n};\n"