diff options
author | Xavier Roche <xroche@users.noreply.github.com> | 2013-05-30 19:04:51 +0000 |
---|---|---|
committer | Xavier Roche <xroche@users.noreply.github.com> | 2013-05-30 19:04:51 +0000 |
commit | 850f165f4ac90a6e6687c392ddfdd0c6a05b3fe5 (patch) | |
tree | 7cac2f01468639c4ab63fe523c17d7638e8cd2ac /src/htsentities.h | |
parent | 01af2a5e73f53ebf8a092e4bda77cd1326c1da11 (diff) |
Added hts_unescape_entities(), a rewrite of the HTML entity decoder.
Fixed the order of HTML entity decoding, which was previously done before charset decoding.
Diffstat (limited to 'src/htsentities.h')
-rw-r--r-- | src/htsentities.h | 1535 |
1 files changed, 1535 insertions, 0 deletions
diff --git a/src/htsentities.h b/src/htsentities.h new file mode 100644 index 0000000..bbb3a06 --- /dev/null +++ b/src/htsentities.h @@ -0,0 +1,1535 @@ +/* + -- htsentities.h -- + FILE GENERATED BY ./htsentities.sh, DO NOT MODIFY + + We compute the LCG hash + (see <http://en.wikipedia.org/wiki/Linear_congruential_generator>) + for each entity. We should in theory check using strncmp() that we + actually have the correct entity, but this is actually statistically + not needed. + + We may want to do better, but we expect the hash function to be uniform, and + let the compiler be smart enough to optimize the switch (for example by + checking in log2() intervals) + + This code has been generated using the evil ./htsentities.sh script. +*/ + +static int decode_entity(const unsigned int hash, const size_t len) { + switch(hash) { + /* nbsp 160 no-break space = non-breaking space, */ + case 3948425267: + if (len == 4 /* && strncmp(ent, "nbsp") == 0 */) { + return 160; + } + break; + /* iexcl 161 inverted exclamation mark, U+00A1 ISOnum */ + case 1499591408: + if (len == 5 /* && strncmp(ent, "iexcl") == 0 */) { + return 161; + } + break; + /* cent 162 cent sign, U+00A2 ISOnum */ + case 2824786826: + if (len == 4 /* && strncmp(ent, "cent") == 0 */) { + return 162; + } + break; + /* pound 163 pound sign, U+00A3 ISOnum */ + case 805305925: + if (len == 5 /* && strncmp(ent, "pound") == 0 */) { + return 163; + } + break; + /* curren 164 currency sign, U+00A4 ISOnum */ + case 1584829677: + if (len == 6 /* && strncmp(ent, "curren") == 0 */) { + return 164; + } + break; + /* yen 165 yen sign = yuan sign, U+00A5 ISOnum */ + case 3581281881: + if (len == 3 /* && strncmp(ent, "yen") == 0 */) { + return 165; + } + break; + /* brvbar 166 broken bar = broken vertical bar, */ + case 3768851825: + if (len == 6 /* && strncmp(ent, "brvbar") == 0 */) { + return 166; + } + break; + /* sect 167 section sign, U+00A7 ISOnum */ + case 2614630987: + if (len == 4 /* && strncmp(ent, "sect") == 0 */) 
{ + return 167; + } + break; + /* uml 168 diaeresis = spacing diaeresis, */ + case 2036319259: + if (len == 3 /* && strncmp(ent, "uml") == 0 */) { + return 168; + } + break; + /* copy 169 copyright sign, U+00A9 ISOnum */ + case 2428845635: + if (len == 4 /* && strncmp(ent, "copy") == 0 */) { + return 169; + } + break; + /* ordf 170 feminine ordinal indicator, U+00AA ISOnum */ + case 212470411: + if (len == 4 /* && strncmp(ent, "ordf") == 0 */) { + return 170; + } + break; + /* laquo 171 left-pointing double angle quotation mark */ + case 401220509: + if (len == 5 /* && strncmp(ent, "laquo") == 0 */) { + return 171; + } + break; + /* not 172 not sign, U+00AC ISOnum */ + case 3607627678: + if (len == 3 /* && strncmp(ent, "not") == 0 */) { + return 172; + } + break; + /* shy 173 soft hyphen = discretionary hyphen, */ + case 1248857237: + if (len == 3 /* && strncmp(ent, "shy") == 0 */) { + return 173; + } + break; + /* reg 174 registered sign = registered trade mark sign, */ + case 854293939: + if (len == 3 /* && strncmp(ent, "reg") == 0 */) { + return 174; + } + break; + /* macr 175 macron = spacing macron = overline */ + case 591423527: + if (len == 4 /* && strncmp(ent, "macr") == 0 */) { + return 175; + } + break; + /* deg 176 degree sign, U+00B0 ISOnum */ + case 3990252661: + if (len == 3 /* && strncmp(ent, "deg") == 0 */) { + return 176; + } + break; + /* plusmn 177 plus-minus sign = plus-or-minus sign, */ + case 3641444957: + if (len == 6 /* && strncmp(ent, "plusmn") == 0 */) { + return 177; + } + break; + /* sup2 178 superscript two = superscript digit two */ + case 279450434: + if (len == 4 /* && strncmp(ent, "sup2") == 0 */) { + return 178; + } + break; + /* sup3 179 superscript three = superscript digit three */ + case 279450435: + if (len == 4 /* && strncmp(ent, "sup3") == 0 */) { + return 179; + } + break; + /* acute 180 acute accent = spacing acute, */ + case 1795641881: + if (len == 5 /* && strncmp(ent, "acute") == 0 */) { + return 180; + } + break; + /* 
micro 181 micro sign, U+00B5 ISOnum */ + case 1447763057: + if (len == 5 /* && strncmp(ent, "micro") == 0 */) { + return 181; + } + break; + /* para 182 pilcrow sign = paragraph sign, */ + case 848855704: + if (len == 4 /* && strncmp(ent, "para") == 0 */) { + return 182; + } + break; + /* middot 183 middle dot = Georgian comma */ + case 3167839463: + if (len == 6 /* && strncmp(ent, "middot") == 0 */) { + return 183; + } + break; + /* cedil 184 cedilla = spacing cedilla, U+00B8 ISOdia */ + case 1354214564: + if (len == 5 /* && strncmp(ent, "cedil") == 0 */) { + return 184; + } + break; + /* sup1 185 superscript one = superscript digit one, */ + case 279450433: + if (len == 4 /* && strncmp(ent, "sup1") == 0 */) { + return 185; + } + break; + /* ordm 186 masculine ordinal indicator, */ + case 212470418: + if (len == 4 /* && strncmp(ent, "ordm") == 0 */) { + return 186; + } + break; + /* raquo 187 right-pointing double angle quotation mark */ + case 1355124995: + if (len == 5 /* && strncmp(ent, "raquo") == 0 */) { + return 187; + } + break; + /* frac14 188 vulgar fraction one quarter */ + case 1016175271: + if (len == 6 /* && strncmp(ent, "frac14") == 0 */) { + return 188; + } + break; + /* frac12 189 vulgar fraction one half */ + case 1016175269: + if (len == 6 /* && strncmp(ent, "frac12") == 0 */) { + return 189; + } + break; + /* frac34 190 vulgar fraction three quarters */ + case 1019504321: + if (len == 6 /* && strncmp(ent, "frac34") == 0 */) { + return 190; + } + break; + /* iquest 191 inverted question mark */ + case 430057661: + if (len == 6 /* && strncmp(ent, "iquest") == 0 */) { + return 191; + } + break; + /* Agrave 192 latin capital letter A with grave */ + case 2815520320: + if (len == 6 /* && strncmp(ent, "Agrave") == 0 */) { + return 192; + } + break; + /* Aacute 193 latin capital letter A with acute, */ + case 4192391993: + if (len == 6 /* && strncmp(ent, "Aacute") == 0 */) { + return 193; + } + break; + /* Acirc 194 latin capital letter A with 
circumflex, */ + case 619920369: + if (len == 5 /* && strncmp(ent, "Acirc") == 0 */) { + return 194; + } + break; + /* Atilde 195 latin capital letter A with tilde, */ + case 4145258425: + if (len == 6 /* && strncmp(ent, "Atilde") == 0 */) { + return 195; + } + break; + /* Auml 196 latin capital letter A with diaeresis, */ + case 3558330427: + if (len == 4 /* && strncmp(ent, "Auml") == 0 */) { + return 196; + } + break; + /* Aring 197 latin capital letter A with ring above */ + case 1775583868: + if (len == 5 /* && strncmp(ent, "Aring") == 0 */) { + return 197; + } + break; + /* AElig 198 latin capital letter AE */ + case 3743972869: + if (len == 5 /* && strncmp(ent, "AElig") == 0 */) { + return 198; + } + break; + /* Ccedil 199 latin capital letter C with cedilla, */ + case 885931646: + if (len == 6 /* && strncmp(ent, "Ccedil") == 0 */) { + return 199; + } + break; + /* Egrave 200 latin capital letter E with grave, */ + case 1380421556: + if (len == 6 /* && strncmp(ent, "Egrave") == 0 */) { + return 200; + } + break; + /* Eacute 201 latin capital letter E with acute, */ + case 2757293229: + if (len == 6 /* && strncmp(ent, "Eacute") == 0 */) { + return 201; + } + break; + /* Ecirc 202 latin capital letter E with circumflex, */ + case 1255856693: + if (len == 5 /* && strncmp(ent, "Ecirc") == 0 */) { + return 202; + } + break; + /* Euml 203 latin capital letter E with diaeresis, */ + case 2436627087: + if (len == 4 /* && strncmp(ent, "Euml") == 0 */) { + return 203; + } + break; + /* Igrave 204 latin capital letter I with grave, */ + case 4240290088: + if (len == 6 /* && strncmp(ent, "Igrave") == 0 */) { + return 204; + } + break; + /* Iacute 205 latin capital letter I with acute, */ + case 1322194465: + if (len == 6 /* && strncmp(ent, "Iacute") == 0 */) { + return 205; + } + break; + /* Icirc 206 latin capital letter I with circumflex, */ + case 1891793017: + if (len == 5 /* && strncmp(ent, "Icirc") == 0 */) { + return 206; + } + break; + /* Iuml 207 latin capital 
letter I with diaeresis, */ + case 1314923747: + if (len == 4 /* && strncmp(ent, "Iuml") == 0 */) { + return 207; + } + break; + /* ETH 208 latin capital letter ETH, U+00D0 ISOlat1 */ + case 475229442: + if (len == 3 /* && strncmp(ent, "ETH") == 0 */) { + return 208; + } + break; + /* Ntilde 209 latin capital letter N with tilde, */ + case 2702412914: + if (len == 6 /* && strncmp(ent, "Ntilde") == 0 */) { + return 209; + } + break; + /* Ograve 210 latin capital letter O with grave, */ + case 4235125590: + if (len == 6 /* && strncmp(ent, "Ograve") == 0 */) { + return 210; + } + break; + /* Oacute 211 latin capital letter O with acute, */ + case 1317029967: + if (len == 6 /* && strncmp(ent, "Oacute") == 0 */) { + return 211; + } + break; + /* Ocirc 212 latin capital letter O with circumflex, */ + case 2845697503: + if (len == 5 /* && strncmp(ent, "Ocirc") == 0 */) { + return 212; + } + break; + /* Otilde 213 latin capital letter O with tilde, */ + case 1269896399: + if (len == 6 /* && strncmp(ent, "Otilde") == 0 */) { + return 213; + } + break; + /* Ouml 214 latin capital letter O with diaeresis, */ + case 1779852385: + if (len == 4 /* && strncmp(ent, "Ouml") == 0 */) { + return 214; + } + break; + /* times 215 multiplication sign, U+00D7 ISOnum */ + case 2139742557: + if (len == 5 /* && strncmp(ent, "times") == 0 */) { + return 215; + } + break; + /* Oslash 216 latin capital letter O with stroke */ + case 1378045056: + if (len == 6 /* && strncmp(ent, "Oslash") == 0 */) { + return 216; + } + break; + /* Ugrave 217 latin capital letter U with grave, */ + case 4229961092: + if (len == 6 /* && strncmp(ent, "Ugrave") == 0 */) { + return 217; + } + break; + /* Uacute 218 latin capital letter U with acute, */ + case 1311865469: + if (len == 6 /* && strncmp(ent, "Uacute") == 0 */) { + return 218; + } + break; + /* Ucirc 219 latin capital letter U with circumflex, */ + case 3799601989: + if (len == 5 /* && strncmp(ent, "Ucirc") == 0 */) { + return 219; + } + break; + /* Uuml 
220 latin capital letter U with diaeresis, */ + case 2244781023: + if (len == 4 /* && strncmp(ent, "Uuml") == 0 */) { + return 220; + } + break; + /* Yacute 221 latin capital letter Y with acute, */ + case 4171734001: + if (len == 6 /* && strncmp(ent, "Yacute") == 0 */) { + return 221; + } + break; + /* THORN 222 latin capital letter THORN, */ + case 4251263774: + if (len == 5 /* && strncmp(ent, "THORN") == 0 */) { + return 222; + } + break; + /* szlig 223 latin small letter sharp s = ess-zed, */ + case 51833136: + if (len == 5 /* && strncmp(ent, "szlig") == 0 */) { + return 223; + } + break; + /* agrave 224 latin small letter a with grave */ + case 4219632096: + if (len == 6 /* && strncmp(ent, "agrave") == 0 */) { + return 224; + } + break; + /* aacute 225 latin small letter a with acute, */ + case 1301536473: + if (len == 6 /* && strncmp(ent, "aacute") == 0 */) { + return 225; + } + break; + /* acirc 226 latin small letter a with circumflex, */ + case 1412443665: + if (len == 5 /* && strncmp(ent, "acirc") == 0 */) { + return 226; + } + break; + /* atilde 227 latin small letter a with tilde, */ + case 1254402905: + if (len == 6 /* && strncmp(ent, "atilde") == 0 */) { + return 227; + } + break; + /* auml 228 latin small letter a with diaeresis, */ + case 3174638299: + if (len == 4 /* && strncmp(ent, "auml") == 0 */) { + return 228; + } + break; + /* aring 229 latin small letter a with ring above */ + case 2568107164: + if (len == 5 /* && strncmp(ent, "aring") == 0 */) { + return 229; + } + break; + /* aelig 230 latin small letter ae */ + case 4152804037: + if (len == 5 /* && strncmp(ent, "aelig") == 0 */) { + return 230; + } + break; + /* ccedil 231 latin small letter c with cedilla, */ + case 2290043422: + if (len == 6 /* && strncmp(ent, "ccedil") == 0 */) { + return 231; + } + break; + /* egrave 232 latin small letter e with grave, */ + case 2784533332: + if (len == 6 /* && strncmp(ent, "egrave") == 0 */) { + return 232; + } + break; + /* eacute 233 latin small 
letter e with acute, */ + case 4161405005: + if (len == 6 /* && strncmp(ent, "eacute") == 0 */) { + return 233; + } + break; + /* ecirc 234 latin small letter e with circumflex, */ + case 2048379989: + if (len == 5 /* && strncmp(ent, "ecirc") == 0 */) { + return 234; + } + break; + /* euml 235 latin small letter e with diaeresis, */ + case 2052934959: + if (len == 4 /* && strncmp(ent, "euml") == 0 */) { + return 235; + } + break; + /* igrave 236 latin small letter i with grave, */ + case 1349434568: + if (len == 6 /* && strncmp(ent, "igrave") == 0 */) { + return 236; + } + break; + /* iacute 237 latin small letter i with acute, */ + case 2726306241: + if (len == 6 /* && strncmp(ent, "iacute") == 0 */) { + return 237; + } + break; + /* icirc 238 latin small letter i with circumflex, */ + case 2684316313: + if (len == 5 /* && strncmp(ent, "icirc") == 0 */) { + return 238; + } + break; + /* iuml 239 latin small letter i with diaeresis, */ + case 931231619: + if (len == 4 /* && strncmp(ent, "iuml") == 0 */) { + return 239; + } + break; + /* eth 240 latin small letter eth, U+00F0 ISOlat1 */ + case 109822946: + if (len == 3 /* && strncmp(ent, "eth") == 0 */) { + return 240; + } + break; + /* ntilde 241 latin small letter n with tilde, */ + case 4106524690: + if (len == 6 /* && strncmp(ent, "ntilde") == 0 */) { + return 241; + } + break; + /* ograve 242 latin small letter o with grave, */ + case 1344270070: + if (len == 6 /* && strncmp(ent, "ograve") == 0 */) { + return 242; + } + break; + /* oacute 243 latin small letter o with acute, */ + case 2721141743: + if (len == 6 /* && strncmp(ent, "oacute") == 0 */) { + return 243; + } + break; + /* ocirc 244 latin small letter o with circumflex, */ + case 3638220799: + if (len == 5 /* && strncmp(ent, "ocirc") == 0 */) { + return 244; + } + break; + /* otilde 245 latin small letter o with tilde, */ + case 2674008175: + if (len == 6 /* && strncmp(ent, "otilde") == 0 */) { + return 245; + } + break; + /* ouml 246 latin small 
letter o with diaeresis, */ + case 1396160257: + if (len == 4 /* && strncmp(ent, "ouml") == 0 */) { + return 246; + } + break; + /* divide 247 division sign, U+00F7 ISOnum */ + case 2204943563: + if (len == 6 /* && strncmp(ent, "divide") == 0 */) { + return 247; + } + break; + /* oslash 248 latin small letter o with stroke, */ + case 2782156832: + if (len == 6 /* && strncmp(ent, "oslash") == 0 */) { + return 248; + } + break; + /* ugrave 249 latin small letter u with grave, */ + case 1339105572: + if (len == 6 /* && strncmp(ent, "ugrave") == 0 */) { + return 249; + } + break; + /* uacute 250 latin small letter u with acute, */ + case 2715977245: + if (len == 6 /* && strncmp(ent, "uacute") == 0 */) { + return 250; + } + break; + /* ucirc 251 latin small letter u with circumflex, */ + case 297157989: + if (len == 5 /* && strncmp(ent, "ucirc") == 0 */) { + return 251; + } + break; + /* uuml 252 latin small letter u with diaeresis, */ + case 1861088895: + if (len == 4 /* && strncmp(ent, "uuml") == 0 */) { + return 252; + } + break; + /* yacute 253 latin small letter y with acute, */ + case 1280878481: + if (len == 6 /* && strncmp(ent, "yacute") == 0 */) { + return 253; + } + break; + /* thorn 254 latin small letter thorn with, */ + case 4294688446: + if (len == 5 /* && strncmp(ent, "thorn") == 0 */) { + return 254; + } + break; + /* yuml 255 latin small letter y with diaeresis, */ + case 739385555: + if (len == 4 /* && strncmp(ent, "yuml") == 0 */) { + return 255; + } + break; + /* fnof 402 latin small f with hook = function */ + case 2270075705: + if (len == 4 /* && strncmp(ent, "fnof") == 0 */) { + return 402; + } + break; + /* Alpha 913 greek capital letter alpha, U+0391 */ + case 4027656009: + if (len == 5 /* && strncmp(ent, "Alpha") == 0 */) { + return 913; + } + break; + /* Beta 914 greek capital letter beta, U+0392 */ + case 277666448: + if (len == 4 /* && strncmp(ent, "Beta") == 0 */) { + return 914; + } + break; + /* Gamma 915 greek capital letter gamma, */ + 
case 1537149070: + if (len == 5 /* && strncmp(ent, "Gamma") == 0 */) { + return 915; + } + break; + /* Delta 916 greek capital letter delta, */ + case 3855542753: + if (len == 5 /* && strncmp(ent, "Delta") == 0 */) { + return 916; + } + break; + /* Epsilon 917 greek capital letter epsilon, U+0395 */ + case 2449300823: + if (len == 7 /* && strncmp(ent, "Epsilon") == 0 */) { + return 917; + } + break; + /* Zeta 918 greek capital letter zeta, U+0396 */ + case 2137381000: + if (len == 4 /* && strncmp(ent, "Zeta") == 0 */) { + return 918; + } + break; + /* Eta 919 greek capital letter eta, U+0397 */ + case 528494267: + if (len == 3 /* && strncmp(ent, "Eta") == 0 */) { + return 919; + } + break; + /* Theta 920 greek capital letter theta, */ + case 3904764433: + if (len == 5 /* && strncmp(ent, "Theta") == 0 */) { + return 920; + } + break; + /* Iota 921 greek capital letter iota, U+0399 */ + case 3284124477: + if (len == 4 /* && strncmp(ent, "Iota") == 0 */) { + return 921; + } + break; + /* Kappa 922 greek capital letter kappa, U+039A */ + case 3346788084: + if (len == 5 /* && strncmp(ent, "Kappa") == 0 */) { + return 922; + } + break; + /* Lambda 923 greek capital letter lambda, */ + case 1824315307: + if (len == 6 /* && strncmp(ent, "Lambda") == 0 */) { + return 923; + } + break; + /* Mu 924 greek capital letter mu, U+039C */ + case 1324604304: + if (len == 2 /* && strncmp(ent, "Mu") == 0 */) { + return 924; + } + break; + /* Nu 925 greek capital letter nu, U+039D */ + case 1326268829: + if (len == 2 /* && strncmp(ent, "Nu") == 0 */) { + return 925; + } + break; + /* Xi 926 greek capital letter xi, U+039E ISOgrk3 */ + case 1342914067: + if (len == 2 /* && strncmp(ent, "Xi") == 0 */) { + return 926; + } + break; + /* Omicron 927 greek capital letter omicron, U+039F */ + case 488730696: + if (len == 7 /* && strncmp(ent, "Omicron") == 0 */) { + return 927; + } + break; + /* Pi 928 greek capital letter pi, U+03A0 ISOgrk3 */ + case 1329597867: + if (len == 2 /* && 
strncmp(ent, "Pi") == 0 */) { + return 928; + } + break; + /* Rho 929 greek capital letter rho, U+03A1 */ + case 1277958850: + if (len == 3 /* && strncmp(ent, "Rho") == 0 */) { + return 929; + } + break; + /* Sigma 931 greek capital letter sigma, */ + case 3159100428: + if (len == 5 /* && strncmp(ent, "Sigma") == 0 */) { + return 931; + } + break; + /* Tau 932 greek capital letter tau, U+03A4 */ + case 2045446591: + if (len == 3 /* && strncmp(ent, "Tau") == 0 */) { + return 932; + } + break; + /* Upsilon 933 greek capital letter upsilon, */ + case 2291992807: + if (len == 7 /* && strncmp(ent, "Upsilon") == 0 */) { + return 933; + } + break; + /* Phi 934 greek capital letter phi, */ + case 498819434: + if (len == 3 /* && strncmp(ent, "Phi") == 0 */) { + return 934; + } + break; + /* Chi 935 greek capital letter chi, U+03A7 */ + case 4024347861: + if (len == 3 /* && strncmp(ent, "Chi") == 0 */) { + return 935; + } + break; + /* Psi 936 greek capital letter psi, */ + case 517129209: + if (len == 3 /* && strncmp(ent, "Psi") == 0 */) { + return 936; + } + break; + /* Omega 937 greek capital letter omega, */ + case 612334204: + if (len == 5 /* && strncmp(ent, "Omega") == 0 */) { + return 937; + } + break; + /* alpha 945 greek small letter alpha, */ + case 525212009: + if (len == 5 /* && strncmp(ent, "alpha") == 0 */) { + return 945; + } + break; + /* beta 946 greek small letter beta, U+03B2 ISOgrk3 */ + case 4188941616: + if (len == 4 /* && strncmp(ent, "beta") == 0 */) { + return 946; + } + break; + /* gamma 947 greek small letter gamma, */ + case 2329672366: + if (len == 5 /* && strncmp(ent, "gamma") == 0 */) { + return 947; + } + break; + /* delta 948 greek small letter delta, */ + case 353098753: + if (len == 5 /* && strncmp(ent, "delta") == 0 */) { + return 948; + } + break; + /* epsilon 949 greek small letter epsilon, */ + case 2134684791: + if (len == 7 /* && strncmp(ent, "epsilon") == 0 */) { + return 949; + } + break; + /* zeta 950 greek small letter zeta, 
U+03B6 ISOgrk3 */ + case 1753688872: + if (len == 4 /* && strncmp(ent, "zeta") == 0 */) { + return 950; + } + break; + /* eta 951 greek small letter eta, U+03B7 ISOgrk3 */ + case 109822939: + if (len == 3 /* && strncmp(ent, "eta") == 0 */) { + return 951; + } + break; + /* theta 952 greek small letter theta, */ + case 402320433: + if (len == 5 /* && strncmp(ent, "theta") == 0 */) { + return 952; + } + break; + /* iota 953 greek small letter iota, U+03B9 ISOgrk3 */ + case 2900432349: + if (len == 4 /* && strncmp(ent, "iota") == 0 */) { + return 953; + } + break; + /* kappa 954 greek small letter kappa, */ + case 4139311380: + if (len == 5 /* && strncmp(ent, "kappa") == 0 */) { + return 954; + } + break; + /* lambda 955 greek small letter lambda, */ + case 3228427083: + if (len == 6 /* && strncmp(ent, "lambda") == 0 */) { + return 955; + } + break; + /* mu 956 greek small letter mu, U+03BC ISOgrk3 */ + case 1377869104: + if (len == 2 /* && strncmp(ent, "mu") == 0 */) { + return 956; + } + break; + /* nu 957 greek small letter nu, U+03BD ISOgrk3 */ + case 1379533629: + if (len == 2 /* && strncmp(ent, "nu") == 0 */) { + return 957; + } + break; + /* xi 958 greek small letter xi, U+03BE ISOgrk3 */ + case 1396178867: + if (len == 2 /* && strncmp(ent, "xi") == 0 */) { + return 958; + } + break; + /* omicron 959 greek small letter omicron, U+03BF NEW */ + case 174114664: + if (len == 7 /* && strncmp(ent, "omicron") == 0 */) { + return 959; + } + break; + /* pi 960 greek small letter pi, U+03C0 ISOgrk3 */ + case 1382862667: + if (len == 2 /* && strncmp(ent, "pi") == 0 */) { + return 960; + } + break; + /* rho 961 greek small letter rho, U+03C1 ISOgrk3 */ + case 859287522: + if (len == 3 /* && strncmp(ent, "rho") == 0 */) { + return 961; + } + break; + /* sigmaf 962 greek small letter final sigma, */ + case 2582995969: + if (len == 6 /* && strncmp(ent, "sigmaf") == 0 */) { + return 962; + } + break; + /* sigma 963 greek small letter sigma, */ + case 3951623724: + if (len == 
5 /* && strncmp(ent, "sigma") == 0 */) { + return 963; + } + break; + /* tau 964 greek small letter tau, U+03C4 ISOgrk3 */ + case 1626775263: + if (len == 3 /* && strncmp(ent, "tau") == 0 */) { + return 964; + } + break; + /* upsilon 965 greek small letter upsilon, */ + case 1977376775: + if (len == 7 /* && strncmp(ent, "upsilon") == 0 */) { + return 965; + } + break; + /* phi 966 greek small letter phi, U+03C6 ISOgrk3 */ + case 80148106: + if (len == 3 /* && strncmp(ent, "phi") == 0 */) { + return 966; + } + break; + /* chi 967 greek small letter chi, U+03C7 ISOgrk3 */ + case 3605676533: + if (len == 3 /* && strncmp(ent, "chi") == 0 */) { + return 967; + } + break; + /* psi 968 greek small letter psi, U+03C8 ISOgrk3 */ + case 98457881: + if (len == 3 /* && strncmp(ent, "psi") == 0 */) { + return 968; + } + break; + /* omega 969 greek small letter omega, */ + case 1404857500: + if (len == 5 /* && strncmp(ent, "omega") == 0 */) { + return 969; + } + break; + /* thetasym 977 greek small letter theta symbol, */ + case 3881711083: + if (len == 8 /* && strncmp(ent, "thetasym") == 0 */) { + return 977; + } + break; + /* upsih 978 greek upsilon with hook symbol, */ + case 3753563936: + if (len == 5 /* && strncmp(ent, "upsih") == 0 */) { + return 978; + } + break; + /* piv 982 greek pi symbol, U+03D6 ISOgrk3 */ + case 81812644: + if (len == 3 /* && strncmp(ent, "piv") == 0 */) { + return 982; + } + break; + /* bull 8226 bullet = black small circle, */ + case 1818806115: + if (len == 4 /* && strncmp(ent, "bull") == 0 */) { + return 8226; + } + break; + /* hellip 8230 horizontal ellipsis = three dot leader, */ + case 1967714928: + if (len == 6 /* && strncmp(ent, "hellip") == 0 */) { + return 8230; + } + break; + /* prime 8242 prime = minutes = feet, U+2032 ISOtech */ + case 656236556: + if (len == 5 /* && strncmp(ent, "prime") == 0 */) { + return 8242; + } + break; + /* Prime 8243 double prime = seconds = inches, */ + case 4158680556: + if (len == 5 /* && strncmp(ent, 
"Prime") == 0 */) { + return 8243; + } + break; + /* oline 8254 overline = spacing overscore, */ + case 33988362: + if (len == 5 /* && strncmp(ent, "oline") == 0 */) { + return 8254; + } + break; + /* frasl 8260 fraction slash, U+2044 NEW */ + case 254792559: + if (len == 5 /* && strncmp(ent, "frasl") == 0 */) { + return 8260; + } + break; + /* weierp 8472 script capital P = power set */ + case 3305299450: + if (len == 6 /* && strncmp(ent, "weierp") == 0 */) { + return 8472; + } + break; + /* image 8465 blackletter capital I = imaginary part, */ + case 3187641494: + if (len == 5 /* && strncmp(ent, "image") == 0 */) { + return 8465; + } + break; + /* real 8476 blackletter capital R = real part symbol, */ + case 3965469588: + if (len == 4 /* && strncmp(ent, "real") == 0 */) { + return 8476; + } + break; + /* trade 8482 trade mark sign, U+2122 ISOnum */ + case 2455601811: + if (len == 5 /* && strncmp(ent, "trade") == 0 */) { + return 8482; + } + break; + /* alefsym 8501 alef symbol = first transfinite cardinal, */ + case 3894502290: + if (len == 7 /* && strncmp(ent, "alefsym") == 0 */) { + return 8501; + } + break; + /* larr 8592 leftwards arrow, U+2190 ISOnum */ + case 1970559061: + if (len == 4 /* && strncmp(ent, "larr") == 0 */) { + return 8592; + } + break; + /* uarr 8593 upwards arrow, U+2191 ISOnum */ + case 2667952018: + if (len == 4 /* && strncmp(ent, "uarr") == 0 */) { + return 8593; + } + break; + /* rarr 8594 rightwards arrow, U+2192 ISOnum */ + case 2435487699: + if (len == 4 /* && strncmp(ent, "rarr") == 0 */) { + return 8594; + } + break; + /* darr 8595 downwards arrow, U+2193 ISOnum */ + case 4213965741: + if (len == 4 /* && strncmp(ent, "darr") == 0 */) { + return 8595; + } + break; + /* harr 8596 left right arrow, U+2194 ISOamsa */ + case 3092262401: + if (len == 4 /* && strncmp(ent, "harr") == 0 */) { + return 8596; + } + break; + /* crarr 8629 downwards arrow with corner leftwards */ + case 4071143093: + if (len == 5 /* && strncmp(ent, "crarr") == 0 
*/) { + return 8629; + } + break; + /* lArr 8656 leftwards double arrow, U+21D0 ISOtech */ + case 2389230389: + if (len == 4 /* && strncmp(ent, "lArr") == 0 */) { + return 8656; + } + break; + /* uArr 8657 upwards double arrow, U+21D1 ISOamsa */ + case 3086623346: + if (len == 4 /* && strncmp(ent, "uArr") == 0 */) { + return 8657; + } + break; + /* rArr 8658 rightwards double arrow, */ + case 2854159027: + if (len == 4 /* && strncmp(ent, "rArr") == 0 */) { + return 8658; + } + break; + /* dArr 8659 downwards double arrow, U+21D3 ISOamsa */ + case 337669773: + if (len == 4 /* && strncmp(ent, "dArr") == 0 */) { + return 8659; + } + break; + /* hArr 8660 left right double arrow, */ + case 3510933729: + if (len == 4 /* && strncmp(ent, "hArr") == 0 */) { + return 8660; + } + break; + /* forall 8704 for all, U+2200 ISOtech */ + case 2607244222: + if (len == 6 /* && strncmp(ent, "forall") == 0 */) { + return 8704; + } + break; + /* part 8706 partial differential, U+2202 ISOtech */ + case 848855723: + if (len == 4 /* && strncmp(ent, "part") == 0 */) { + return 8706; + } + break; + /* exist 8707 there exists, U+2203 ISOtech */ + case 3677294764: + if (len == 5 /* && strncmp(ent, "exist") == 0 */) { + return 8707; + } + break; + /* empty 8709 empty set = null set = diameter, */ + case 4121922294: + if (len == 5 /* && strncmp(ent, "empty") == 0 */) { + return 8709; + } + break; + /* nabla 8711 nabla = backward difference, */ + case 3450596949: + if (len == 5 /* && strncmp(ent, "nabla") == 0 */) { + return 8711; + } + break; + /* isin 8712 element of, U+2208 ISOtech */ + case 145434111: + if (len == 4 /* && strncmp(ent, "isin") == 0 */) { + return 8712; + } + break; + /* notin 8713 not an element of, U+2209 ISOtech */ + case 89445443: + if (len == 5 /* && strncmp(ent, "notin") == 0 */) { + return 8713; + } + break; + /* ni 8715 contains as member, U+220B ISOtech */ + case 1379533617: + if (len == 2 /* && strncmp(ent, "ni") == 0 */) { + return 8715; + } + break; + /* prod 8719 
n-ary product = product sign, */ + case 3171579821: + if (len == 4 /* && strncmp(ent, "prod") == 0 */) { + return 8719; + } + break; + /* sum 8721 n-ary sumation, U+2211 ISOamsb */ + case 1270496050: + if (len == 3 /* && strncmp(ent, "sum") == 0 */) { + return 8721; + } + break; + /* minus 8722 minus sign, U+2212 ISOtech */ + case 1443056095: + if (len == 5 /* && strncmp(ent, "minus") == 0 */) { + return 8722; + } + break; + /* lowast 8727 asterisk operator, U+2217 ISOtech */ + case 137860408: + if (len == 6 /* && strncmp(ent, "lowast") == 0 */) { + return 8727; + } + break; + /* radic 8730 square root = radical sign, */ + case 565711814: + if (len == 5 /* && strncmp(ent, "radic") == 0 */) { + return 8730; + } + break; + /* prop 8733 proportional to, U+221D ISOtech */ + case 3171579833: + if (len == 4 /* && strncmp(ent, "prop") == 0 */) { + return 8733; + } + break; + /* infin 8734 infinity, U+221E ISOtech */ + case 3784651419: + if (len == 5 /* && strncmp(ent, "infin") == 0 */) { + return 8734; + } + break; + /* ang 8736 angle, U+2220 ISOamso */ + case 2836524271: + if (len == 3 /* && strncmp(ent, "ang") == 0 */) { + return 8736; + } + break; + /* and 8743 logical and = wedge, U+2227 ISOtech */ + case 2836524268: + if (len == 3 /* && strncmp(ent, "and") == 0 */) { + return 8743; + } + break; + /* or 8744 logical or = vee, U+2228 ISOtech */ + case 1381198151: + if (len == 2 /* && strncmp(ent, "or") == 0 */) { + return 8744; + } + break; + /* cap 8745 intersection = cap, U+2229 ISOtech */ + case 3594024865: + if (len == 3 /* && strncmp(ent, "cap") == 0 */) { + return 8745; + } + break; + /* cup 8746 union = cup, U+222A ISOtech */ + case 3627315365: + if (len == 3 /* && strncmp(ent, "cup") == 0 */) { + return 8746; + } + break; + /* int 8747 integral, U+222B ISOtech */ + case 1658114628: + if (len == 3 /* && strncmp(ent, "int") == 0 */) { + return 8747; + } + break; + /* there4 8756 therefore, U+2234 ISOtech */ + case 1359369970: + if (len == 6 /* && strncmp(ent, 
"there4") == 0 */) { + return 8756; + } + break; + /* sim 8764 tilde operator = varies with = similar to, */ + case 1250521750: + if (len == 3 /* && strncmp(ent, "sim") == 0 */) { + return 8764; + } + break; + /* cong 8773 approximately equal to, U+2245 ISOtech */ + case 2425516567: + if (len == 4 /* && strncmp(ent, "cong") == 0 */) { + return 8773; + } + break; + /* asymp 8776 almost equal to = asymptotic to, */ + case 3150422973: + if (len == 5 /* && strncmp(ent, "asymp") == 0 */) { + return 8776; + } + break; + /* ne 8800 not equal to, U+2260 ISOtech */ + case 1379533613: + if (len == 2 /* && strncmp(ent, "ne") == 0 */) { + return 8800; + } + break; + /* equiv 8801 identical to, U+2261 ISOtech */ + case 634790405: + if (len == 5 /* && strncmp(ent, "equiv") == 0 */) { + return 8801; + } + break; + /* le 8804 less-than or equal to, U+2264 ISOtech */ + case 1376204563: + if (len == 2 /* && strncmp(ent, "le") == 0 */) { + return 8804; + } + break; + /* ge 8805 greater-than or equal to, */ + case 1367881938: + if (len == 2 /* && strncmp(ent, "ge") == 0 */) { + return 8805; + } + break; + /* sub 8834 subset of, U+2282 ISOtech */ + case 1270496039: + if (len == 3 /* && strncmp(ent, "sub") == 0 */) { + return 8834; + } + break; + /* sup 8835 superset of, U+2283 ISOtech */ + case 1270496053: + if (len == 3 /* && strncmp(ent, "sup") == 0 */) { + return 8835; + } + break; + /* nsub 8836 not a subset of, U+2284 ISOamsn */ + case 1984504696: + if (len == 4 /* && strncmp(ent, "nsub") == 0 */) { + return 8836; + } + break; + /* sube 8838 subset of or equal to, U+2286 ISOtech */ + case 256147135: + if (len == 4 /* && strncmp(ent, "sube") == 0 */) { + return 8838; + } + break; + /* supe 8839 superset of or equal to, */ + case 279450485: + if (len == 4 /* && strncmp(ent, "supe") == 0 */) { + return 8839; + } + break; + /* oplus 8853 circled plus = direct sum, */ + case 92645826: + if (len == 5 /* && strncmp(ent, "oplus") == 0 */) { + return 8853; + } + break; + /* otimes 8855 
circled times = vector product, */ + case 3065242419: + if (len == 6 /* && strncmp(ent, "otimes") == 0 */) { + return 8855; + } + break; + /* perp 8869 up tack = orthogonal to = perpendicular, */ + case 2407134539: + if (len == 4 /* && strncmp(ent, "perp") == 0 */) { + return 8869; + } + break; + /* sdot 8901 dot operator, U+22C5 ISOamsb */ + case 2245035582: + if (len == 4 /* && strncmp(ent, "sdot") == 0 */) { + return 8901; + } + break; + /* lceil 8968 left ceiling = apl upstile, */ + case 1588009020: + if (len == 5 /* && strncmp(ent, "lceil") == 0 */) { + return 8968; + } + break; + /* rceil 8969 right ceiling, U+2309 ISOamsc */ + case 2541913506: + if (len == 5 /* && strncmp(ent, "rceil") == 0 */) { + return 8969; + } + break; + /* lfloor 8970 left floor = apl downstile, */ + case 1870296512: + if (len == 6 /* && strncmp(ent, "lfloor") == 0 */) { + return 8970; + } + break; + /* rfloor 8971 right floor, U+230B ISOamsc */ + case 1865132014: + if (len == 6 /* && strncmp(ent, "rfloor") == 0 */) { + return 8971; + } + break; + /* lang 9001 left-pointing angle bracket = bra, */ + case 1963900950: + if (len == 4 /* && strncmp(ent, "lang") == 0 */) { + return 9001; + } + break; + /* rang 9002 right-pointing angle bracket = ket, */ + case 2428829588: + if (len == 4 /* && strncmp(ent, "rang") == 0 */) { + return 9002; + } + break; + /* loz 9674 lozenge, U+25CA ISOpub */ + case 2828488274: + if (len == 3 /* && strncmp(ent, "loz") == 0 */) { + return 9674; + } + break; + /* spades 9824 black spade suit, U+2660 ISOpub */ + case 4026453962: + if (len == 6 /* && strncmp(ent, "spades") == 0 */) { + return 9824; + } + break; + /* clubs 9827 black club suit = shamrock, */ + case 2781041564: + if (len == 5 /* && strncmp(ent, "clubs") == 0 */) { + return 9827; + } + break; + /* hearts 9829 black heart suit = valentine, */ + case 2039418001: + if (len == 6 /* && strncmp(ent, "hearts") == 0 */) { + return 9829; + } + break; + /* diams 9830 black diamond suit, U+2666 ISOpub */ + 
case 3524411593: + if (len == 5 /* && strncmp(ent, "diams") == 0 */) { + return 9830; + } + break; + /* quot 34 quotation mark = APL quote, */ + case 2986121293: + if (len == 4 /* && strncmp(ent, "quot") == 0 */) { + return 34; + } + break; + /* amp 38 ampersand, U+0026 ISOnum */ + case 2834859755: + if (len == 3 /* && strncmp(ent, "amp") == 0 */) { + return 38; + } + break; + /* lt 60 less-than sign, U+003C ISOnum */ + case 1376204578: + if (len == 2 /* && strncmp(ent, "lt") == 0 */) { + return 60; + } + break; + /* gt 62 greater-than sign, U+003E ISOnum */ + case 1367881953: + if (len == 2 /* && strncmp(ent, "gt") == 0 */) { + return 62; + } + break; + /* OElig 338 latin capital ligature OE, */ + case 1674782707: + if (len == 5 /* && strncmp(ent, "OElig") == 0 */) { + return 338; + } + break; + /* oelig 339 latin small ligature oe, U+0153 ISOlat2 */ + case 2083613875: + if (len == 5 /* && strncmp(ent, "oelig") == 0 */) { + return 339; + } + break; + /* Scaron 352 latin capital letter S with caron, */ + case 1731202952: + if (len == 6 /* && strncmp(ent, "Scaron") == 0 */) { + return 352; + } + break; + /* scaron 353 latin small letter s with caron, */ + case 3135314728: + if (len == 6 /* && strncmp(ent, "scaron") == 0 */) { + return 353; + } + break; + /* Yuml 376 latin capital letter Y with diaeresis, */ + case 1123077683: + if (len == 4 /* && strncmp(ent, "Yuml") == 0 */) { + return 376; + } + break; + /* circ 710 modifier letter circumflex accent, */ + case 94756433: + if (len == 4 /* && strncmp(ent, "circ") == 0 */) { + return 710; + } + break; + /* tilde 732 small tilde, U+02DC ISOdia */ + case 1748508313: + if (len == 5 /* && strncmp(ent, "tilde") == 0 */) { + return 732; + } + break; + /* ensp 8194 en space, U+2002 ISOpub */ + case 3630901474: + if (len == 4 /* && strncmp(ent, "ensp") == 0 */) { + return 8194; + } + break; + /* emsp 8195 em space, U+2003 ISOpub */ + case 3241331769: + if (len == 4 /* && strncmp(ent, "emsp") == 0 */) { + return 8195; + } + 
break; + /* thinsp 8201 thin space, U+2009 ISOpub */ + case 2997658516: + if (len == 6 /* && strncmp(ent, "thinsp") == 0 */) { + return 8201; + } + break; + /* zwnj 8204 zero width non-joiner, */ + case 166021829: + if (len == 4 /* && strncmp(ent, "zwnj") == 0 */) { + return 8204; + } + break; + /* zwj 8205 zero width joiner, U+200D NEW RFC 2070 */ + case 4000813032: + if (len == 3 /* && strncmp(ent, "zwj") == 0 */) { + return 8205; + } + break; + /* lrm 8206 left-to-right mark, U+200E NEW RFC 2070 */ + case 2833481836: + if (len == 3 /* && strncmp(ent, "lrm") == 0 */) { + return 8206; + } + break; + /* rlm 8207 right-to-left mark, U+200F NEW RFC 2070 */ + case 865945620: + if (len == 3 /* && strncmp(ent, "rlm") == 0 */) { + return 8207; + } + break; + /* ndash 8211 en dash, U+2013 ISOpub */ + case 3305143245: + if (len == 5 /* && strncmp(ent, "ndash") == 0 */) { + return 8211; + } + break; + /* mdash 8212 em dash, U+2014 ISOpub */ + case 3146159164: + if (len == 5 /* && strncmp(ent, "mdash") == 0 */) { + return 8212; + } + break; + /* lsquo 8216 left single quotation mark, */ + case 1796006423: + if (len == 5 /* && strncmp(ent, "lsquo") == 0 */) { + return 8216; + } + break; + /* rsquo 8217 right single quotation mark, */ + case 2749910909: + if (len == 5 /* && strncmp(ent, "rsquo") == 0 */) { + return 8217; + } + break; + /* sbquo 8218 single low-9 quotation mark, U+201A NEW */ + case 159941417: + if (len == 5 /* && strncmp(ent, "sbquo") == 0 */) { + return 8218; + } + break; + /* ldquo 8220 left double quotation mark, */ + case 633684828: + if (len == 5 /* && strncmp(ent, "ldquo") == 0 */) { + return 8220; + } + break; + /* rdquo 8221 right double quotation mark, */ + case 1587589314: + if (len == 5 /* && strncmp(ent, "rdquo") == 0 */) { + return 8221; + } + break; + /* bdquo 8222 double low-9 quotation mark, U+201E NEW */ + case 3338811314: + if (len == 5 /* && strncmp(ent, "bdquo") == 0 */) { + return 8222; + } + break; + /* dagger 8224 dagger, U+2020 ISOpub 
*/ + case 3288241744: + if (len == 6 /* && strncmp(ent, "dagger") == 0 */) { + return 8224; + } + break; + /* Dagger 8225 double dagger, U+2021 ISOpub */ + case 1884129968: + if (len == 6 /* && strncmp(ent, "Dagger") == 0 */) { + return 8225; + } + break; + /* permil 8240 per mille sign, U+2030 ISOtech */ + case 4246983035: + if (len == 6 /* && strncmp(ent, "permil") == 0 */) { + return 8240; + } + break; + /* lsaquo 8249 single left-pointing angle quotation mark, */ + case 2442191187: + if (len == 6 /* && strncmp(ent, "lsaquo") == 0 */) { + return 8249; + } + break; + /* rsaquo 8250 single right-pointing angle quotation mark, */ + case 2437026689: + if (len == 6 /* && strncmp(ent, "rsaquo") == 0 */) { + return 8250; + } + break; + /* euro 8364 euro sign, U+20AC NEW */ + case 2061257587: + if (len == 4 /* && strncmp(ent, "euro") == 0 */) { + return 8364; + } + break; + } + /* unknown */ + return -1; +} |