summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorXavier Roche <xroche@users.noreply.github.com>2014-05-27 16:35:57 +0000
committerXavier Roche <xroche@users.noreply.github.com>2014-05-27 16:35:57 +0000
commitfdcff762f661ba37e48604039f9e17c510ad18d6 (patch)
tree3e779847225750c26560f1629c8b32c4a5a9dddd
parentc3987aca717d438aff9b9d9378a3bc69748400d1 (diff)
Rewritten UTF8 writer to avoid spurious GCC 4.8.3 warnings.
-rw-r--r--src/htscharset.c77
1 files changed, 35 insertions, 42 deletions
diff --git a/src/htscharset.c b/src/htscharset.c
index 4137d65..50be2d2 100644
--- a/src/htscharset.c
+++ b/src/htscharset.c
@@ -723,43 +723,21 @@ static unsigned int nlz8(unsigned char x) {
26 U+200000 U+3FFFFFF 5 111110xx
31 U+4000000 U+7FFFFFFF 6 1111110x
*/
-#define ADD_SEQ(UC, BITS, EMITTER) do { \
- /* number of data bits in first octet */ \
- const unsigned int bits = BITS % 6; \
- /* shift for first octet */ \
- const unsigned int shift0 = BITS - bits; \
- /* first octet */ \
- const unsigned char lead = \
- /* leading bits */ \
- ( 0xff ^ ( ( 1 << ( bits + 1 ) ) - 1 ) ) \
- /* encoded bits */ \
- | ( ( (UC) >> shift0 ) & ( ( 1 << ( bits + 1 ) ) - 1 ) ) \
- ; \
- /* further bytes are encoding 6 bits */ \
- const unsigned char second = \
- 0x80 | ( ( (UC) >> ( shift0 - 6 ) ) & 0x3f ); \
- EMITTER(lead); \
- EMITTER(second); \
- if (BITS > 6*2) { \
- const unsigned char next = \
- 0x80 | ( ( (UC) >> ( shift0 - 6*2 ) ) & 0x3f ); \
- EMITTER(next); \
- if (BITS > 6*3) { \
- const unsigned char next = \
- 0x80 | ( ( (UC) >> ( shift0 - 6*3 ) ) & 0x3f ); \
- EMITTER(next); \
- if (BITS > 6*4) { \
- const unsigned char next = \
- 0x80 | ( ( (UC) >> ( shift0 - 6*4 ) ) & 0x3f ); \
- EMITTER(next); \
- if (BITS > 6*5) { \
- const unsigned char next = \
- 0x80 | ( ( (UC) >> ( shift0 - 6*5 ) ) & 0x3f ); \
- EMITTER(next); \
- } \
- } \
- } \
- } \
+#define ADD_FIRST_SEQ(UC, LEN, EMITTER) do { \
+ /* first octet */ \
+ const unsigned char lead = \
+ /* leading bits: LEN "1" bits */ \
+ ~ ( ( 1 << (unsigned) ( 8 - LEN ) ) - 1 ) \
+ /* encoded bits */ \
+ | ( (UC) >> (unsigned) ( ( LEN - 1 ) * 6 ) ); \
+ EMITTER(lead); \
+ } while(0)
+
+#define ADD_NEXT_SEQ(UC, SHIFT, EMITTER) do { \
+ /* further bytes are encoding 6 bits */ \
+ const unsigned char next = \
+ 0x80 | ( ( (UC) >> SHIFT ) & 0x3f ); \
+ EMITTER(next); \
} while(0)
/* UC is a constant. EMITTER is a macro function taking an unsigned int. */
@@ -767,15 +745,30 @@ static unsigned int nlz8(unsigned char x) {
if ((UC) < 0x80) { \
EMITTER(((unsigned char) (UC))); \
} else if ((UC) < 0x0800) { \
- ADD_SEQ(UC, 11, EMITTER); \
+ ADD_FIRST_SEQ(UC, 2, EMITTER); \
+ ADD_NEXT_SEQ(UC, 0, EMITTER); \
} else if ((UC) < 0x10000) { \
- ADD_SEQ(UC, 16, EMITTER); \
+ ADD_FIRST_SEQ(UC, 3, EMITTER); \
+ ADD_NEXT_SEQ(UC, 6, EMITTER); \
+ ADD_NEXT_SEQ(UC, 0, EMITTER); \
} else if ((UC) < 0x200000) { \
- ADD_SEQ(UC, 21, EMITTER); \
+ ADD_FIRST_SEQ(UC, 4, EMITTER); \
+ ADD_NEXT_SEQ(UC, 12, EMITTER); \
+ ADD_NEXT_SEQ(UC, 6, EMITTER); \
+ ADD_NEXT_SEQ(UC, 0, EMITTER); \
} else if ((UC) < 0x4000000) { \
- ADD_SEQ(UC, 26, EMITTER); \
+ ADD_FIRST_SEQ(UC, 5, EMITTER); \
+ ADD_NEXT_SEQ(UC, 18, EMITTER); \
+ ADD_NEXT_SEQ(UC, 12, EMITTER); \
+ ADD_NEXT_SEQ(UC, 6, EMITTER); \
+ ADD_NEXT_SEQ(UC, 0, EMITTER); \
} else { \
- ADD_SEQ(UC, 31, EMITTER); \
+ ADD_FIRST_SEQ(UC, 6, EMITTER); \
+ ADD_NEXT_SEQ(UC, 24, EMITTER); \
+ ADD_NEXT_SEQ(UC, 18, EMITTER); \
+ ADD_NEXT_SEQ(UC, 12, EMITTER); \
+ ADD_NEXT_SEQ(UC, 6, EMITTER); \
+ ADD_NEXT_SEQ(UC, 0, EMITTER); \
} \
} while(0)