@@ -39,89 +39,78 @@ typedef struct
3939
4040static mr_charcode mr_decode_c_string (uint8_t * * strp )
4141{
42- mr_charcode charcode = * * strp ;
43-
42+ uint8_t * str = * strp ;
4443 * strp += 1 ;
4544
46- return charcode ;
45+ return * str ;
4746}
4847
4948static mr_charcode mr_decode_utf8 (uint8_t * * strp )
5049{
5150 uint8_t * str = * strp ;
52- mr_charcode value ;
51+ uint8_t lead = str [0 ];
52+ uint32_t length ;
53+ mr_charcode codepoint ;
5354
54- if (str [ 0 ] < 0x80 )
55+ if (!( lead >> 7 ) )
5556 {
56- value = str [0 ];
57-
58- * strp += 1 ;
57+ length = 1 ;
58+ codepoint = lead & 0x7f ;
5959 }
60- else if ((str [ 0 ] & 0xe0 ) == 0xc0 )
60+ else if ((lead >> 5 ) == 0x06 )
6161 {
62- value = ((str [0 ] & 0x1f ) << 6 ) |
63- ((str [1 ] & 0x3f ) << 0 );
64-
65- * strp += 2 ;
62+ length = 2 ;
63+ codepoint = lead & 0x1f ;
6664 }
67- else if ((str [ 0 ] & 0xf0 ) == 0xe0 )
65+ else if ((lead >> 4 ) == 0x0e )
6866 {
69- value = ((str [0 ] & 0x0f ) << 12 ) |
70- ((str [1 ] & 0x3f ) << 6 ) |
71- ((str [2 ] & 0x3f ) << 0 );
72-
73- * strp += 3 ;
67+ length = 3 ;
68+ codepoint = lead & 0x0f ;
7469 }
75- else if ((str [ 0 ] & 0xf8 ) == 0xf0 && ( str [ 0 ] <= 0xf4 ) )
70+ else if ((lead >> 4 ) == 0x0e )
7671 {
77- value = ((str [0 ] & 0x07 ) << 18 ) |
78- ((str [1 ] & 0x3f ) << 12 ) |
79- ((str [2 ] & 0x3f ) << 6 ) |
80- ((str [3 ] & 0x3f ) << 0 );
81-
82- * strp += 4 ;
72+ length = 4 ;
73+ codepoint = lead & 0x07 ;
8374 }
8475 else
85- {
86- // Invalid
87-
88- value = -1 ;
76+ return 0 ;
8977
90- * strp += 1 ;
78+ for (uint32_t i = 1 ; i < length ; i ++ )
79+ {
80+ if ((str [i ] >> 6 ) != 0x02 )
81+ return 0 ;
82+ codepoint = (codepoint << 6 ) | (str [i ] & 0x3f );
9183 }
9284
93- return value ;
85+ * strp += length ;
86+
87+ return codepoint ;
9488}
9589
9690static mr_charcode mr_decode_utf16 (uint8_t * * strp )
9791{
9892 uint16_t * str = (uint16_t * )* strp ;
93+ uint16_t highSurrogate = str [0 ];
9994 mr_charcode value ;
10095
101- if ((str [ 0 ] < 0xd800 ) || ( str [ 0 ] >= 0xe000 ) )
96+ if ((highSurrogate >> 11 ) != 0x1b )
10297 {
103- value = str [0 ];
104-
10598 * strp += 2 ;
106- }
107- else if ((str [0 ] >= 0xd800 ) && (str [0 ] < 0xdc00 ) &&
108- (str [1 ] >= 0xdc00 ) && (str [1 ] <= 0xde00 ))
109- {
110- value = 0x00010000 +
111- (((str [0 ] & 0x03ff ) << 10 ) |
112- ((str [1 ] & 0x03ff ) << 0 ));
11399
114- * strp += 4 ;
100+ return highSurrogate ;
115101 }
116102 else
117103 {
118- // Invalid
119- value = -1 ;
104+ uint16_t lowSurrogate = str [1 ];
105+ if (((highSurrogate >> 10 ) != 0x36 ) &&
106+ ((lowSurrogate >> 10 ) != 0x37 ))
107+ return 0 ;
120108
121109 * strp += 2 ;
122- }
123110
124- return value ;
111+ return (highSurrogate & 0x3ff ) << 10 |
112+ (lowSurrogate & 0x3ff );
113+ }
125114}
126115
127116// Data decoding
0 commit comments