1 |
--- php-5.3.2/ext/xml/tests/bug49687.phpt.cve3870 |
2 |
+++ php-5.3.2/ext/xml/tests/bug49687.phpt |
3 |
@@ -0,0 +1,24 @@ |
4 |
+--TEST-- |
5 |
+Bug #49687 Several utf8_decode deficiencies and vulnerabilities |
6 |
+--SKIPIF-- |
7 |
+<?php |
8 |
+require_once("skipif.inc"); |
9 |
+if (!extension_loaded('xml')) die ("skip xml extension not available"); |
10 |
+?> |
11 |
+--FILE-- |
12 |
+<?php |
13 |
+/* Each input contains malformed UTF-8 or a code point above 0xFF; every rejected sequence should come back as '?' (0x3f). */ |
14 |
+$tests = array( |
15 |
+ "\x41\xC2\x3E\x42", /* two-byte lead 0xC2 followed by 0x3E, which is not a trail byte */ |
16 |
+ "\xE3\x80\x22", /* three-byte lead 0xE3 with a single trail byte before 0x22 */ |
17 |
+ "\x41\x98\xBA\x42\xE2\x98\x43\xE2\x98\xBA\xE2\x98", /* stray trail bytes, truncated three-byte sequences, and U+263A (well-formed but above 0xFF) */ |
18 |
+); |
19 |
+foreach ($tests as $t) { |
20 |
+ echo bin2hex(utf8_decode($t)), "\n"; /* hex dump makes the replacement bytes visible in the expected output */ |
21 |
+} |
22 |
+echo "Done.\n"; |
23 |
+--EXPECT-- |
24 |
+413f3e42 |
25 |
+3f22 |
26 |
+413f3f423f433f3f |
27 |
+Done. |
28 |
--- php-5.3.2/ext/xml/xml.c.cve3870 |
29 |
+++ php-5.3.2/ext/xml/xml.c |
30 |
@@ -659,10 +659,111 @@ PHPAPI char *xml_utf8_encode(const char |
31 |
} |
32 |
/* }}} */ |
33 |
|
34 |
+/* Copied from trunk's implementation of get_next_char in ext/standard/html.c. These macros are meant to expand inside php_next_utf8_char: they use its locals cursor, status and str_len. */ |
35 |
+#define MB_FAILURE(pos, advance) do { /* report an invalid sequence and return from the enclosing function */ \ |
36 |
+ *cursor = pos + (advance); /* resume `advance` bytes after the start of the bad sequence */ \ |
37 |
+ *status = FAILURE; \ |
38 |
+ return 0; /* callers are expected to look at *status, not at this value */ \ |
39 |
+} while (0) |
40 |
+ |
41 |
+#define CHECK_LEN(pos, chars_need) ((str_len - (pos)) >= (chars_need)) /* true when at least chars_need bytes remain at pos; str_len and pos are size_t in php_next_utf8_char, so this assumes pos <= str_len */ |
42 |
+#define utf8_lead(c) ((c) < 0x80 || ((c) >= 0xC2 && (c) <= 0xF4)) /* byte that may start a sequence: ASCII or 0xC2..0xF4 */ |
43 |
+#define utf8_trail(c) ((c) >= 0x80 && (c) <= 0xBF) /* continuation byte: 0x80..0xBF */ |
44 |
+ |
45 |
+/* {{{ php_next_utf8_char: decodes the UTF-8 sequence starting at str[*cursor]; str holds str_len bytes. |
46 |
+ * Success: *status = SUCCESS, *cursor is moved past the sequence and the code point is returned. Failure (truncated, malformed, non-shortest, surrogate or out-of-range input): *status = FAILURE, *cursor is advanced by 1 to 4 bytes and 0 is returned. */ |
47 |
+static inline unsigned int php_next_utf8_char( |
48 |
+ const unsigned char *str, |
49 |
+ size_t str_len, |
50 |
+ size_t *cursor, |
51 |
+ int *status) |
52 |
+{ |
53 |
+ size_t pos = *cursor; |
54 |
+ unsigned int this_char = 0; |
55 |
+ unsigned char c; |
56 |
+ |
57 |
+ *status = SUCCESS; /* assume success; MB_FAILURE overwrites this on every error path */ |
58 |
+ |
59 |
+ if (!CHECK_LEN(pos, 1)) /* no byte left at pos */ |
60 |
+ MB_FAILURE(pos, 1); |
61 |
+ |
62 |
+ /* We'll follow strategy 2. from section 3.6.1 of UTR #36: |
63 |
+ * "In a reported illegal byte sequence, do not include any |
64 |
+ * non-initial byte that encodes a valid character or is a leading |
65 |
+ * byte for a valid sequence." */ |
66 |
+ c = str[pos]; |
67 |
+ if (c < 0x80) { /* single byte, ASCII range */ |
68 |
+ this_char = c; |
69 |
+ pos++; |
70 |
+ } else if (c < 0xc2) { /* 0x80..0xC1: not accepted as a lead byte (see utf8_lead) */ |
71 |
+ MB_FAILURE(pos, 1); |
72 |
+ } else if (c < 0xe0) { /* lead 0xC2..0xDF: two-byte sequence */ |
73 |
+ if (!CHECK_LEN(pos, 2)) |
74 |
+ MB_FAILURE(pos, 1); |
75 |
+ |
76 |
+ if (!utf8_trail(str[pos + 1])) { |
77 |
+ MB_FAILURE(pos, utf8_lead(str[pos + 1]) ? 1 : 2); /* leave a byte that could start a new sequence for the next call */ |
78 |
+ } |
79 |
+ this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f); |
80 |
+ if (this_char < 0x80) { /* non-shortest form; with c >= 0xC2 this_char is already >= 0x80, so this is a defensive check */ |
81 |
+ MB_FAILURE(pos, 2); |
82 |
+ } |
83 |
+ pos += 2; |
84 |
+ } else if (c < 0xf0) { /* lead 0xE0..0xEF: three-byte sequence */ |
85 |
+ size_t avail = str_len - pos; /* bytes left, counting the lead byte */ |
86 |
+ |
87 |
+ if (avail < 3 || |
88 |
+ !utf8_trail(str[pos + 1]) || !utf8_trail(str[pos + 2])) { |
89 |
+ if (avail < 2 || utf8_lead(str[pos + 1])) /* the avail tests short-circuit before any read past the end */ |
90 |
+ MB_FAILURE(pos, 1); |
91 |
+ else if (avail < 3 || utf8_lead(str[pos + 2])) |
92 |
+ MB_FAILURE(pos, 2); |
93 |
+ else |
94 |
+ MB_FAILURE(pos, 3); |
95 |
+ } |
96 |
+ |
97 |
+ this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f); |
98 |
+ if (this_char < 0x800) { /* non-shortest form */ |
99 |
+ MB_FAILURE(pos, 3); |
100 |
+ } else if (this_char >= 0xd800 && this_char <= 0xdfff) { /* surrogate */ |
101 |
+ MB_FAILURE(pos, 3); |
102 |
+ } |
103 |
+ pos += 3; |
104 |
+ } else if (c < 0xf5) { /* lead 0xF0..0xF4: four-byte sequence */ |
105 |
+ size_t avail = str_len - pos; /* bytes left, counting the lead byte */ |
106 |
+ |
107 |
+ if (avail < 4 || |
108 |
+ !utf8_trail(str[pos + 1]) || !utf8_trail(str[pos + 2]) || |
109 |
+ !utf8_trail(str[pos + 3])) { |
110 |
+ if (avail < 2 || utf8_lead(str[pos + 1])) /* same ladder as the three-byte case, one step longer */ |
111 |
+ MB_FAILURE(pos, 1); |
112 |
+ else if (avail < 3 || utf8_lead(str[pos + 2])) |
113 |
+ MB_FAILURE(pos, 2); |
114 |
+ else if (avail < 4 || utf8_lead(str[pos + 3])) |
115 |
+ MB_FAILURE(pos, 3); |
116 |
+ else |
117 |
+ MB_FAILURE(pos, 4); |
118 |
+ } |
119 |
+ |
120 |
+ this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f); |
121 |
+ if (this_char < 0x10000 || this_char > 0x10FFFF) { /* non-shortest form or outside range */ |
122 |
+ MB_FAILURE(pos, 4); |
123 |
+ } |
124 |
+ pos += 4; |
125 |
+ } else { /* 0xF5..0xFF: rejected as lead bytes */ |
126 |
+ MB_FAILURE(pos, 1); |
127 |
+ } |
128 |
+ |
129 |
+ *cursor = pos; /* success: cursor now sits just past the decoded sequence */ |
130 |
+ return this_char; |
131 |
+} |
132 |
+/* }}} */ |
133 |
+ |
134 |
+ |
135 |
/* {{{ xml_utf8_decode */ |
136 |
PHPAPI char *xml_utf8_decode(const XML_Char *s, int len, int *newlen, const XML_Char *encoding) |
137 |
{ |
138 |
- int pos = len; |
139 |
+ size_t pos = 0; |
140 |
char *newbuf = emalloc(len + 1); |
141 |
unsigned int c; |
142 |
char (*decoder)(unsigned short) = NULL; |
143 |
@@ -681,36 +782,15 @@ PHPAPI char *xml_utf8_decode(const XML_C |
144 |
newbuf[*newlen] = '\0'; |
145 |
return newbuf; |
146 |
} |
147 |
- while (pos > 0) { |
148 |
- c = (unsigned char)(*s); |
149 |
- if (c >= 0xf0) { /* four bytes encoded, 21 bits */ |
150 |
- if(pos-4 >= 0) { |
151 |
- c = ((s[0]&7)<<18) | ((s[1]&63)<<12) | ((s[2]&63)<<6) | (s[3]&63); |
152 |
- } else { |
153 |
- c = '?'; |
154 |
- } |
155 |
- s += 4; |
156 |
- pos -= 4; |
157 |
- } else if (c >= 0xe0) { /* three bytes encoded, 16 bits */ |
158 |
- if(pos-3 >= 0) { |
159 |
- c = ((s[0]&63)<<12) | ((s[1]&63)<<6) | (s[2]&63); |
160 |
- } else { |
161 |
- c = '?'; |
162 |
- } |
163 |
- s += 3; |
164 |
- pos -= 3; |
165 |
- } else if (c >= 0xc0) { /* two bytes encoded, 11 bits */ |
166 |
- if(pos-2 >= 0) { |
167 |
- c = ((s[0]&63)<<6) | (s[1]&63); |
168 |
- } else { |
169 |
- c = '?'; |
170 |
- } |
171 |
- s += 2; |
172 |
- pos -= 2; |
173 |
- } else { |
174 |
- s++; |
175 |
- pos--; |
176 |
+ |
177 |
+ while (pos < (size_t)len) { |
178 |
+ int status = FAILURE; |
179 |
+ c = php_next_utf8_char((const unsigned char*)s, (size_t) len, &pos, &status); |
180 |
+ |
181 |
+ if (status == FAILURE || c > 0xFFU) { |
182 |
+ c = '?'; |
183 |
} |
184 |
+ |
185 |
newbuf[*newlen] = decoder ? decoder(c) : c; |
186 |
++*newlen; |
187 |
} |