1 | /* |
2 | +----------------------------------------------------------------------+ |
3 | | PHP Version 5 | |
4 | +----------------------------------------------------------------------+ |
5 | | Copyright (c) 1997-2015 The PHP Group | |
6 | +----------------------------------------------------------------------+ |
7 | | This source file is subject to version 3.01 of the PHP license, | |
8 | | that is bundled with this package in the file LICENSE, and is | |
9 | | available through the world-wide-web at the following url: | |
10 | | http://www.php.net/license/3_01.txt | |
11 | | If you did not receive a copy of the PHP license and are unable to | |
12 | | obtain it through the world-wide-web, please send a note to | |
13 | | license@php.net so we can mail you a copy immediately. | |
14 | +----------------------------------------------------------------------+ |
15 | | Author: Kirill Maximov <kir@rus.net> | |
16 | +----------------------------------------------------------------------+ |
17 | */ |
18 | |
19 | /* $Id$ */ |
20 | |
21 | #include <stdlib.h> |
22 | |
23 | #ifdef HAVE_UNISTD_H |
24 | #include <unistd.h> |
25 | #endif |
26 | #include <string.h> |
27 | #include <errno.h> |
28 | |
29 | #include "php.h" |
30 | #include "cyr_convert.h" |
31 | |
32 | #include <stdio.h> |
33 | |
34 | /***************************************************************************** |
35 | * These are the code tables for different Cyrillic charsets (relative to koi8-r). |
36 | * Each table has two 256-entry halves; entries 0-127 of each half are identity (ASCII). |
37 | * The first 256 entries convert from the corresponding charset to koi8-r, |
38 | * the second 256 entries convert in reverse, from koi8-r to the charset. |
39 | * |
40 | * Here we have the following tables: |
41 | * _cyr_win1251 - for windows-1251 charset |
42 | * _cyr_iso88595 - for iso8859-5 charset |
43 | * _cyr_cp866 - for x-cp866 charset |
44 | * _cyr_mac - for x-mac-cyrillic charset |
45 | * |
46 | *****************************************************************************/ |
47 | |
48 | typedef unsigned char _cyr_charset_table[512]; |
49 | |
50 | /* {{{ static const _cyr_charset_table _cyr_win1251 |
 *
 * A single declaration that defines the four tables _cyr_win1251,
 * _cyr_cp866, _cyr_iso88595 and _cyr_mac.
 *
 * Each table is 512 bytes, laid out as two 256-entry halves:
 *   [0..255]   indexed by a byte of the table's charset, yields the koi8-r
 *              byte (php_convert_cyr_string() reads this half for the
 *              source charset);
 *   [256..511] indexed by a koi8-r byte plus 256, yields the byte in the
 *              table's charset (read for the destination charset).
 * Entries 0..127 of every half are the identity mapping.
 *
51 | */ |
52 | static const _cyr_charset_table _cyr_win1251 = { |
/* first half: _cyr_win1251 byte -> koi8-r */
53 | 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, |
54 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, |
55 | 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, |
56 | 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, |
57 | 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, |
58 | 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95, |
59 | 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, |
60 | 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, |
61 | 46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46, |
62 | 46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46, |
63 | 154,174,190,46,159,189,46,46,179,191,180,157,46,46,156,183, |
64 | 46,46,182,166,173,46,46,158,163,152,164,155,46,46,46,167, |
65 | 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240, |
66 | 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241, |
67 | 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208, |
68 | 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209, |
/* second half: koi8-r byte -> _cyr_win1251 */
69 | 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, |
70 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, |
71 | 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, |
72 | 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, |
73 | 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, |
74 | 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95, |
75 | 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, |
76 | 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, |
77 | 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32, |
78 | 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32, |
79 | 32,32,32,184,186,32,179,191,32,32,32,32,32,180,162,32, |
80 | 32,32,32,168,170,32,178,175,32,32,32,32,32,165,161,169, |
81 | 254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238, |
82 | 239,255,240,241,242,243,230,226,252,251,231,248,253,249,247,250, |
83 | 222,192,193,214,196,197,212,195,213,200,201,202,203,204,205,206, |
84 | 207,223,208,209,210,211,198,194,220,219,199,216,221,217,215,218, |
85 | }, |
86 | _cyr_cp866 = { |
/* first half: _cyr_cp866 byte -> koi8-r */
87 | 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, |
88 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, |
89 | 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, |
90 | 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, |
91 | 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, |
92 | 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95, |
93 | 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, |
94 | 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, |
95 | 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240, |
96 | 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241, |
97 | 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208, |
98 | 35,35,35,124,124,124,124,43,43,124,124,43,43,43,43,43, |
99 | 43,45,45,124,45,43,124,124,43,43,45,45,124,45,43,45, |
100 | 45,45,45,43,43,43,43,43,43,43,43,35,35,124,124,35, |
101 | 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209, |
102 | 179,163,180,164,183,167,190,174,32,149,158,32,152,159,148,154, |
/* second half: koi8-r byte -> _cyr_cp866 */
103 | 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, |
104 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, |
105 | 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, |
106 | 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, |
107 | 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, |
108 | 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95, |
109 | 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, |
110 | 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, |
111 | 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32, |
112 | 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32, |
113 | 205,186,213,241,243,201,32,245,187,212,211,200,190,32,247,198, |
114 | 199,204,181,240,242,185,32,244,203,207,208,202,216,32,246,32, |
115 | 238,160,161,230,164,165,228,163,229,168,169,170,171,172,173,174, |
116 | 175,239,224,225,226,227,166,162,236,235,167,232,237,233,231,234, |
117 | 158,128,129,150,132,133,148,131,149,136,137,138,139,140,141,142, |
118 | 143,159,144,145,146,147,134,130,156,155,135,152,157,153,151,154, |
119 | }, |
120 | _cyr_iso88595 = { |
/* first half: _cyr_iso88595 byte -> koi8-r */
121 | 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, |
122 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, |
123 | 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, |
124 | 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, |
125 | 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, |
126 | 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95, |
127 | 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, |
128 | 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, |
129 | 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32, |
130 | 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32, |
131 | 32,179,32,32,32,32,32,32,32,32,32,32,32,32,32,32, |
132 | 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240, |
133 | 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241, |
134 | 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208, |
135 | 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209, |
136 | 32,163,32,32,32,32,32,32,32,32,32,32,32,32,32,32, |
/* second half: koi8-r byte -> _cyr_iso88595 */
137 | 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, |
138 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, |
139 | 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, |
140 | 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, |
141 | 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, |
142 | 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95, |
143 | 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, |
144 | 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, |
145 | 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32, |
146 | 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32, |
147 | 32,32,32,241,32,32,32,32,32,32,32,32,32,32,32,32, |
148 | 32,32,32,161,32,32,32,32,32,32,32,32,32,32,32,32, |
149 | 238,208,209,230,212,213,228,211,229,216,217,218,219,220,221,222, |
150 | 223,239,224,225,226,227,214,210,236,235,215,232,237,233,231,234, |
151 | 206,176,177,198,180,181,196,179,197,184,185,186,187,188,189,190, |
152 | 191,207,192,193,194,195,182,178,204,203,183,200,205,201,199,202, |
153 | }, |
154 | _cyr_mac = { |
/* first half: _cyr_mac byte -> koi8-r */
155 | 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, |
156 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, |
157 | 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, |
158 | 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, |
159 | 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, |
160 | 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95, |
161 | 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, |
162 | 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, |
163 | 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240, |
164 | 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241, |
165 | 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, |
166 | 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, |
167 | 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, |
168 | 144,145,146,147,148,149,150,151,152,153,154,155,156,179,163,209, |
169 | 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208, |
170 | 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,255, |
/* second half: koi8-r byte -> _cyr_mac */
171 | 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, |
172 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, |
173 | 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47, |
174 | 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63, |
175 | 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79, |
176 | 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95, |
177 | 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111, |
178 | 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, |
179 | 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, |
180 | 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, |
181 | 160,161,162,222,164,165,166,167,168,169,170,171,172,173,174,175, |
182 | 176,177,178,221,180,181,182,183,184,185,186,187,188,189,190,191, |
183 | 254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238, |
184 | 239,223,240,241,242,243,230,226,252,251,231,248,253,249,247,250, |
185 | 158,128,129,150,132,133,148,131,149,136,137,138,139,140,141,142, |
186 | 143,159,144,145,146,147,134,130,156,155,135,152,157,153,151,154, |
187 | }; |
188 | /* }}} */ |
189 | |
190 | /* {{{ static char * php_convert_cyr_string(unsigned char *str, int length, char from, char to TSRMLS_DC) |
191 | * This is the function that performs real in-place conversion of the string |
192 | * between charsets. |
193 | * Parameters: |
194 | * str - string to be converted |
195 | * from,to - one-symbol label of source and destination charset |
196 | * The following symbols are used as labels: |
197 | * k - koi8-r |
198 | * w - windows-1251 |
199 | * i - iso8859-5 |
200 | * a - x-cp866 |
201 | * d - x-cp866 |
202 | * m - x-mac-cyrillic |
203 | *****************************************************************************/ |
204 | static char * php_convert_cyr_string(unsigned char *str, int length, char from, char to TSRMLS_DC) |
205 | { |
206 | const unsigned char *from_table, *to_table; |
207 | unsigned char tmp; |
208 | int i; |
209 | |
210 | from_table = NULL; |
211 | to_table = NULL; |
212 | |
213 | switch (toupper((int)(unsigned char)from)) |
214 | { |
215 | case 'W': |
216 | from_table = _cyr_win1251; |
217 | break; |
218 | case 'A': |
219 | case 'D': |
220 | from_table = _cyr_cp866; |
221 | break; |
222 | case 'I': |
223 | from_table = _cyr_iso88595; |
224 | break; |
225 | case 'M': |
226 | from_table = _cyr_mac; |
227 | break; |
228 | case 'K': |
229 | break; |
230 | default: |
231 | php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown source charset: %c" , from); |
232 | break; |
233 | } |
234 | |
235 | switch (toupper((int)(unsigned char)to)) |
236 | { |
237 | case 'W': |
238 | to_table = _cyr_win1251; |
239 | break; |
240 | case 'A': |
241 | case 'D': |
242 | to_table = _cyr_cp866; |
243 | break; |
244 | case 'I': |
245 | to_table = _cyr_iso88595; |
246 | break; |
247 | case 'M': |
248 | to_table = _cyr_mac; |
249 | break; |
250 | case 'K': |
251 | break; |
252 | default: |
253 | php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown destination charset: %c" , to); |
254 | break; |
255 | } |
256 | |
257 | |
258 | if (!str) |
259 | return (char *)str; |
260 | |
261 | for( i = 0; i<length; i++) |
262 | { |
263 | tmp = (from_table == NULL)? str[i] : from_table[ str[i] ]; |
264 | str[i] = (to_table == NULL) ? tmp : to_table[tmp + 256]; |
265 | } |
266 | return (char *)str; |
267 | } |
268 | /* }}} */ |
269 | |
270 | /* {{{ proto string convert_cyr_string(string str, string from, string to) |
271 | Convert from one Cyrillic character set to another */ |
272 | PHP_FUNCTION(convert_cyr_string) |
273 | { |
274 | char *input, *fr_cs, *to_cs; |
275 | int input_len, fr_cs_len, to_cs_len; |
276 | unsigned char *str; |
277 | |
278 | if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss" , &input, &input_len, &fr_cs, &fr_cs_len, &to_cs, &to_cs_len) == FAILURE) { |
279 | return; |
280 | } |
281 | |
282 | str = (unsigned char*) estrndup(input, input_len); |
283 | |
284 | php_convert_cyr_string(str, input_len, fr_cs[0], to_cs[0] TSRMLS_CC); |
285 | RETVAL_STRING((char *)str, 0); |
286 | } |
287 | /* }}} */ |
288 | |
289 | /* |
290 | * Local variables: |
291 | * tab-width: 4 |
292 | * c-basic-offset: 4 |
293 | * End: |
294 | * vim600: sw=4 ts=4 fdm=marker |
295 | * vim<600: sw=4 ts=4 |
296 | */ |
297 | |