1/*
2 +----------------------------------------------------------------------+
3 | PHP Version 5 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2015 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Kirill Maximov <kir@rus.net> |
16 +----------------------------------------------------------------------+
17 */
18
19/* $Id$ */
20
21#include <stdlib.h>
22
23#ifdef HAVE_UNISTD_H
24#include <unistd.h>
25#endif
26#include <string.h>
27#include <errno.h>
28
29#include "php.h"
30#include "cyr_convert.h"
31
32#include <stdio.h>
33
/*****************************************************************************
* These are the code tables for the supported Cyrillic charsets (relative to
* koi8-r). Each table holds 512 entries covering every byte value 0-255 twice:
* the first 256 entries convert from the corresponding charset to koi8-r,
* the second 256 entries convert back from koi8-r to that charset.
* Bytes 0-127 (ASCII) map to themselves in both halves.
*
* Here we have the following tables:
* _cyr_win1251 - for windows-1251 charset
* _cyr_iso88595 - for iso8859-5 charset
* _cyr_cp866 - for x-cp866 charset
* _cyr_mac - for x-mac-cyrillic charset
*
*****************************************************************************/
47
/* One conversion table: 256 entries "charset -> koi8-r" followed by
 * 256 entries "koi8-r -> charset" (the converter indexes the second half
 * as table[byte + 256]). */
typedef unsigned char _cyr_charset_table[512];

/* {{{ static const _cyr_charset_table _cyr_win1251
 * All four tables share one layout: entries 0-127 of each half are the
 * identity mapping, entries 128-255 carry the charset-specific mapping.
 */
static const _cyr_charset_table _cyr_win1251 = {
/* windows-1251 -> koi8-r */
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,
46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,
154,174,190,46,159,189,46,46,179,191,180,157,46,46,156,183,
46,46,182,166,173,46,46,158,163,152,164,155,46,46,46,167,
225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
/* koi8-r -> windows-1251 */
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
32,32,32,184,186,32,179,191,32,32,32,32,32,180,162,32,
32,32,32,168,170,32,178,175,32,32,32,32,32,165,161,169,
254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238,
239,255,240,241,242,243,230,226,252,251,231,248,253,249,247,250,
222,192,193,214,196,197,212,195,213,200,201,202,203,204,205,206,
207,223,208,209,210,211,198,194,220,219,199,216,221,217,215,218,
},
_cyr_cp866 = {
/* x-cp866 -> koi8-r */
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
35,35,35,124,124,124,124,43,43,124,124,43,43,43,43,43,
43,45,45,124,45,43,124,124,43,43,45,45,124,45,43,45,
45,45,45,43,43,43,43,43,43,43,43,35,35,124,124,35,
210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
179,163,180,164,183,167,190,174,32,149,158,32,152,159,148,154,
/* koi8-r -> x-cp866 */
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
205,186,213,241,243,201,32,245,187,212,211,200,190,32,247,198,
199,204,181,240,242,185,32,244,203,207,208,202,216,32,246,32,
238,160,161,230,164,165,228,163,229,168,169,170,171,172,173,174,
175,239,224,225,226,227,166,162,236,235,167,232,237,233,231,234,
158,128,129,150,132,133,148,131,149,136,137,138,139,140,141,142,
143,159,144,145,146,147,134,130,156,155,135,152,157,153,151,154,
},
_cyr_iso88595 = {
/* iso8859-5 -> koi8-r */
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
32,179,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
32,163,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
/* koi8-r -> iso8859-5 */
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
32,32,32,241,32,32,32,32,32,32,32,32,32,32,32,32,
32,32,32,161,32,32,32,32,32,32,32,32,32,32,32,32,
238,208,209,230,212,213,228,211,229,216,217,218,219,220,221,222,
223,239,224,225,226,227,214,210,236,235,215,232,237,233,231,234,
206,176,177,198,180,181,196,179,197,184,185,186,187,188,189,190,
191,207,192,193,194,195,182,178,204,203,183,200,205,201,199,202,
},
_cyr_mac = {
/* x-mac-cyrillic -> koi8-r */
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,152,153,154,155,156,179,163,209,
193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,255,
/* koi8-r -> x-mac-cyrillic */
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
160,161,162,222,164,165,166,167,168,169,170,171,172,173,174,175,
176,177,178,221,180,181,182,183,184,185,186,187,188,189,190,191,
254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238,
239,223,240,241,242,243,230,226,252,251,231,248,253,249,247,250,
158,128,129,150,132,133,148,131,149,136,137,138,139,140,141,142,
143,159,144,145,146,147,134,130,156,155,135,152,157,153,151,154,
};
/* }}} */
189
190/* {{{ static char * php_convert_cyr_string(unsigned char *str, int length, char from, char to TSRMLS_DC)
191* This is the function that performs real in-place conversion of the string
192* between charsets.
193* Parameters:
194* str - string to be converted
195* from,to - one-symbol label of source and destination charset
196* The following symbols are used as labels:
197* k - koi8-r
198* w - windows-1251
199* i - iso8859-5
200* a - x-cp866
201* d - x-cp866
202* m - x-mac-cyrillic
203*****************************************************************************/
204static char * php_convert_cyr_string(unsigned char *str, int length, char from, char to TSRMLS_DC)
205{
206 const unsigned char *from_table, *to_table;
207 unsigned char tmp;
208 int i;
209
210 from_table = NULL;
211 to_table = NULL;
212
213 switch (toupper((int)(unsigned char)from))
214 {
215 case 'W':
216 from_table = _cyr_win1251;
217 break;
218 case 'A':
219 case 'D':
220 from_table = _cyr_cp866;
221 break;
222 case 'I':
223 from_table = _cyr_iso88595;
224 break;
225 case 'M':
226 from_table = _cyr_mac;
227 break;
228 case 'K':
229 break;
230 default:
231 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown source charset: %c", from);
232 break;
233 }
234
235 switch (toupper((int)(unsigned char)to))
236 {
237 case 'W':
238 to_table = _cyr_win1251;
239 break;
240 case 'A':
241 case 'D':
242 to_table = _cyr_cp866;
243 break;
244 case 'I':
245 to_table = _cyr_iso88595;
246 break;
247 case 'M':
248 to_table = _cyr_mac;
249 break;
250 case 'K':
251 break;
252 default:
253 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown destination charset: %c", to);
254 break;
255 }
256
257
258 if (!str)
259 return (char *)str;
260
261 for( i = 0; i<length; i++)
262 {
263 tmp = (from_table == NULL)? str[i] : from_table[ str[i] ];
264 str[i] = (to_table == NULL) ? tmp : to_table[tmp + 256];
265 }
266 return (char *)str;
267}
268/* }}} */
269
270/* {{{ proto string convert_cyr_string(string str, string from, string to)
271 Convert from one Cyrillic character set to another */
272PHP_FUNCTION(convert_cyr_string)
273{
274 char *input, *fr_cs, *to_cs;
275 int input_len, fr_cs_len, to_cs_len;
276 unsigned char *str;
277
278 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss", &input, &input_len, &fr_cs, &fr_cs_len, &to_cs, &to_cs_len) == FAILURE) {
279 return;
280 }
281
282 str = (unsigned char*) estrndup(input, input_len);
283
284 php_convert_cyr_string(str, input_len, fr_cs[0], to_cs[0] TSRMLS_CC);
285 RETVAL_STRING((char *)str, 0);
286}
287/* }}} */
288
289/*
290 * Local variables:
291 * tab-width: 4
292 * c-basic-offset: 4
293 * End:
294 * vim600: sw=4 ts=4 fdm=marker
295 * vim<600: sw=4 ts=4
296 */
297