1 | /* |
2 | +----------------------------------------------------------------------+ |
3 | | PHP Version 5 | |
4 | +----------------------------------------------------------------------+ |
5 | | Copyright (c) 1997-2015 The PHP Group | |
6 | +----------------------------------------------------------------------+ |
7 | | This source file is subject to version 3.01 of the PHP license, | |
8 | | that is bundled with this package in the file LICENSE, and is | |
9 | | available through the world-wide-web at the following url: | |
10 | | http://www.php.net/license/3_01.txt | |
11 | | If you did not receive a copy of the PHP license and are unable to | |
12 | | obtain it through the world-wide-web, please send a note to | |
13 | | license@php.net so we can mail you a copy immediately. | |
14 | +----------------------------------------------------------------------+ |
15 | | Author: Bjørn Borud - Guardian Networks AS <borud@guardian.no> | |
16 | +----------------------------------------------------------------------+ |
17 | */ |
18 | /* $Id$ */ |
19 | |
20 | #include "php.h" |
21 | #include <stdlib.h> |
22 | #include <errno.h> |
23 | #include <ctype.h> |
24 | #include "php_string.h" |
25 | |
26 | /* Simple soundex algorithm as described by Knuth in TAOCP, vol 3 */ |
27 | /* {{{ proto string soundex(string str) |
28 | Calculate the soundex key of a string */ |
29 | PHP_FUNCTION(soundex) |
30 | { |
31 | char *str; |
32 | int i, _small, str_len, code, last; |
33 | char soundex[4 + 1]; |
34 | |
35 | static char soundex_table[26] = |
36 | {0, /* A */ |
37 | '1', /* B */ |
38 | '2', /* C */ |
39 | '3', /* D */ |
40 | 0, /* E */ |
41 | '1', /* F */ |
42 | '2', /* G */ |
43 | 0, /* H */ |
44 | 0, /* I */ |
45 | '2', /* J */ |
46 | '2', /* K */ |
47 | '4', /* L */ |
48 | '5', /* M */ |
49 | '5', /* N */ |
50 | 0, /* O */ |
51 | '1', /* P */ |
52 | '2', /* Q */ |
53 | '6', /* R */ |
54 | '2', /* S */ |
55 | '3', /* T */ |
56 | 0, /* U */ |
57 | '1', /* V */ |
58 | 0, /* W */ |
59 | '2', /* X */ |
60 | 0, /* Y */ |
61 | '2'}; /* Z */ |
62 | |
63 | if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s" , &str, &str_len) == FAILURE) { |
64 | return; |
65 | } |
66 | if (str_len == 0) { |
67 | RETURN_FALSE; |
68 | } |
69 | |
70 | /* build soundex string */ |
71 | last = -1; |
72 | for (i = 0, _small = 0; i < str_len && _small < 4; i++) { |
73 | /* convert chars to upper case and strip non-letter chars */ |
74 | /* BUG: should also map here accented letters used in non */ |
75 | /* English words or names (also found in English text!): */ |
76 | /* esstsett, thorn, n-tilde, c-cedilla, s-caron, ... */ |
77 | code = toupper((int)(unsigned char)str[i]); |
78 | if (code >= 'A' && code <= 'Z') { |
79 | if (_small == 0) { |
80 | /* remember first valid char */ |
81 | soundex[_small++] = code; |
82 | last = soundex_table[code - 'A']; |
83 | } |
84 | else { |
85 | /* ignore sequences of consonants with same soundex */ |
86 | /* code in trail, and vowels unless they separate */ |
87 | /* consonant letters */ |
88 | code = soundex_table[code - 'A']; |
89 | if (code != last) { |
90 | if (code != 0) { |
91 | soundex[_small++] = code; |
92 | } |
93 | last = code; |
94 | } |
95 | } |
96 | } |
97 | } |
98 | /* pad with '0' and terminate with 0 ;-) */ |
99 | while (_small < 4) { |
100 | soundex[_small++] = '0'; |
101 | } |
102 | soundex[_small] = '\0'; |
103 | |
104 | RETURN_STRINGL(soundex, _small, 1); |
105 | } |
106 | /* }}} */ |
107 | |
108 | /* |
109 | * Local variables: |
110 | * tab-width: 4 |
111 | * c-basic-offset: 4 |
112 | * End: |
113 | * vim600: sw=4 ts=4 fdm=marker |
114 | * vim<600: sw=4 ts=4 |
115 | */ |
116 | |