1 | /* |
2 | +----------------------------------------------------------------------+ |
3 | | PHP Version 5 | |
4 | +----------------------------------------------------------------------+ |
5 | | Copyright (c) 1997-2015 The PHP Group | |
6 | +----------------------------------------------------------------------+ |
7 | | This source file is subject to version 3.01 of the PHP license, | |
8 | | that is bundled with this package in the file LICENSE, and is | |
9 | | available through the world-wide-web at the following url: | |
10 | | http://www.php.net/license/3_01.txt | |
11 | | If you did not receive a copy of the PHP license and are unable to | |
12 | | obtain it through the world-wide-web, please send a note to | |
13 | | license@php.net so we can mail you a copy immediately. | |
14 | +----------------------------------------------------------------------+ |
15 | | Author: Hartmut Holzgraefe <hholzgra@php.net> | |
16 | +----------------------------------------------------------------------+ |
17 | */ |
18 | /* $Id$ */ |
19 | |
20 | #include "php.h" |
21 | #include <stdlib.h> |
22 | #include <errno.h> |
23 | #include <ctype.h> |
24 | #include "php_string.h" |
25 | |
26 | #define LEVENSHTEIN_MAX_LENGTH 255 |
27 | |
28 | /* {{{ reference_levdist |
29 | * reference implementation, only optimized for memory usage, not speed */ |
30 | static int reference_levdist(const char *s1, int l1, const char *s2, int l2, int cost_ins, int cost_rep, int cost_del ) |
31 | { |
32 | int *p1, *p2, *tmp; |
33 | int i1, i2, c0, c1, c2; |
34 | |
35 | if (l1 == 0) { |
36 | return l2 * cost_ins; |
37 | } |
38 | if (l2 == 0) { |
39 | return l1 * cost_del; |
40 | } |
41 | |
42 | if ((l1 > LEVENSHTEIN_MAX_LENGTH) || (l2 > LEVENSHTEIN_MAX_LENGTH)) { |
43 | return -1; |
44 | } |
45 | p1 = safe_emalloc((l2 + 1), sizeof(int), 0); |
46 | p2 = safe_emalloc((l2 + 1), sizeof(int), 0); |
47 | |
48 | for (i2 = 0; i2 <= l2; i2++) { |
49 | p1[i2] = i2 * cost_ins; |
50 | } |
51 | for (i1 = 0; i1 < l1 ; i1++) { |
52 | p2[0] = p1[0] + cost_del; |
53 | |
54 | for (i2 = 0; i2 < l2; i2++) { |
55 | c0 = p1[i2] + ((s1[i1] == s2[i2]) ? 0 : cost_rep); |
56 | c1 = p1[i2 + 1] + cost_del; |
57 | if (c1 < c0) { |
58 | c0 = c1; |
59 | } |
60 | c2 = p2[i2] + cost_ins; |
61 | if (c2 < c0) { |
62 | c0 = c2; |
63 | } |
64 | p2[i2 + 1] = c0; |
65 | } |
66 | tmp = p1; |
67 | p1 = p2; |
68 | p2 = tmp; |
69 | } |
70 | c0 = p1[l2]; |
71 | |
72 | efree(p1); |
73 | efree(p2); |
74 | |
75 | return c0; |
76 | } |
77 | /* }}} */ |
78 | |
79 | /* {{{ custom_levdist |
80 | */ |
81 | static int custom_levdist(char *str1, char *str2, char *callback_name TSRMLS_DC) |
82 | { |
83 | php_error_docref(NULL TSRMLS_CC, E_WARNING, "The general Levenshtein support is not there yet" ); |
84 | /* not there yet */ |
85 | |
86 | return -1; |
87 | } |
88 | /* }}} */ |
89 | |
90 | /* {{{ proto int levenshtein(string str1, string str2[, int cost_ins, int cost_rep, int cost_del]) |
91 | Calculate Levenshtein distance between two strings */ |
92 | PHP_FUNCTION(levenshtein) |
93 | { |
94 | int argc = ZEND_NUM_ARGS(); |
95 | char *str1, *str2; |
96 | char *callback_name; |
97 | int str1_len, str2_len, callback_len; |
98 | long cost_ins, cost_rep, cost_del; |
99 | int distance = -1; |
100 | |
101 | switch (argc) { |
102 | case 2: /* just two strings: use maximum performance version */ |
103 | if (zend_parse_parameters(2 TSRMLS_CC, "ss" , &str1, &str1_len, &str2, &str2_len) == FAILURE) { |
104 | return; |
105 | } |
106 | distance = reference_levdist(str1, str1_len, str2, str2_len, 1, 1, 1); |
107 | break; |
108 | |
109 | case 5: /* more general version: calc cost by ins/rep/del weights */ |
110 | if (zend_parse_parameters(5 TSRMLS_CC, "sslll" , &str1, &str1_len, &str2, &str2_len, &cost_ins, &cost_rep, &cost_del) == FAILURE) { |
111 | return; |
112 | } |
113 | distance = reference_levdist(str1, str1_len, str2, str2_len, cost_ins, cost_rep, cost_del); |
114 | break; |
115 | |
116 | case 3: /* most general version: calc cost by user-supplied function */ |
117 | if (zend_parse_parameters(3 TSRMLS_CC, "sss" , &str1, &str1_len, &str2, &str2_len, &callback_name, &callback_len) == FAILURE) { |
118 | return; |
119 | } |
120 | distance = custom_levdist(str1, str2, callback_name TSRMLS_CC); |
121 | break; |
122 | |
123 | default: |
124 | WRONG_PARAM_COUNT; |
125 | } |
126 | |
127 | if (distance < 0 && /* TODO */ ZEND_NUM_ARGS() != 3) { |
128 | php_error_docref(NULL TSRMLS_CC, E_WARNING, "Argument string(s) too long" ); |
129 | } |
130 | |
131 | RETURN_LONG(distance); |
132 | } |
133 | /* }}} */ |
134 | |
135 | /* |
136 | * Local variables: |
137 | * tab-width: 4 |
138 | * c-basic-offset: 4 |
139 | * End: |
140 | * vim600: sw=4 ts=4 fdm=marker |
141 | * vim<600: sw=4 ts=4 |
142 | */ |
143 | |