1 | /* |
2 | +----------------------------------------------------------------------+ |
3 | | PHP Version 5 | |
4 | +----------------------------------------------------------------------+ |
5 | | Copyright (c) 1997-2015 The PHP Group | |
6 | +----------------------------------------------------------------------+ |
7 | | This source file is subject to version 3.01 of the PHP license, | |
8 | | that is bundled with this package in the file LICENSE, and is | |
9 | | available through the world-wide-web at the following url: | |
10 | | http://www.php.net/license/3_01.txt | |
11 | | If you did not receive a copy of the PHP license and are unable to | |
12 | | obtain it through the world-wide-web, please send a note to | |
13 | | license@php.net so we can mail you a copy immediately. | |
14 | +----------------------------------------------------------------------+ |
15 | | Author: Clayton Collie <clcollie@mindspring.com> | |
16 | +----------------------------------------------------------------------+ |
17 | */ |
18 | |
19 | /* $Id$ */ |
20 | |
21 | /* |
22 | scanf.c -- |
23 | |
24 | This file contains the base code which implements sscanf and by extension |
25 | fscanf. Original code is from TCL8.3.0 and bears the following copyright: |
26 | |
27 | This software is copyrighted by the Regents of the University of |
28 | California, Sun Microsystems, Inc., Scriptics Corporation, |
29 | and other parties. The following terms apply to all files associated |
30 | with the software unless explicitly disclaimed in individual files. |
31 | |
32 | The authors hereby grant permission to use, copy, modify, distribute, |
33 | and license this software and its documentation for any purpose, provided |
34 | that existing copyright notices are retained in all copies and that this |
35 | notice is included verbatim in any distributions. No written agreement, |
36 | license, or royalty fee is required for any of the authorized uses. |
37 | Modifications to this software may be copyrighted by their authors |
38 | and need not follow the licensing terms described here, provided that |
39 | the new terms are clearly indicated on the first page of each file where |
40 | they apply. |
41 | |
42 | IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY |
43 | FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
44 | ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY |
45 | DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE |
46 | POSSIBILITY OF SUCH DAMAGE. |
47 | |
48 | THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES, |
49 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, |
50 | FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE |
51 | IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE |
52 | NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR |
53 | MODIFICATIONS. |
54 | |
55 | GOVERNMENT USE: If you are acquiring this software on behalf of the |
56 | U.S. government, the Government shall have only "Restricted Rights" |
57 | in the software and related documentation as defined in the Federal |
58 | Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you |
59 | are acquiring the software on behalf of the Department of Defense, the |
60 | software shall be classified as "Commercial Computer Software" and the |
61 | Government shall have only "Restricted Rights" as defined in Clause |
62 | 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the |
63 | authors grant the U.S. Government and others acting in its behalf |
64 | permission to use and distribute the software in accordance with the |
65 | terms specified in this license. |
66 | */ |
67 | |
68 | #include <stdio.h> |
69 | #include <limits.h> |
70 | #include <ctype.h> |
71 | #include "php.h" |
72 | #include "php_variables.h" |
73 | #ifdef HAVE_LOCALE_H |
74 | #include <locale.h> |
75 | #endif |
76 | #include "zend_execute.h" |
77 | #include "zend_operators.h" |
78 | #include "zend_strtod.h" |
79 | #include "php_globals.h" |
80 | #include "basic_functions.h" |
81 | #include "scanf.h" |
82 | |
/*
 * Flag values used internally by [f|s]canf.
 *
 * The first group describes the conversion specifier being processed.
 */
#define SCAN_NOSKIP	0x1	/* Don't skip blanks. */
#define SCAN_SUPPRESS	0x2	/* Suppress assignment. */
#define SCAN_UNSIGNED	0x4	/* Read an unsigned value. */
#define SCAN_WIDTH	0x8	/* A width value was supplied. */

/*
 * The second group tracks what is still acceptable while the characters
 * of a numeric field are being accumulated.
 */
#define SCAN_SIGNOK	0x10	/* A +/- character is allowed. */
#define SCAN_NODIGITS	0x20	/* No digits have been scanned. */
#define SCAN_NOZERO	0x40	/* No zero digits have been scanned. */
#define SCAN_XOK	0x80	/* An 'x' is allowed. */
#define SCAN_PTOK	0x100	/* Decimal point is allowed. */
#define SCAN_EXPOK	0x200	/* An exponent is allowed. */

/*
 * Cast a character to zend_uchar, e.g. before handing it to a <ctype.h>
 * classifier.  The expansion is fully parenthesized so that the macro
 * behaves as a single operand: without the outer parentheses
 * "sizeof UCHAR(x)" or "UCHAR(p)[0]" would bind to the wrong
 * sub-expression.
 */
#define UCHAR(x) ((zend_uchar)(x))
99 | |
/*
 * One inclusive character range ("a-z") inside a "%[...]" set.
 * BuildCharSet stores the bounds so that start <= end.
 */
struct Range {
	char start;	/* first character of the range */
	char end;	/* last character of the range */
};

/*
 * Everything needed to test a character against a "%[...]" conversion:
 * a list of individual characters plus a list of ranges.
 */
typedef struct CharSet {
	int exclude;		/* 1 if this is an exclusion set ("[^...]"). */
	int nchars;		/* number of entries used in chars */
	char *chars;		/* individually listed characters */
	int nranges;		/* number of entries used in ranges */
	struct Range *ranges;	/* range list, NULL when the set has none */
} CharSet;
114 | |
115 | /* |
116 | * Declarations for functions used only in this file. |
117 | */ |
118 | static char *BuildCharSet(CharSet *cset, char *format); |
119 | static int CharInSet(CharSet *cset, int ch); |
120 | static void ReleaseCharSet(CharSet *cset); |
121 | static inline void scan_set_error_return(int numVars, zval **return_value); |
122 | |
123 | |
124 | /* {{{ BuildCharSet |
125 | *---------------------------------------------------------------------- |
126 | * |
127 | * BuildCharSet -- |
128 | * |
129 | * This function examines a character set format specification |
130 | * and builds a CharSet containing the individual characters and |
131 | * character ranges specified. |
132 | * |
133 | * Results: |
134 | * Returns the next format position. |
135 | * |
136 | * Side effects: |
137 | * Initializes the charset. |
138 | * |
139 | *---------------------------------------------------------------------- |
140 | */ |
141 | static char * BuildCharSet(CharSet *cset, char *format) |
142 | { |
143 | char *ch, start; |
144 | int nranges; |
145 | char *end; |
146 | |
147 | memset(cset, 0, sizeof(CharSet)); |
148 | |
149 | ch = format; |
150 | if (*ch == '^') { |
151 | cset->exclude = 1; |
152 | ch = ++format; |
153 | } |
154 | end = format + 1; /* verify this - cc */ |
155 | |
156 | /* |
157 | * Find the close bracket so we can overallocate the set. |
158 | */ |
159 | if (*ch == ']') { |
160 | ch = end++; |
161 | } |
162 | nranges = 0; |
163 | while (*ch != ']') { |
164 | if (*ch == '-') { |
165 | nranges++; |
166 | } |
167 | ch = end++; |
168 | } |
169 | |
170 | cset->chars = (char *) safe_emalloc(sizeof(char), (end - format - 1), 0); |
171 | if (nranges > 0) { |
172 | cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0); |
173 | } else { |
174 | cset->ranges = NULL; |
175 | } |
176 | |
177 | /* |
178 | * Now build the character set. |
179 | */ |
180 | cset->nchars = cset->nranges = 0; |
181 | ch = format++; |
182 | start = *ch; |
183 | if (*ch == ']' || *ch == '-') { |
184 | cset->chars[cset->nchars++] = *ch; |
185 | ch = format++; |
186 | } |
187 | while (*ch != ']') { |
188 | if (*format == '-') { |
189 | /* |
190 | * This may be the first character of a range, so don't add |
191 | * it yet. |
192 | */ |
193 | start = *ch; |
194 | } else if (*ch == '-') { |
195 | /* |
196 | * Check to see if this is the last character in the set, in which |
197 | * case it is not a range and we should add the previous character |
198 | * as well as the dash. |
199 | */ |
200 | if (*format == ']') { |
201 | cset->chars[cset->nchars++] = start; |
202 | cset->chars[cset->nchars++] = *ch; |
203 | } else { |
204 | ch = format++; |
205 | |
206 | /* |
207 | * Check to see if the range is in reverse order. |
208 | */ |
209 | if (start < *ch) { |
210 | cset->ranges[cset->nranges].start = start; |
211 | cset->ranges[cset->nranges].end = *ch; |
212 | } else { |
213 | cset->ranges[cset->nranges].start = *ch; |
214 | cset->ranges[cset->nranges].end = start; |
215 | } |
216 | cset->nranges++; |
217 | } |
218 | } else { |
219 | cset->chars[cset->nchars++] = *ch; |
220 | } |
221 | ch = format++; |
222 | } |
223 | return format; |
224 | } |
225 | /* }}} */ |
226 | |
227 | /* {{{ CharInSet |
228 | *---------------------------------------------------------------------- |
229 | * |
230 | * CharInSet -- |
231 | * |
232 | * Check to see if a character matches the given set. |
233 | * |
234 | * Results: |
235 | * Returns non-zero if the character matches the given set. |
236 | * |
237 | * Side effects: |
238 | * None. |
239 | * |
240 | *---------------------------------------------------------------------- |
241 | */ |
242 | static int CharInSet(CharSet *cset, int c) |
243 | { |
244 | char ch = (char) c; |
245 | int i, match = 0; |
246 | |
247 | for (i = 0; i < cset->nchars; i++) { |
248 | if (cset->chars[i] == ch) { |
249 | match = 1; |
250 | break; |
251 | } |
252 | } |
253 | if (!match) { |
254 | for (i = 0; i < cset->nranges; i++) { |
255 | if ((cset->ranges[i].start <= ch) |
256 | && (ch <= cset->ranges[i].end)) { |
257 | match = 1; |
258 | break; |
259 | } |
260 | } |
261 | } |
262 | return (cset->exclude ? !match : match); |
263 | } |
264 | /* }}} */ |
265 | |
266 | /* {{{ ReleaseCharSet |
267 | *---------------------------------------------------------------------- |
268 | * |
269 | * ReleaseCharSet -- |
270 | * |
271 | * Free the storage associated with a character set. |
272 | * |
273 | * Results: |
274 | * None. |
275 | * |
276 | * Side effects: |
277 | * None. |
278 | * |
279 | *---------------------------------------------------------------------- |
280 | */ |
281 | static void ReleaseCharSet(CharSet *cset) |
282 | { |
283 | efree((char *)cset->chars); |
284 | if (cset->ranges) { |
285 | efree((char *)cset->ranges); |
286 | } |
287 | } |
288 | /* }}} */ |
289 | |
290 | /* {{{ ValidateFormat |
291 | *---------------------------------------------------------------------- |
292 | * |
293 | * ValidateFormat -- |
294 | * |
295 | * Parse the format string and verify that it is properly formed |
296 | * and that there are exactly enough variables on the command line. |
297 | * |
298 | * Results: |
299 | * FAILURE or SUCCESS. |
300 | * |
301 | * Side effects: |
302 | * May set php_error based on abnormal conditions. |
303 | * |
304 | * Parameters : |
305 | * format The format string. |
306 | * numVars The number of variables passed to the scan command. |
307 | * totalSubs The number of variables that will be required. |
308 | * |
309 | *---------------------------------------------------------------------- |
310 | */ |
311 | PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs) |
312 | { |
313 | #define STATIC_LIST_SIZE 16 |
314 | int gotXpg, gotSequential, value, i, flags; |
315 | char *end, *ch = NULL; |
316 | int staticAssign[STATIC_LIST_SIZE]; |
317 | int *nassign = staticAssign; |
318 | int objIndex, xpgSize, nspace = STATIC_LIST_SIZE; |
319 | TSRMLS_FETCH(); |
320 | |
321 | /* |
322 | * Initialize an array that records the number of times a variable |
323 | * is assigned to by the format string. We use this to detect if |
324 | * a variable is multiply assigned or left unassigned. |
325 | */ |
326 | if (numVars > nspace) { |
327 | nassign = (int*)safe_emalloc(sizeof(int), numVars, 0); |
328 | nspace = numVars; |
329 | } |
330 | for (i = 0; i < nspace; i++) { |
331 | nassign[i] = 0; |
332 | } |
333 | |
334 | xpgSize = objIndex = gotXpg = gotSequential = 0; |
335 | |
336 | while (*format != '\0') { |
337 | ch = format++; |
338 | flags = 0; |
339 | |
340 | if (*ch != '%') { |
341 | continue; |
342 | } |
343 | ch = format++; |
344 | if (*ch == '%') { |
345 | continue; |
346 | } |
347 | if (*ch == '*') { |
348 | flags |= SCAN_SUPPRESS; |
349 | ch = format++; |
350 | goto xpgCheckDone; |
351 | } |
352 | |
353 | if ( isdigit( (int)*ch ) ) { |
354 | /* |
355 | * Check for an XPG3-style %n$ specification. Note: there |
356 | * must not be a mixture of XPG3 specs and non-XPG3 specs |
357 | * in the same format string. |
358 | */ |
359 | value = strtoul(format-1, &end, 10); |
360 | if (*end != '$') { |
361 | goto notXpg; |
362 | } |
363 | format = end+1; |
364 | ch = format++; |
365 | gotXpg = 1; |
366 | if (gotSequential) { |
367 | goto mixedXPG; |
368 | } |
369 | objIndex = value - 1; |
370 | if ((objIndex < 0) || (numVars && (objIndex >= numVars))) { |
371 | goto badIndex; |
372 | } else if (numVars == 0) { |
373 | /* |
374 | * In the case where no vars are specified, the user can |
375 | * specify %9999$ legally, so we have to consider special |
376 | * rules for growing the assign array. 'value' is |
377 | * guaranteed to be > 0. |
378 | */ |
379 | |
380 | /* set a lower artificial limit on this |
381 | * in the interest of security and resource friendliness |
382 | * 255 arguments should be more than enough. - cc |
383 | */ |
384 | if (value > SCAN_MAX_ARGS) { |
385 | goto badIndex; |
386 | } |
387 | |
388 | xpgSize = (xpgSize > value) ? xpgSize : value; |
389 | } |
390 | goto xpgCheckDone; |
391 | } |
392 | |
393 | notXpg: |
394 | gotSequential = 1; |
395 | if (gotXpg) { |
396 | mixedXPG: |
397 | php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s" , "cannot mix \"%\" and \"%n$\" conversion specifiers" ); |
398 | goto error; |
399 | } |
400 | |
401 | xpgCheckDone: |
402 | /* |
403 | * Parse any width specifier. |
404 | */ |
405 | if (isdigit(UCHAR(*ch))) { |
406 | value = strtoul(format-1, &format, 10); |
407 | flags |= SCAN_WIDTH; |
408 | ch = format++; |
409 | } |
410 | |
411 | /* |
412 | * Ignore size specifier. |
413 | */ |
414 | if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) { |
415 | ch = format++; |
416 | } |
417 | |
418 | if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) { |
419 | goto badIndex; |
420 | } |
421 | |
422 | /* |
423 | * Handle the various field types. |
424 | */ |
425 | switch (*ch) { |
426 | case 'n': |
427 | case 'd': |
428 | case 'D': |
429 | case 'i': |
430 | case 'o': |
431 | case 'x': |
432 | case 'X': |
433 | case 'u': |
434 | case 'f': |
435 | case 'e': |
436 | case 'E': |
437 | case 'g': |
438 | case 's': |
439 | break; |
440 | |
441 | case 'c': |
442 | /* we differ here with the TCL implementation in allowing for */ |
443 | /* a character width specification, to be more consistent with */ |
444 | /* ANSI. since Zend auto allocates space for vars, this is no */ |
445 | /* problem - cc */ |
446 | /* |
447 | if (flags & SCAN_WIDTH) { |
448 | php_error_docref(NULL TSRMLS_CC, E_WARNING, "Field width may not be specified in %c conversion"); |
449 | goto error; |
450 | } |
451 | */ |
452 | break; |
453 | |
454 | case '[': |
455 | if (*format == '\0') { |
456 | goto badSet; |
457 | } |
458 | ch = format++; |
459 | if (*ch == '^') { |
460 | if (*format == '\0') { |
461 | goto badSet; |
462 | } |
463 | ch = format++; |
464 | } |
465 | if (*ch == ']') { |
466 | if (*format == '\0') { |
467 | goto badSet; |
468 | } |
469 | ch = format++; |
470 | } |
471 | while (*ch != ']') { |
472 | if (*format == '\0') { |
473 | goto badSet; |
474 | } |
475 | ch = format++; |
476 | } |
477 | break; |
478 | badSet: |
479 | php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unmatched [ in format string" ); |
480 | goto error; |
481 | |
482 | default: { |
483 | php_error_docref(NULL TSRMLS_CC, E_WARNING, "Bad scan conversion character \"%c\"" , *ch); |
484 | goto error; |
485 | } |
486 | } |
487 | |
488 | if (!(flags & SCAN_SUPPRESS)) { |
489 | if (objIndex >= nspace) { |
490 | /* |
491 | * Expand the nassign buffer. If we are using XPG specifiers, |
492 | * make sure that we grow to a large enough size. xpgSize is |
493 | * guaranteed to be at least one larger than objIndex. |
494 | */ |
495 | value = nspace; |
496 | if (xpgSize) { |
497 | nspace = xpgSize; |
498 | } else { |
499 | nspace += STATIC_LIST_SIZE; |
500 | } |
501 | if (nassign == staticAssign) { |
502 | nassign = (void *)safe_emalloc(nspace, sizeof(int), 0); |
503 | for (i = 0; i < STATIC_LIST_SIZE; ++i) { |
504 | nassign[i] = staticAssign[i]; |
505 | } |
506 | } else { |
507 | nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int)); |
508 | } |
509 | for (i = value; i < nspace; i++) { |
510 | nassign[i] = 0; |
511 | } |
512 | } |
513 | nassign[objIndex]++; |
514 | objIndex++; |
515 | } |
516 | } /* while (*format != '\0') */ |
517 | |
518 | /* |
519 | * Verify that all of the variable were assigned exactly once. |
520 | */ |
521 | if (numVars == 0) { |
522 | if (xpgSize) { |
523 | numVars = xpgSize; |
524 | } else { |
525 | numVars = objIndex; |
526 | } |
527 | } |
528 | if (totalSubs) { |
529 | *totalSubs = numVars; |
530 | } |
531 | for (i = 0; i < numVars; i++) { |
532 | if (nassign[i] > 1) { |
533 | php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s" , "Variable is assigned by multiple \"%n$\" conversion specifiers" ); |
534 | goto error; |
535 | } else if (!xpgSize && (nassign[i] == 0)) { |
536 | /* |
537 | * If the space is empty, and xpgSize is 0 (means XPG wasn't |
538 | * used, and/or numVars != 0), then too many vars were given |
539 | */ |
540 | php_error_docref(NULL TSRMLS_CC, E_WARNING, "Variable is not assigned by any conversion specifiers" ); |
541 | goto error; |
542 | } |
543 | } |
544 | |
545 | if (nassign != staticAssign) { |
546 | efree((char *)nassign); |
547 | } |
548 | return SCAN_SUCCESS; |
549 | |
550 | badIndex: |
551 | if (gotXpg) { |
552 | php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s" , "\"%n$\" argument index out of range" ); |
553 | } else { |
554 | php_error_docref(NULL TSRMLS_CC, E_WARNING, "Different numbers of variable names and field specifiers" ); |
555 | } |
556 | |
557 | error: |
558 | if (nassign != staticAssign) { |
559 | efree((char *)nassign); |
560 | } |
561 | return SCAN_ERROR_INVALID_FORMAT; |
562 | #undef STATIC_LIST_SIZE |
563 | } |
564 | /* }}} */ |
565 | |
566 | /* {{{ php_sscanf_internal |
567 | * This is the internal function which does processing on behalf of |
568 | * both sscanf() and fscanf() |
569 | * |
570 | * parameters : |
571 | * string literal string to be processed |
572 | * format format string |
573 | * argCount total number of elements in the args array |
574 | * args arguments passed in from user function (f|s)scanf |
575 | * varStart offset (in args) of 1st variable passed in to (f|s)scanf |
576 | * return_value set with the results of the scan |
577 | */ |
578 | |
579 | PHPAPI int php_sscanf_internal( char *string, char *format, |
580 | int argCount, zval ***args, |
581 | int varStart, zval **return_value TSRMLS_DC) |
582 | { |
583 | int numVars, nconversions, totalVars = -1; |
584 | int i, result; |
585 | long value; |
586 | int objIndex; |
587 | char *end, *baseString; |
588 | zval **current; |
589 | char op = 0; |
590 | int base = 0; |
591 | int underflow = 0; |
592 | size_t width; |
593 | long (*fn)() = NULL; |
594 | char *ch, sch; |
595 | int flags; |
596 | char buf[64]; /* Temporary buffer to hold scanned number |
597 | * strings before they are passed to strtoul() */ |
598 | |
599 | /* do some sanity checking */ |
600 | if ((varStart > argCount) || (varStart < 0)){ |
601 | varStart = SCAN_MAX_ARGS + 1; |
602 | } |
603 | numVars = argCount - varStart; |
604 | if (numVars < 0) { |
605 | numVars = 0; |
606 | } |
607 | |
608 | #if 0 |
609 | zend_printf("<br>in sscanf_internal : <br> string is \"%s\", format = \"%s\"<br> NumVars = %d. VarStart = %d<br>-------------------------<br>" , |
610 | string, format, numVars, varStart); |
611 | #endif |
612 | /* |
613 | * Check for errors in the format string. |
614 | */ |
615 | if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) { |
616 | scan_set_error_return( numVars, return_value ); |
617 | return SCAN_ERROR_INVALID_FORMAT; |
618 | } |
619 | |
620 | objIndex = numVars ? varStart : 0; |
621 | |
622 | /* |
623 | * If any variables are passed, make sure they are all passed by reference |
624 | */ |
625 | if (numVars) { |
626 | for (i = varStart;i < argCount;i++){ |
627 | if ( ! PZVAL_IS_REF( *args[ i ] ) ) { |
628 | php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter %d must be passed by reference" , i); |
629 | scan_set_error_return(numVars, return_value); |
630 | return SCAN_ERROR_VAR_PASSED_BYVAL; |
631 | } |
632 | } |
633 | } |
634 | |
635 | /* |
636 | * Allocate space for the result objects. Only happens when no variables |
637 | * are specified |
638 | */ |
639 | if (!numVars) { |
640 | zval *tmp; |
641 | |
642 | /* allocate an array for return */ |
643 | array_init(*return_value); |
644 | |
645 | for (i = 0; i < totalVars; i++) { |
646 | MAKE_STD_ZVAL(tmp); |
647 | ZVAL_NULL(tmp); |
648 | if (add_next_index_zval(*return_value, tmp) == FAILURE) { |
649 | scan_set_error_return(0, return_value); |
650 | return FAILURE; |
651 | } |
652 | } |
653 | varStart = 0; /* Array index starts from 0 */ |
654 | } |
655 | |
656 | baseString = string; |
657 | |
658 | /* |
659 | * Iterate over the format string filling in the result objects until |
660 | * we reach the end of input, the end of the format string, or there |
661 | * is a mismatch. |
662 | */ |
663 | nconversions = 0; |
664 | /* note ! - we need to limit the loop for objIndex to keep it in bounds */ |
665 | |
666 | while (*format != '\0') { |
667 | ch = format++; |
668 | flags = 0; |
669 | |
670 | /* |
671 | * If we see whitespace in the format, skip whitespace in the string. |
672 | */ |
673 | if ( isspace( (int)*ch ) ) { |
674 | sch = *string; |
675 | while ( isspace( (int)sch ) ) { |
676 | if (*string == '\0') { |
677 | goto done; |
678 | } |
679 | string++; |
680 | sch = *string; |
681 | } |
682 | continue; |
683 | } |
684 | |
685 | if (*ch != '%') { |
686 | literal: |
687 | if (*string == '\0') { |
688 | underflow = 1; |
689 | goto done; |
690 | } |
691 | sch = *string; |
692 | string++; |
693 | if (*ch != sch) { |
694 | goto done; |
695 | } |
696 | continue; |
697 | } |
698 | |
699 | ch = format++; |
700 | if (*ch == '%') { |
701 | goto literal; |
702 | } |
703 | |
704 | /* |
705 | * Check for assignment suppression ('*') or an XPG3-style |
706 | * assignment ('%n$'). |
707 | */ |
708 | if (*ch == '*') { |
709 | flags |= SCAN_SUPPRESS; |
710 | ch = format++; |
711 | } else if ( isdigit(UCHAR(*ch))) { |
712 | value = strtoul(format-1, &end, 10); |
713 | if (*end == '$') { |
714 | format = end+1; |
715 | ch = format++; |
716 | objIndex = varStart + value - 1; |
717 | } |
718 | } |
719 | |
720 | /* |
721 | * Parse any width specifier. |
722 | */ |
723 | if ( isdigit(UCHAR(*ch))) { |
724 | width = strtoul(format-1, &format, 10); |
725 | ch = format++; |
726 | } else { |
727 | width = 0; |
728 | } |
729 | |
730 | /* |
731 | * Ignore size specifier. |
732 | */ |
733 | if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) { |
734 | ch = format++; |
735 | } |
736 | |
737 | /* |
738 | * Handle the various field types. |
739 | */ |
740 | switch (*ch) { |
741 | case 'n': |
742 | if (!(flags & SCAN_SUPPRESS)) { |
743 | if (numVars && objIndex >= argCount) { |
744 | break; |
745 | } else if (numVars) { |
746 | zend_uint refcount; |
747 | |
748 | current = args[objIndex++]; |
749 | refcount = Z_REFCOUNT_PP(current); |
750 | zval_dtor( *current ); |
751 | ZVAL_LONG( *current, (long)(string - baseString) ); |
752 | Z_SET_REFCOUNT_PP(current, refcount); |
753 | Z_SET_ISREF_PP(current); |
754 | } else { |
755 | add_index_long(*return_value, objIndex++, string - baseString); |
756 | } |
757 | } |
758 | nconversions++; |
759 | continue; |
760 | |
761 | case 'd': |
762 | case 'D': |
763 | op = 'i'; |
764 | base = 10; |
765 | fn = (long (*)())strtol; |
766 | break; |
767 | case 'i': |
768 | op = 'i'; |
769 | base = 0; |
770 | fn = (long (*)())strtol; |
771 | break; |
772 | case 'o': |
773 | op = 'i'; |
774 | base = 8; |
775 | fn = (long (*)())strtol; |
776 | break; |
777 | case 'x': |
778 | case 'X': |
779 | op = 'i'; |
780 | base = 16; |
781 | fn = (long (*)())strtol; |
782 | break; |
783 | case 'u': |
784 | op = 'i'; |
785 | base = 10; |
786 | flags |= SCAN_UNSIGNED; |
787 | fn = (long (*)())strtoul; |
788 | break; |
789 | |
790 | case 'f': |
791 | case 'e': |
792 | case 'E': |
793 | case 'g': |
794 | op = 'f'; |
795 | break; |
796 | |
797 | case 's': |
798 | op = 's'; |
799 | break; |
800 | |
801 | case 'c': |
802 | op = 's'; |
803 | flags |= SCAN_NOSKIP; |
804 | /*-cc-*/ |
805 | if (0 == width) { |
806 | width = 1; |
807 | } |
808 | /*-cc-*/ |
809 | break; |
810 | case '[': |
811 | op = '['; |
812 | flags |= SCAN_NOSKIP; |
813 | break; |
814 | } /* switch */ |
815 | |
816 | /* |
817 | * At this point, we will need additional characters from the |
818 | * string to proceed. |
819 | */ |
820 | if (*string == '\0') { |
821 | underflow = 1; |
822 | goto done; |
823 | } |
824 | |
825 | /* |
826 | * Skip any leading whitespace at the beginning of a field unless |
827 | * the format suppresses this behavior. |
828 | */ |
829 | if (!(flags & SCAN_NOSKIP)) { |
830 | while (*string != '\0') { |
831 | sch = *string; |
832 | if (! isspace((int)sch) ) { |
833 | break; |
834 | } |
835 | string++; |
836 | } |
837 | if (*string == '\0') { |
838 | underflow = 1; |
839 | goto done; |
840 | } |
841 | } |
842 | |
843 | /* |
844 | * Perform the requested scanning operation. |
845 | */ |
846 | switch (op) { |
847 | case 'c': |
848 | case 's': |
849 | /* |
850 | * Scan a string up to width characters or whitespace. |
851 | */ |
852 | if (width == 0) { |
853 | width = (size_t) ~0; |
854 | } |
855 | end = string; |
856 | while (*end != '\0') { |
857 | sch = *end; |
858 | if ( isspace( (int)sch ) ) { |
859 | break; |
860 | } |
861 | end++; |
862 | if (--width == 0) { |
863 | break; |
864 | } |
865 | } |
866 | if (!(flags & SCAN_SUPPRESS)) { |
867 | if (numVars && objIndex >= argCount) { |
868 | break; |
869 | } else if (numVars) { |
870 | zend_uint refcount; |
871 | |
872 | current = args[objIndex++]; |
873 | refcount = Z_REFCOUNT_PP(current); |
874 | zval_dtor( *current ); |
875 | ZVAL_STRINGL( *current, string, end-string, 1); |
876 | Z_SET_REFCOUNT_PP(current, refcount); |
877 | Z_SET_ISREF_PP(current); |
878 | } else { |
879 | add_index_stringl( *return_value, objIndex++, string, end-string, 1); |
880 | } |
881 | } |
882 | string = end; |
883 | break; |
884 | |
885 | case '[': { |
886 | CharSet cset; |
887 | |
888 | if (width == 0) { |
889 | width = (size_t) ~0; |
890 | } |
891 | end = string; |
892 | |
893 | format = BuildCharSet(&cset, format); |
894 | while (*end != '\0') { |
895 | sch = *end; |
896 | if (!CharInSet(&cset, (int)sch)) { |
897 | break; |
898 | } |
899 | end++; |
900 | if (--width == 0) { |
901 | break; |
902 | } |
903 | } |
904 | ReleaseCharSet(&cset); |
905 | |
906 | if (string == end) { |
907 | /* |
908 | * Nothing matched the range, stop processing |
909 | */ |
910 | goto done; |
911 | } |
912 | if (!(flags & SCAN_SUPPRESS)) { |
913 | if (numVars && objIndex >= argCount) { |
914 | break; |
915 | } else if (numVars) { |
916 | current = args[objIndex++]; |
917 | zval_dtor( *current ); |
918 | ZVAL_STRINGL( *current, string, end-string, 1); |
919 | } else { |
920 | add_index_stringl(*return_value, objIndex++, string, end-string, 1); |
921 | } |
922 | } |
923 | string = end; |
924 | break; |
925 | } |
926 | /* |
927 | case 'c': |
928 | / Scan a single character./ |
929 | |
930 | sch = *string; |
931 | string++; |
932 | if (!(flags & SCAN_SUPPRESS)) { |
933 | if (numVars) { |
934 | char __buf[2]; |
935 | __buf[0] = sch; |
936 | __buf[1] = '\0';; |
937 | current = args[objIndex++]; |
938 | zval_dtor(*current); |
939 | ZVAL_STRINGL( *current, __buf, 1, 1); |
940 | } else { |
941 | add_index_stringl(*return_value, objIndex++, &sch, 1, 1); |
942 | } |
943 | } |
944 | break; |
945 | */ |
946 | case 'i': |
947 | /* |
948 | * Scan an unsigned or signed integer. |
949 | */ |
950 | /*-cc-*/ |
951 | buf[0] = '\0'; |
952 | /*-cc-*/ |
953 | if ((width == 0) || (width > sizeof(buf) - 1)) { |
954 | width = sizeof(buf) - 1; |
955 | } |
956 | |
957 | flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO; |
958 | for (end = buf; width > 0; width--) { |
959 | switch (*string) { |
960 | /* |
961 | * The 0 digit has special meaning at the beginning of |
962 | * a number. If we are unsure of the base, it |
963 | * indicates that we are in base 8 or base 16 (if it is |
964 | * followed by an 'x'). |
965 | */ |
966 | case '0': |
967 | /*-cc-*/ |
968 | if (base == 16) { |
969 | flags |= SCAN_XOK; |
970 | } |
971 | /*-cc-*/ |
972 | if (base == 0) { |
973 | base = 8; |
974 | flags |= SCAN_XOK; |
975 | } |
976 | if (flags & SCAN_NOZERO) { |
977 | flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO); |
978 | } else { |
979 | flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS); |
980 | } |
981 | goto addToInt; |
982 | |
983 | case '1': case '2': case '3': case '4': |
984 | case '5': case '6': case '7': |
985 | if (base == 0) { |
986 | base = 10; |
987 | } |
988 | flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS); |
989 | goto addToInt; |
990 | |
991 | case '8': case '9': |
992 | if (base == 0) { |
993 | base = 10; |
994 | } |
995 | if (base <= 8) { |
996 | break; |
997 | } |
998 | flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS); |
999 | goto addToInt; |
1000 | |
1001 | case 'A': case 'B': case 'C': |
1002 | case 'D': case 'E': case 'F': |
1003 | case 'a': case 'b': case 'c': |
1004 | case 'd': case 'e': case 'f': |
1005 | if (base <= 10) { |
1006 | break; |
1007 | } |
1008 | flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS); |
1009 | goto addToInt; |
1010 | |
1011 | case '+': case '-': |
1012 | if (flags & SCAN_SIGNOK) { |
1013 | flags &= ~SCAN_SIGNOK; |
1014 | goto addToInt; |
1015 | } |
1016 | break; |
1017 | |
1018 | case 'x': case 'X': |
1019 | if ((flags & SCAN_XOK) && (end == buf+1)) { |
1020 | base = 16; |
1021 | flags &= ~SCAN_XOK; |
1022 | goto addToInt; |
1023 | } |
1024 | break; |
1025 | } |
1026 | |
1027 | /* |
1028 | * We got an illegal character so we are done accumulating. |
1029 | */ |
1030 | break; |
1031 | |
1032 | addToInt: |
1033 | /* |
1034 | * Add the character to the temporary buffer. |
1035 | */ |
1036 | *end++ = *string++; |
1037 | if (*string == '\0') { |
1038 | break; |
1039 | } |
1040 | } |
1041 | |
1042 | /* |
1043 | * Check to see if we need to back up because we only got a |
1044 | * sign or a trailing x after a 0. |
1045 | */ |
1046 | if (flags & SCAN_NODIGITS) { |
1047 | if (*string == '\0') { |
1048 | underflow = 1; |
1049 | } |
1050 | goto done; |
1051 | } else if (end[-1] == 'x' || end[-1] == 'X') { |
1052 | end--; |
1053 | string--; |
1054 | } |
1055 | |
1056 | /* |
1057 | * Scan the value from the temporary buffer. If we are |
1058 | * returning a large unsigned value, we have to convert it back |
1059 | * to a string since PHP only supports signed values. |
1060 | */ |
1061 | if (!(flags & SCAN_SUPPRESS)) { |
1062 | *end = '\0'; |
1063 | value = (long) (*fn)(buf, NULL, base); |
1064 | if ((flags & SCAN_UNSIGNED) && (value < 0)) { |
1065 | snprintf(buf, sizeof(buf), "%lu" , value); /* INTL: ISO digit */ |
1066 | if (numVars && objIndex >= argCount) { |
1067 | break; |
1068 | } else if (numVars) { |
1069 | /* change passed value type to string */ |
1070 | current = args[objIndex++]; |
1071 | zval_dtor(*current); |
1072 | ZVAL_STRING( *current, buf, 1 ); |
1073 | } else { |
1074 | add_index_string(*return_value, objIndex++, buf, 1); |
1075 | } |
1076 | } else { |
1077 | if (numVars && objIndex >= argCount) { |
1078 | break; |
1079 | } else if (numVars) { |
1080 | current = args[objIndex++]; |
1081 | zval_dtor(*current); |
1082 | ZVAL_LONG(*current, value); |
1083 | } else { |
1084 | add_index_long(*return_value, objIndex++, value); |
1085 | } |
1086 | } |
1087 | } |
1088 | break; |
1089 | |
1090 | case 'f': |
1091 | /* |
1092 | * Scan a floating point number |
1093 | */ |
1094 | buf[0] = '\0'; /* call me pedantic */ |
1095 | if ((width == 0) || (width > sizeof(buf) - 1)) { |
1096 | width = sizeof(buf) - 1; |
1097 | } |
1098 | flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK; |
1099 | for (end = buf; width > 0; width--) { |
1100 | switch (*string) { |
1101 | case '0': case '1': case '2': case '3': |
1102 | case '4': case '5': case '6': case '7': |
1103 | case '8': case '9': |
1104 | flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS); |
1105 | goto addToFloat; |
1106 | case '+': |
1107 | case '-': |
1108 | if (flags & SCAN_SIGNOK) { |
1109 | flags &= ~SCAN_SIGNOK; |
1110 | goto addToFloat; |
1111 | } |
1112 | break; |
1113 | case '.': |
1114 | if (flags & SCAN_PTOK) { |
1115 | flags &= ~(SCAN_SIGNOK | SCAN_PTOK); |
1116 | goto addToFloat; |
1117 | } |
1118 | break; |
1119 | case 'e': |
1120 | case 'E': |
1121 | /* |
1122 | * An exponent is not allowed until there has |
1123 | * been at least one digit. |
1124 | */ |
1125 | if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) { |
1126 | flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK)) |
1127 | | SCAN_SIGNOK | SCAN_NODIGITS; |
1128 | goto addToFloat; |
1129 | } |
1130 | break; |
1131 | } |
1132 | |
1133 | /* |
1134 | * We got an illegal character so we are done accumulating. |
1135 | */ |
1136 | break; |
1137 | |
1138 | addToFloat: |
1139 | /* |
1140 | * Add the character to the temporary buffer. |
1141 | */ |
1142 | *end++ = *string++; |
1143 | if (*string == '\0') { |
1144 | break; |
1145 | } |
1146 | } |
1147 | |
1148 | /* |
1149 | * Check to see if we need to back up because we saw a |
1150 | * trailing 'e' or sign. |
1151 | */ |
1152 | if (flags & SCAN_NODIGITS) { |
1153 | if (flags & SCAN_EXPOK) { |
1154 | /* |
1155 | * There were no digits at all so scanning has |
1156 | * failed and we are done. |
1157 | */ |
1158 | if (*string == '\0') { |
1159 | underflow = 1; |
1160 | } |
1161 | goto done; |
1162 | } |
1163 | |
1164 | /* |
1165 | * We got a bad exponent ('e' and maybe a sign). |
1166 | */ |
1167 | end--; |
1168 | string--; |
1169 | if (*end != 'e' && *end != 'E') { |
1170 | end--; |
1171 | string--; |
1172 | } |
1173 | } |
1174 | |
1175 | /* |
1176 | * Scan the value from the temporary buffer. |
1177 | */ |
1178 | if (!(flags & SCAN_SUPPRESS)) { |
1179 | double dvalue; |
1180 | *end = '\0'; |
1181 | dvalue = zend_strtod(buf, NULL); |
1182 | if (numVars && objIndex >= argCount) { |
1183 | break; |
1184 | } else if (numVars) { |
1185 | current = args[objIndex++]; |
1186 | zval_dtor(*current); |
1187 | ZVAL_DOUBLE(*current, dvalue); |
1188 | } else { |
1189 | add_index_double( *return_value, objIndex++, dvalue ); |
1190 | } |
1191 | } |
1192 | break; |
1193 | } /* switch (op) */ |
1194 | nconversions++; |
1195 | } /* while (*format != '\0') */ |
1196 | |
1197 | done: |
1198 | result = SCAN_SUCCESS; |
1199 | |
1200 | if (underflow && (0==nconversions)) { |
1201 | scan_set_error_return( numVars, return_value ); |
1202 | result = SCAN_ERROR_EOF; |
1203 | } else if (numVars) { |
1204 | convert_to_long( *return_value ); |
1205 | Z_LVAL_PP(return_value) = nconversions; |
1206 | } else if (nconversions < totalVars) { |
1207 | /* TODO: not all elements converted. we need to prune the list - cc */ |
1208 | } |
1209 | return result; |
1210 | } |
1211 | /* }}} */ |
1212 | |
1213 | /* the compiler choked when i tried to make this a macro */ |
1214 | static inline void scan_set_error_return(int numVars, zval **return_value) /* {{{ */ |
1215 | { |
1216 | if (numVars) { |
1217 | Z_TYPE_PP(return_value) = IS_LONG; |
1218 | Z_LVAL_PP(return_value) = SCAN_ERROR_EOF; /* EOF marker */ |
1219 | } else { |
1220 | /* convert_to_null calls destructor */ |
1221 | convert_to_null( *return_value ); |
1222 | } |
1223 | } |
1224 | /* }}} */ |
1225 | |
1226 | /* |
1227 | * Local variables: |
1228 | * tab-width: 4 |
1229 | * c-basic-offset: 4 |
1230 | * End: |
1231 | * vim600: sw=4 ts=4 fdm=marker |
1232 | * vim<600: sw=4 ts=4 |
1233 | */ |
1234 | |