diff --git a/libc/Kconfig b/libc/Kconfig
index 757b115d81..c33aa553e5 100644
--- a/libc/Kconfig
+++ b/libc/Kconfig
@@ -61,6 +61,12 @@ config LIBC_LONG_LONG
 		libraries that will be drawn into the build if long long support
 		is enabled.
 
+config LIBC_SCANSET
+	bool "Scanset support"
+	default n
+	---help---
+		Add scanset support to sscanf().
+
 config LIBC_IOCTL_VARIADIC
 	bool "Enable variadic ioctl()"
 	default n
diff --git a/libc/stdio/lib_sscanf.c b/libc/stdio/lib_sscanf.c
index 0bc413295b..fcbdb941b8 100644
--- a/libc/stdio/lib_sscanf.c
+++ b/libc/stdio/lib_sscanf.c
@@ -134,6 +134,178 @@ static int findwidth(FAR const char *buf, FAR const char *fmt)
   return strcspn(buf, spaces);
 }
 
+/****************************************************************************
+ * Function:  findscanset
+ *
+ * Description:
+ *   Fill in the given table from the scanset at the given format.  The
+ *   table is a 256-bit field (one bit per unsigned char value) in which a 1
+ *   bit marks a character that belongs to the scanset.
+ *
+ * Input Parameters:
+ *   fmt - Points to the '[' that introduces the scanset in the format
+ *   set - The 32-byte bit field to fill in
+ *
+ * Returned Value:
+ *   A pointer to the closing ']' of the scanset or, if the format ends
+ *   before the scanset is closed, to the terminating NUL.
+ *
+ ****************************************************************************/
+
+#ifdef CONFIG_LIBC_SCANSET
+static FAR const char *findscanset(FAR const char *fmt,
+                                   FAR unsigned char set[32])
+{
+  int c;
+  int n;
+  int v;
+  int i;
+
+  fmt++;  /* Skip '[' */
+
+  /* First `clear' the whole table */
+
+  memset(set, 0, 32);
+
+  /* Characters are fetched as unsigned char so that bytes above 0x7f index
+   * the table correctly even where plain char is signed.
+   */
+
+  c = (unsigned char)*fmt++;      /* First char hat => negated scanset */
+  if (c == '^')
+    {
+      v = 1;                      /* Default => accept */
+      c = (unsigned char)*fmt++;  /* Get new first char */
+    }
+  else
+    {
+      v = 0;                      /* Default => reject */
+    }
+
+  if (c == 0)
+    {
+      goto doexit;                /* Format ended too soon */
+    }
+
+  /* Now set the entries corresponding to the actual scanset.  If the
+   * scanset is negated, the whole table is inverted on exit.
+   *
+   * The first character may be ']' (or '-') without being special;
+   * the last character may be '-'.
+   */
+
+  for (; ; )
+    {
+      set[c / 8] |= (1 << (c % 8));  /* Take character c */
+
+doswitch:
+      n = (unsigned char)*fmt++;     /* And examine the next */
+      switch (n)
+        {
+          case 0:    /* Format ended too soon */
+          case ']':  /* End of scanset */
+            goto doexit;
+
+          case '-':
+            /* A scanset of the form
+             *
+             *   [01+-]
+             *
+             * is defined as "the digit 0, the digit 1, the character +, the
+             * character -", but the effect of a scanset such as
+             *
+             *   [a-zA-Z0-9]
+             *
+             * is implementation defined.  The V7 Unix scanf treats "a-z" as
+             * "the letters a through z", but treats "a-a" as "the letter a,
+             * the character -, and the letter a".
+             *
+             * For compatibility, the `-' is not considered to define a range
+             * if the character following it is either a close bracket
+             * (required by ANSI) or is not numerically greater than the
+             * character we just stored in the table (c).
+             */
+
+            n = (unsigned char)*fmt;
+            if (n == ']' || n <= c)
+              {
+                c = '-';
+                break;  /* Resume the for (; ; ) */
+              }
+
+            fmt++;
+            do
+              {
+                /* Fill in the range */
+
+                c++;
+                set[c / 8] |= (1 << (c % 8));  /* Take character c */
+              }
+            while (c < n);
+
+            /* Alas, the V7 Unix scanf also treats formats such as [a-c-e] as
+             * "the letters a through e".  This too is permitted by the
+             * standard.
+             */
+
+            goto doswitch;
+
+          default:  /* Just another character */
+            c = n;
+            break;
+        }
+    }
+
+doexit:
+  if (v)  /* Default => accept */
+    {
+      for (i = 0; i < 32; i++)  /* Invert all */
+        {
+          set[i] ^= 0xff;
+        }
+    }
+
+  return (fmt - 1);
+}
+#endif
+
+/****************************************************************************
+ * Function:  scansetwidth
+ *
+ * Description:
+ *   Count the leading characters of buf that belong to the scanset.
+ *
+ * Input Parameters:
+ *   buf - The NUL-terminated input to examine
+ *   set - The 32-byte bit field filled in by findscanset()
+ *
+ * Returned Value:
+ *   The number of characters at the start of buf that are in the scanset.
+ *
+ ****************************************************************************/
+
+#ifdef CONFIG_LIBC_SCANSET
+static int scansetwidth(FAR const char *buf,
+                        FAR const unsigned char set[32])
+{
+  FAR const char *next = buf;
+  int c;
+
+  while (*next)
+    {
+      c = (unsigned char)*next;  /* Index must not be negative */
+      if ((set[c / 8] & (1 << (c % 8))) == 0)
+        {
+          break;
+        }
+
+      next++;
+    }
+
+  return (next - buf);
+}
+#endif
+
 /****************************************************************************
  * Public Functions
  ****************************************************************************/
@@ -177,6 +349,9 @@ int vsscanf(FAR const char *buf, FAR const char *fmt, va_list ap)
   int             fwidth;
   int             base = 10;
   char            tmp[MAXLN];
+#ifdef CONFIG_LIBC_SCANSET
+  unsigned char   set[32]; /* Bit field (256 / 8) */
+#endif
 
   linfo("vsscanf: buf=\"%s\" fmt=\"%s\"\n", buf, fmt);
 
@@ -220,7 +395,11 @@ int vsscanf(FAR const char *buf, FAR const char *fmt, va_list ap)
             {
               linfo("vsscanf: Processing %c\n", *fmt);
 
+#ifdef CONFIG_LIBC_SCANSET
+              if (strchr("dibouxcsefgn[%", *fmt))
+#else
               if (strchr("dibouxcsefgn%", *fmt))
+#endif
                 {
                   break;
                 }
@@ -307,6 +486,76 @@ int vsscanf(FAR const char *buf, FAR const char *fmt, va_list ap)
                 }
             }
 
+#ifdef CONFIG_LIBC_SCANSET
+          /* Process %[: Scanset conversion */
+
+          else if (*fmt == '[')
+            {
+              linfo("vsscanf: Performing scanset conversion\n");
+
+              fmt = findscanset(fmt, set); /* Find scanset */
+
+              /* Get a pointer to the char * value.  We need to do this even
+               * if we have reached the end of the input data in order to
+               * update the 'ap' variable.
+               */
+
+              tv = NULL; /* To avoid warnings about being uninitialized */
+              if (!noassign)
+                {
+                  tv = va_arg(ap, FAR char *);
+                  tv[0] = '\0';
+                }
+
+              /* But we only perform the data conversion if we still have
+               * bytes remaining in the input data stream.
+               */
+
+              if (*buf)
+                {
+                  /* Skip over white space */
+
+                  while (isspace(*buf))
+                    {
+                      buf++;
+                    }
+
+                  /* Measure the run of input characters in the scanset */
+
+                  fwidth = scansetwidth(buf, set);
+
+                  /* Use the actual field's width if 1) no fieldwidth
+                   * specified or 2) the actual field's width is smaller
+                   * than fieldwidth specified
+                   */
+
+                  if (!width || fwidth < width)
+                    {
+                      width = fwidth;
+                    }
+
+                  /* Bound the copy length by the size of tmp (MAXLN - 1).
+                   * The destination is the caller's buffer, not tmp.
+                   */
+
+                  width = MIN(sizeof(tmp) - 1, width);
+
+                  /* Copy the string (if we are making an assignment) */
+
+                  if (!noassign)
+                    {
+                      strncpy(tv, buf, width);
+                      tv[width] = '\0';
+                      count++;
+                    }
+
+                  /* Update the buffer pointer past the string in the input */
+
+                  buf += width;
+                }
+            }
+#endif
+
           /* Process %c: Character conversion */
 
           else if (*fmt == 'c')