Tor 0.4.9.0-alpha-dev
scanf.c
Go to the documentation of this file.
1/* Copyright (c) 2003-2004, Roger Dingledine
2 * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
3 * Copyright (c) 2007-2021, The Tor Project, Inc. */
4/* See LICENSE for licensing information */
5
6/**
7 * \file scanf.c
8 * \brief Locale-independent minimal implementation of sscanf().
9 **/
10
11#include "lib/string/scanf.h"
13#include "lib/cc/torint.h"
14#include "lib/err/torerr.h"
15
16#include <stdlib.h>
17
18#define MAX_SCANF_WIDTH 9999
19
/** Helper: given an ASCII-encoded decimal digit <b>d</b>, return its numeric
 * value (0 through 9).
 *
 * NOTE: requires that its input be in-bounds: the result is checked with
 * raw_assert(), so callers must only pass characters that they have
 * already verified to be decimal digits. */
static int
digit_to_num(char d)
{
  int num = ((int)d) - (int)'0';
  raw_assert(num <= 9 && num >= 0);
  return num;
}
29
30/** Helper: Read an unsigned int from *<b>bufp</b> of up to <b>width</b>
31 * characters. (Handle arbitrary width if <b>width</b> is less than 0.) On
32 * success, store the result in <b>out</b>, advance bufp to the next
33 * character, and return 0. On failure, return -1. */
34static int
35scan_unsigned(const char **bufp, unsigned long *out, int width, unsigned base)
36{
37 unsigned long result = 0;
38 int scanned_so_far = 0;
39 const int hex = base==16;
40 raw_assert(base == 10 || base == 16);
41 if (!bufp || !*bufp || !out)
42 return -1;
43 if (width<0)
44 width=MAX_SCANF_WIDTH;
45
46 while (**bufp && (hex?TOR_ISXDIGIT(**bufp):TOR_ISDIGIT(**bufp))
47 && scanned_so_far < width) {
48 unsigned digit = hex?hex_decode_digit(*(*bufp)++):digit_to_num(*(*bufp)++);
49 // Check for overflow beforehand, without actually causing any overflow
50 // This preserves functionality on compilers that don't wrap overflow
51 // (i.e. that trap or optimise away overflow)
52 // result * base + digit > ULONG_MAX
53 // result * base > ULONG_MAX - digit
54 if (result > (ULONG_MAX - digit)/base)
55 return -1; /* Processing this digit would overflow */
56 result = result * base + digit;
57 ++scanned_so_far;
58 }
59
60 if (!scanned_so_far) /* No actual digits scanned */
61 return -1;
62
63 *out = result;
64 return 0;
65}
66
67/** Helper: Read an signed int from *<b>bufp</b> of up to <b>width</b>
68 * characters. (Handle arbitrary width if <b>width</b> is less than 0.) On
69 * success, store the result in <b>out</b>, advance bufp to the next
70 * character, and return 0. On failure, return -1. */
71static int
72scan_signed(const char **bufp, long *out, int width)
73{
74 int neg = 0;
75 unsigned long result = 0;
76
77 if (!bufp || !*bufp || !out)
78 return -1;
79 if (width<0)
80 width=MAX_SCANF_WIDTH;
81
82 if (**bufp == '-') {
83 neg = 1;
84 ++*bufp;
85 --width;
86 }
87
88 if (scan_unsigned(bufp, &result, width, 10) < 0)
89 return -1;
90
91 if (neg && result > 0) {
92 if (result > ((unsigned long)LONG_MAX) + 1)
93 return -1; /* Underflow */
94 else if (result == ((unsigned long)LONG_MAX) + 1)
95 *out = LONG_MIN;
96 else {
97 /* We once had a far more clever no-overflow conversion here, but
98 * some versions of GCC apparently ran it into the ground. Now
99 * we just check for LONG_MIN explicitly.
100 */
101 *out = -(long)result;
102 }
103 } else {
104 if (result > LONG_MAX)
105 return -1; /* Overflow */
106 *out = (long)result;
107 }
108
109 return 0;
110}
111
112/** Helper: Read a decimal-formatted double from *<b>bufp</b> of up to
113 * <b>width</b> characters. (Handle arbitrary width if <b>width</b> is less
114 * than 0.) On success, store the result in <b>out</b>, advance bufp to the
115 * next character, and return 0. On failure, return -1. */
116static int
117scan_double(const char **bufp, double *out, int width)
118{
119 int neg = 0;
120 double result = 0;
121 int scanned_so_far = 0;
122
123 if (!bufp || !*bufp || !out)
124 return -1;
125 if (width<0)
126 width=MAX_SCANF_WIDTH;
127
128 if (**bufp == '-') {
129 neg = 1;
130 ++*bufp;
131 }
132
133 while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) {
134 const int digit = digit_to_num(*(*bufp)++);
135 result = result * 10 + digit;
136 ++scanned_so_far;
137 }
138 if (**bufp == '.') {
139 double fracval = 0, denominator = 1;
140 ++*bufp;
141 ++scanned_so_far;
142 while (**bufp && TOR_ISDIGIT(**bufp) && scanned_so_far < width) {
143 const int digit = digit_to_num(*(*bufp)++);
144 fracval = fracval * 10 + digit;
145 denominator *= 10;
146 ++scanned_so_far;
147 }
148 result += fracval / denominator;
149 }
150
151 if (!scanned_so_far) /* No actual digits scanned */
152 return -1;
153
154 *out = neg ? -result : result;
155 return 0;
156}
157
/** Helper: copy up to <b>width</b> non-space characters from *<b>bufp</b> to
 * <b>out</b>, and nul-terminate <b>out</b>.  Copying stops at the first
 * space character, at the end of the string, or after <b>width</b>
 * characters, whichever comes first; *<b>bufp</b> is left pointing at the
 * first character that was not copied.  Leading space is not skipped.
 *
 * <b>out</b> must have room for at least <b>width</b>+1 bytes.
 *
 * Return 0 on success (even if zero characters were copied).  Return -1,
 * without touching <b>out</b>, if <b>bufp</b>, *<b>bufp</b>, or <b>out</b>
 * is NULL, or if <b>width</b> is negative. */
static int
scan_string(const char **bufp, char *out, int width)
{
  int scanned_so_far = 0;
  /* Check *bufp too, like the other scan_* helpers: we are about to
   * dereference it. */
  if (!bufp || !*bufp || !out || width < 0)
    return -1;
  while (**bufp && ! TOR_ISSPACE(**bufp) && scanned_so_far < width) {
    *out++ = *(*bufp)++;
    ++scanned_so_far;
  }
  *out = '\0';
  return 0;
}
174
175/** Locale-independent, minimal, no-surprises scanf variant, accepting only a
176 * restricted pattern format. For more info on what it supports, see
177 * tor_sscanf() documentation. */
178int
179tor_vsscanf(const char *buf, const char *pattern, va_list ap)
180{
181 int n_matched = 0;
182
183 while (*pattern) {
184 if (*pattern != '%') {
185 if (*buf == *pattern) {
186 ++buf;
187 ++pattern;
188 continue;
189 } else {
190 return n_matched;
191 }
192 } else {
193 int width = -1;
194 int longmod = 0;
195 ++pattern;
196 if (TOR_ISDIGIT(*pattern)) {
197 width = digit_to_num(*pattern++);
198 while (TOR_ISDIGIT(*pattern)) {
199 width *= 10;
200 width += digit_to_num(*pattern++);
201 if (width > MAX_SCANF_WIDTH)
202 return -1;
203 }
204 if (!width) /* No zero-width things. */
205 return -1;
206 }
207 if (*pattern == 'l') {
208 longmod = 1;
209 ++pattern;
210 }
211 if (*pattern == 'u' || *pattern == 'x') {
212 unsigned long u;
213 const int base = (*pattern == 'u') ? 10 : 16;
214 if (!*buf)
215 return n_matched;
216 if (scan_unsigned(&buf, &u, width, base)<0)
217 return n_matched;
218 if (longmod) {
219 unsigned long *out = va_arg(ap, unsigned long *);
220 *out = u;
221 } else {
222 unsigned *out = va_arg(ap, unsigned *);
223 if (u > UINT_MAX)
224 return n_matched;
225 *out = (unsigned) u;
226 }
227 ++pattern;
228 ++n_matched;
229 } else if (*pattern == 'f') {
230 double *d = va_arg(ap, double *);
231 if (!longmod)
232 return -1; /* float not supported */
233 if (!*buf)
234 return n_matched;
235 if (scan_double(&buf, d, width)<0)
236 return n_matched;
237 ++pattern;
238 ++n_matched;
239 } else if (*pattern == 'd') {
240 long lng=0;
241 if (scan_signed(&buf, &lng, width)<0)
242 return n_matched;
243 if (longmod) {
244 long *out = va_arg(ap, long *);
245 *out = lng;
246 } else {
247 int *out = va_arg(ap, int *);
248#if LONG_MAX > INT_MAX
249 if (lng < INT_MIN || lng > INT_MAX)
250 return n_matched;
251#endif
252 *out = (int)lng;
253 }
254 ++pattern;
255 ++n_matched;
256 } else if (*pattern == 's') {
257 char *s = va_arg(ap, char *);
258 if (longmod)
259 return -1;
260 if (width < 0)
261 return -1;
262 if (scan_string(&buf, s, width)<0)
263 return n_matched;
264 ++pattern;
265 ++n_matched;
266 } else if (*pattern == 'c') {
267 char *ch = va_arg(ap, char *);
268 if (longmod)
269 return -1;
270 if (width != -1)
271 return -1;
272 if (!*buf)
273 return n_matched;
274 *ch = *buf++;
275 ++pattern;
276 ++n_matched;
277 } else if (*pattern == '%') {
278 if (*buf != '%')
279 return n_matched;
280 if (longmod)
281 return -1;
282 ++buf;
283 ++pattern;
284 } else {
285 return -1; /* Unrecognized pattern component. */
286 }
287 }
288 }
289
290 return n_matched;
291}
292
/** Minimal sscanf replacement: parse <b>buf</b> according to <b>pattern</b>,
 * storing each converted value through the corresponding pointer argument.
 * This is the variadic front end to tor_vsscanf(), and returns whatever
 * that function returns.
 *
 * How this differs from the C library's sscanf:
 * <ul><li>The only conversions understood are %u, %lu, %x, %lx, %[NUM]s,
 *         %d, %ld, %lf, %c, and a literal %%.
 * <li>%lf accepts plain decimal notation only (12.3, but not 1.23e1).
 * <li>Field widths are bounded; arbitrarily long widths are rejected.
 * <li>Numeric conversions, %u and %x included, never skip over or consume
 *     space characters.
 * <li>Behavior does not depend on the current locale.
 * <li>A malformed pattern makes the function return -1.</ul>
 *
 * (As with the other locale-independent helpers, the point is to parse
 * ASCII data without the C library's locale handling deciding that
 * assorted characters are digits, spaces, and so on.)
 */
int
tor_sscanf(const char *buf, const char *pattern, ...)
{
  va_list args;
  int n_assigned;

  va_start(args, pattern);
  n_assigned = tor_vsscanf(buf, pattern, args);
  va_end(args);

  return n_assigned;
}
Locale-independent character-type inspection (header)
static int hex_decode_digit(char c)
Definition: compat_ctype.h:43
static int scan_double(const char **bufp, double *out, int width)
Definition: scanf.c:117
int tor_sscanf(const char *buf, const char *pattern,...)
Definition: scanf.c:309
static int scan_unsigned(const char **bufp, unsigned long *out, int width, unsigned base)
Definition: scanf.c:35
static int scan_string(const char **bufp, char *out, int width)
Definition: scanf.c:162
int tor_vsscanf(const char *buf, const char *pattern, va_list ap)
Definition: scanf.c:179
static int digit_to_num(char d)
Definition: scanf.c:23
static int scan_signed(const char **bufp, long *out, int width)
Definition: scanf.c:72
Header for scanf.c.
Headers for torerr.c.
Integer definitions used throughout Tor.