spa: json: more parsing errors & add spa_json_get_error

Disallow = and : as bare items in [] containers, as that is most likely
"[ { foo = bar } ]" mistyped as "[ foo = bar ]".

Disallow nesting errors, e.g. "[ foo bar" or "[ foo bar }".

Fix handling of ", \ and # in bare strings.

Fix ignoring trailing comments.

Add a fixed-size stack (128 levels) to the tokenizer, so that it can
check these at levels below its depth.

When the tokenizer encounters an error, make it and its parents enter an
error state in which no further input is processed. This allows the caller
to check for parse errors later, whenever convenient.

The error state can be queried using spa_json_get_error, which also
looks up the error line/column position.
This commit is contained in:
Pauli Virtanen 2024-03-20 19:42:02 +02:00
parent 41d7762f8b
commit 31e5823010
3 changed files with 367 additions and 26 deletions

View File

@ -44,7 +44,7 @@ static inline void spa_json_init(struct spa_json * iter, const char *data, size_
{
*iter = SPA_JSON_INIT(data, size);
}
#define SPA_JSON_ENTER(iter) ((struct spa_json) { (iter)->cur, (iter)->end, (iter), 0, 0 })
#define SPA_JSON_ENTER(iter) ((struct spa_json) { (iter)->cur, (iter)->end, (iter), (iter)->state & 0xf0, 0 })
static inline void spa_json_enter(struct spa_json * iter, struct spa_json * sub)
{
@ -54,68 +54,114 @@ static inline void spa_json_enter(struct spa_json * iter, struct spa_json * sub)
#define SPA_JSON_SAVE(iter) ((struct spa_json) { (iter)->cur, (iter)->end, })
/** Get the next token. \a value points to the token and the return value
* is the length. */
* is the length. Returns -1 on parse error, 0 on end of input. */
static inline int spa_json_next(struct spa_json * iter, const char **value)
{
int utf8_remain = 0;
enum { __NONE, __STRUCT, __BARE, __STRING, __UTF8, __ESC, __COMMENT };
enum {
__NONE, __STRUCT, __BARE, __STRING, __UTF8, __ESC, __COMMENT,
__ARRAY_FLAG = 0x10,
__OBJECT_FLAG = 0x20,
__ERROR_FLAG = 0x40,
__FLAGS = 0xf0,
};
uint8_t object_stack[16] = {0};
uint8_t array_stack[SPA_N_ELEMENTS(object_stack)] = {0};
*value = iter->cur;
if (iter->state & __ERROR_FLAG)
return -1;
for (; iter->cur < iter->end; iter->cur++) {
unsigned char cur = (unsigned char)*iter->cur;
uint32_t flag;
again:
switch (iter->state) {
flag = iter->state & __FLAGS;
switch (iter->state & ~__FLAGS) {
case __NONE:
iter->state = __STRUCT;
iter->state = __STRUCT | flag;
iter->depth = 0;
goto again;
case __STRUCT:
switch (cur) {
case '\0': case '\t': case ' ': case '\r': case '\n': case ':': case '=': case ',':
case '\0': case '\t': case ' ': case '\r': case '\n': case ',':
continue;
case ':': case '=':
if (flag & __ARRAY_FLAG)
goto error;
continue;
case '#':
iter->state = __COMMENT;
iter->state = __COMMENT | flag;
continue;
case '"':
*value = iter->cur;
iter->state = __STRING;
iter->state = __STRING | flag;
continue;
case '[': case '{':
iter->state = __STRUCT | (cur == '[' ? __ARRAY_FLAG : __OBJECT_FLAG);
if ((iter->depth >> 3) < SPA_N_ELEMENTS(object_stack)) {
uint8_t mask = 1 << (iter->depth & 0x7);
SPA_FLAG_UPDATE(object_stack[iter->depth >> 3], mask, flag & __OBJECT_FLAG);
SPA_FLAG_UPDATE(array_stack[iter->depth >> 3], mask, flag & __ARRAY_FLAG);
}
*value = iter->cur;
if (++iter->depth > 1)
continue;
iter->cur++;
return 1;
case '}': case ']':
if ((flag & __ARRAY_FLAG) && cur != ']')
goto error;
if ((flag & __OBJECT_FLAG) && cur != '}')
goto error;
iter->state = __STRUCT;
if (iter->depth == 0) {
if (iter->parent)
iter->parent->cur = iter->cur;
else
goto error;
return 0;
}
--iter->depth;
if ((iter->depth >> 3) < SPA_N_ELEMENTS(object_stack)) {
uint8_t mask = 1 << (iter->depth & 0x7);
if (SPA_FLAG_IS_SET(object_stack[iter->depth >> 3], mask))
iter->state |= __OBJECT_FLAG;
if (SPA_FLAG_IS_SET(array_stack[iter->depth >> 3], mask))
iter->state |= __ARRAY_FLAG;
}
continue;
case '\\':
/* disallow bare escape */
goto error;
default:
*value = iter->cur;
iter->state = __BARE;
iter->state = __BARE | flag;
}
continue;
case __BARE:
switch (cur) {
case '\t': case ' ': case '\r': case '\n':
case '"': case '#':
case ':': case ',': case '=': case ']': case '}':
iter->state = __STRUCT;
iter->state = __STRUCT | flag;
if (iter->depth > 0)
goto again;
return iter->cur - *value;
case '\\':
/* disallow bare escape */
goto error;
}
continue;
case __STRING:
switch (cur) {
case '\\':
iter->state = __ESC;
iter->state = __ESC | flag;
continue;
case '"':
iter->state = __STRUCT;
iter->state = __STRUCT | flag;
if (iter->depth > 0)
continue;
return ++iter->cur - *value;
@ -127,44 +173,98 @@ static inline int spa_json_next(struct spa_json * iter, const char **value)
SPA_FALLTHROUGH;
case 192 ... 223:
utf8_remain++;
iter->state = __UTF8;
iter->state = __UTF8 | flag;
continue;
default:
if (cur >= 32 && cur <= 126)
continue;
}
return -1;
goto error;
case __UTF8:
switch (cur) {
case 128 ... 191:
if (--utf8_remain == 0)
iter->state = __STRING;
iter->state = __STRING | flag;
continue;
}
return -1;
goto error;
case __ESC:
switch (cur) {
case '"': case '\\': case '/': case 'b': case 'f':
case 'n': case 'r': case 't': case 'u':
iter->state = __STRING;
iter->state = __STRING | flag;
continue;
}
return -1;
goto error;
case __COMMENT:
switch (cur) {
case '\n': case '\r':
iter->state = __STRUCT;
iter->state = __STRUCT | flag;
}
break;
default:
goto error;
}
}
if (iter->depth != 0)
return -1;
if (iter->state != __STRUCT) {
iter->state = __STRUCT;
if (iter->depth != 0 || iter->parent)
goto error;
switch (iter->state & ~__FLAGS) {
case __STRING: case __UTF8: case __ESC:
/* string/escape not closed */
goto error;
case __COMMENT:
/* trailing comment */
return 0;
}
if ((iter->state & ~__FLAGS) != __STRUCT) {
iter->state = __STRUCT | (iter->state & __FLAGS);
return iter->cur - *value;
}
return 0;
error:
iter->state |= __ERROR_FLAG;
while (iter->parent) {
if (iter->parent->state & __ERROR_FLAG)
break;
iter->parent->state |= __ERROR_FLAG;
iter->parent->cur = iter->cur;
iter = iter->parent;
}
return -1;
}
/**
 * Report whether \a iter is in the parse-error state and, if so, where.
 *
 * The error position is taken to be \a iter->cur. It is converted to a
 * 1-based line/column pair by walking the bytes from \a start up to that
 * position: every '\n' starts a new line and resets the column to 1, any
 * other byte advances the column by one (columns count bytes, not
 * characters).
 *
 * \param iter  tokenizer to query
 * \param start beginning of the text \a iter was parsing; when NULL no
 *              scan is done and the position is reported as line 1, column 1
 * \param line  if non-NULL, receives the line number of the error
 * \param col   if non-NULL, receives the column number of the error
 * \return true when the error flag is set on \a iter (the outputs are
 *         written), false otherwise (the outputs are left untouched)
 *
 * \since 1.1.0
 */
static inline bool spa_json_get_error(struct spa_json *iter, const char *start, int *line, int *col)
{
	const char *pos = start;
	int row = 1, column = 1;

	/* 0x40 is the error bit that spa_json_next() sets in the state */
	if ((iter->state & 0x40) == 0)
		return false;

	while (pos && pos != iter->cur) {
		if (*pos++ == '\n') {
			row += 1;
			column = 1;
		} else {
			column += 1;
		}
	}

	if (line)
		*line = row;
	if (col)
		*col = column;

	return true;
}
static inline int spa_json_enter_container(struct spa_json *iter, struct spa_json *sub, char type)

View File

@ -207,7 +207,7 @@ PWTEST(properties_new_string)
pw_properties_free(props);
props = pw_properties_new_string("foo=bar bar=\"baz");
props = pw_properties_new_string("foo=bar bar=\"baz\"");
pwtest_ptr_notnull(props);
pwtest_int_eq(props->flags, 0U);
pwtest_int_eq(props->dict.n_items, 2U);

View File

@ -81,6 +81,26 @@ static void expect_end(struct spa_json *it)
pwtest_int_eq(memcmp(&it2, it, sizeof(*it)), 0);
}
/*
 * Assert that reading the next token from `it` fails with a parse error
 * that spa_json_get_error() locates at (line, col) relative to `str`.
 *
 * Afterwards, walk `it` and each of its parents and assert that a further
 * spa_json_next() call on each still returns -1 and leaves the iterator
 * byte-for-byte unchanged.
 */
static void expect_parse_error(struct spa_json *it, const char *str, int line, int col)
{
	struct spa_json it2;
	const char *value;
	int linepos, colpos;

	pwtest_int_eq(spa_json_next(it, &value), -1);
	pwtest_bool_true(spa_json_get_error(it, str, &linepos, &colpos));
	pwtest_int_eq(linepos, line);
	pwtest_int_eq(colpos, col);

	/* parse error is idempotent also for parents */
	for (; it; it = it->parent) {
		/* snapshot the iterator so any mutation by the retry is detected */
		memcpy(&it2, it, sizeof(*it));
		pwtest_int_eq(spa_json_next(it, &value), -1);
		pwtest_int_eq(memcmp(&it2, it, sizeof(*it)), 0);
	}
}
static void expect_array(struct spa_json *it, struct spa_json *sub)
{
pwtest_int_eq(spa_json_enter_array(it, sub), 1);
@ -99,7 +119,7 @@ static void expect_string(struct spa_json *it, const char *str)
pwtest_int_gt((len = spa_json_next(it, &value)), 0);
check_type(TYPE_STRING, value, len);
s = alloca(len+1);
spa_json_parse_stringn(value, len, s, len+1);
pwtest_int_eq(spa_json_parse_stringn(value, len, s, len+1), 1);
pwtest_str_eq(s, str);
}
@ -158,9 +178,11 @@ static void expect_null(struct spa_json *it)
PWTEST(json_parse)
{
char buf[1024];
int i;
struct spa_json it[5];
const char *json = " { "
"\"foo\": \"bar\","
"\"foo\": \"bar\", # comment\n"
"\"foo\\\" \": true, "
"\"foo \\n\\r\\t\": false,"
" \" arr\": [ true, false, null, 5, 5.7, \"str]\"],"
@ -198,6 +220,8 @@ PWTEST(json_parse)
expect_float(&it[1], -1.8f);
expect_string(&it[1], "foo 6");
expect_float(&it[1], +2.8f);
expect_end(&it[1]);
expect_end(&it[0]);
/* in the array */
expect_type(&it[2], TYPE_TRUE);
expect_type(&it[2], TYPE_FALSE);
@ -218,6 +242,79 @@ PWTEST(json_parse)
expect_string(&it[3], "1.9");
expect_float(&it[3], 1.9f);
expect_end(&it[3]);
expect_end(&it[2]);
pwtest_bool_false(spa_json_get_error(&it[0], NULL, NULL, NULL));
pwtest_bool_false(spa_json_get_error(&it[1], NULL, NULL, NULL));
pwtest_bool_false(spa_json_get_error(&it[2], NULL, NULL, NULL));
pwtest_bool_false(spa_json_get_error(&it[3], NULL, NULL, NULL));
json = "section={\"key\":value}, section2=[item1,item2]";
spa_json_init(&it[0], json, strlen(json));
expect_string_or_bare(&it[0], "section");
expect_object(&it[0], &it[1]);
expect_string_or_bare(&it[0], "section2");
expect_array(&it[0], &it[1]);
expect_end(&it[0]);
spa_json_init(&it[0], json, strlen(json));
expect_string_or_bare(&it[0], "section");
expect_object(&it[0], &it[1]);
expect_string(&it[1], "key");
expect_string_or_bare(&it[1], "value");
expect_string_or_bare(&it[0], "section2");
expect_array(&it[0], &it[1]);
expect_string_or_bare(&it[1], "item1");
expect_string_or_bare(&it[1], "item2");
expect_end(&it[0]);
/* 2-byte utf8 */
json = "\"\xc3\xa4\", \"\xc3\xa4\"";
spa_json_init(&it[0], json, strlen(json));
expect_string(&it[0], "\xc3\xa4");
expect_string(&it[0], "\xc3\xa4");
expect_end(&it[0]);
/* 3-byte utf8 */
json = "\"\xe6\xad\xa3\", \"\xe6\xad\xa3\"";
spa_json_init(&it[0], json, strlen(json));
expect_string(&it[0], "\xe6\xad\xa3");
expect_string(&it[0], "\xe6\xad\xa3");
expect_end(&it[0]);
/* 4-byte utf8 */
json = "\"\xf0\x92\x80\x80\", \"\xf0\x92\x80\x80\"";
spa_json_init(&it[0], json, strlen(json));
expect_string(&it[0], "\xf0\x92\x80\x80");
expect_string(&it[0], "\xf0\x92\x80\x80");
expect_end(&it[0]);
/* run-in comment in bare */
json = "foo#comment";
spa_json_init(&it[0], json, strlen(json));
expect_string_or_bare(&it[0], "foo");
expect_end(&it[0]);
/* end of parsing idempotent */
json = "{}";
spa_json_init(&it[0], json, strlen(json));
expect_object(&it[0], &it[1]);
expect_end(&it[0]);
expect_end(&it[0]);
/* overflowing parser nesting stack is not an error */
for (i = 0; i < 256; ++i)
buf[i] = '[';
for (; i < 512; ++i)
buf[i] = ']';
buf[i++] = '\0';
spa_json_init(&it[0], buf, strlen(buf));
pwtest_int_eq(spa_json_next(&it[0], &value), 1);
expect_end(&it[0]);
/* non-null terminated strings OK */
json = "1.234";
spa_json_init(&it[0], json, 4);
@ -267,6 +364,149 @@ PWTEST(json_parse)
return PWTEST_PASS;
}
/*
 * Negative tests for the JSON tokenizer: each case feeds malformed input and
 * checks, through expect_parse_error(), that spa_json_next() returns -1 and
 * that spa_json_get_error() reports the expected 1-based line/column.
 * A column one past the last character means the error was only detected
 * when the input ran out.
 */
PWTEST(json_parse_fail)
{
char buf[1024];
struct spa_json it[5];
const char *json, *value;
int i;
/* = in array: the error is reported at the '=' itself (column 7) */
json = "[ foo = bar ]";
spa_json_init(&it[0], json, strlen(json));
expect_array(&it[0], &it[1]);
expect_string_or_bare(&it[1], "foo");
expect_parse_error(&it[1], json, 1, 7);
expect_parse_error(&it[1], json, 1, 7); /* parse error is idempotent */
expect_parse_error(&it[0], json, 1, 7); /* parse error visible in parent */
/* : in array, on the second input line: checks that line counting works */
json = "[ foo, bar\n : quux ]";
spa_json_init(&it[0], json, strlen(json));
expect_array(&it[0], &it[1]);
expect_string_or_bare(&it[1], "foo");
expect_string_or_bare(&it[1], "bar");
expect_parse_error(&it[1], json, 2, 2);
/* missing ]: the opening '[' is returned as a token, the error shows up at end of input */
json = "[ foo, bar";
spa_json_init(&it[0], json, strlen(json));
pwtest_int_eq(spa_json_next(&it[0], &value), 1);
expect_parse_error(&it[0], json, 1, 11);
/* spurious ]: both bare words (3 bytes each) are returned before the stray ']' fails */
json = "foo, bar ]";
spa_json_init(&it[0], json, strlen(json));
pwtest_int_eq(spa_json_next(&it[0], &value), 3);
pwtest_int_eq(spa_json_next(&it[0], &value), 3);
expect_parse_error(&it[0], json, 1, 10);
/* spurious }: the first object is well-formed, the second '}' has nothing to close */
json = "{ foo, bar } }";
spa_json_init(&it[0], json, strlen(json));
expect_object(&it[0], &it[1]);
expect_parse_error(&it[0], json, 1, 14);
/* bad nesting: outermost '{' is closed by ']' after deeply nested, well-formed content */
json = "{ {[{[{[{[{[{[{[{[{[{[{[{[ ]}]}]}]}]}]}]}]}]}]}]}]} ]";
spa_json_init(&it[0], json, strlen(json));
pwtest_int_eq(spa_json_next(&it[0], &value), 1);
expect_parse_error(&it[0], json, 1, strlen(json));
/* bad nesting: the mirror case, outermost '[' is closed by '}' */
json = "[ {[{[{[{[{[{[{[{[{[{[{[{[ ]}]}]}]}]}]}]}]}]}]}]}]} }";
spa_json_init(&it[0], json, strlen(json));
pwtest_int_eq(spa_json_next(&it[0], &value), 1);
expect_parse_error(&it[0], json, 1, strlen(json));
/* unclosed string: input ends inside the quoted string */
json = "\"foo";
spa_json_init(&it[0], json, strlen(json));
expect_parse_error(&it[0], json, 1, 5);
/* unclosed string: a '"' ends the bare word and opens a string that is never closed */
json = "foo\"";
spa_json_init(&it[0], json, strlen(json));
expect_string_or_bare(&it[0], "foo");
expect_parse_error(&it[0], json, 1, 5);
/* unclosed string: same as above, with content after the opening '"' */
json = "foo\"bar";
spa_json_init(&it[0], json, strlen(json));
expect_string_or_bare(&it[0], "foo");
expect_parse_error(&it[0], json, 1, 8);
/* unclosed escape: input ends right after the backslash inside a string */
json = "\"\\";
spa_json_init(&it[0], json, strlen(json));
expect_parse_error(&it[0], json, 1, 3);
/* bare escape: backslash in the middle of a bare word, reported at the backslash */
json = "foo\\n";
spa_json_init(&it[0], json, strlen(json));
expect_parse_error(&it[0], json, 1, 4);
/* bare escape: backslash as the first character of a bare word */
json = "\\nfoo";
spa_json_init(&it[0], json, strlen(json));
expect_parse_error(&it[0], json, 1, 1);
/* bad nesting in subparser: the inner array is fine, the enclosing object is never closed */
json = "{[]";
spa_json_init(&it[0], json, strlen(json));
expect_object(&it[0], &it[1]);
expect_array(&it[1], &it[2]);
expect_parse_error(&it[1], json, 1, 4);
/* entered parser assumes nesting: an iterator with a parent must not reach end of input */
json = "[]";
spa_json_init(&it[0], json, strlen(json));
spa_json_enter(&it[0], &it[1]);
expect_array(&it[1], &it[2]);
expect_parse_error(&it[1], json, 1, 3);
/* overflowing parser nesting stack: 256 '[' followed by 255 ']' and a final
 * mismatching '}'; the mismatch at the outermost level must still be caught */
for (i = 0; i < 256; ++i)
buf[i] = '[';
for (; i < 511; ++i)
buf[i] = ']';
buf[i++] = '}';
buf[i++] = '\0';
spa_json_init(&it[0], buf, strlen(buf));
pwtest_int_eq(spa_json_next(&it[0], &value), 1);
expect_parse_error(&it[0], buf, 1, strlen(buf));
/* bad utf8: multi-byte lead bytes followed by too few continuation bytes;
 * the error is reported at the closing '"' found where a continuation byte was expected */
json = "\"\xc0\"";
spa_json_init(&it[0], json, strlen(json));
expect_parse_error(&it[0], json, 1, 3);
json = "\"\xe6\xad\"";
spa_json_init(&it[0], json, strlen(json));
expect_parse_error(&it[0], json, 1, 4);
json = "\"\xf0\x92\x80\"";
spa_json_init(&it[0], json, strlen(json));
expect_parse_error(&it[0], json, 1, 5);
/* bad string: raw control bytes inside a quoted string, reported at the byte itself */
json = "\"\x01\"";
spa_json_init(&it[0], json, strlen(json));
expect_parse_error(&it[0], json, 1, 2);
json = "\"\x0f\"";
spa_json_init(&it[0], json, strlen(json));
expect_parse_error(&it[0], json, 1, 2);
/* bad escape: 'z' is not an accepted escape character, reported at the 'z' */
json = "\"\\z\"";
spa_json_init(&it[0], json, strlen(json));
expect_parse_error(&it[0], json, 1, 3);
return PWTEST_PASS;
}
PWTEST(json_encode)
{
char dst[128];
@ -431,6 +671,7 @@ PWTEST_SUITE(spa_json)
{
pwtest_add(json_abi, PWTEST_NOARG);
pwtest_add(json_parse, PWTEST_NOARG);
pwtest_add(json_parse_fail, PWTEST_NOARG);
pwtest_add(json_encode, PWTEST_NOARG);
pwtest_add(json_array, PWTEST_NOARG);
pwtest_add(json_overflow, PWTEST_NOARG);