Refactor JSMN for better conformance to JSON spec and to pass our more stringent tests

This commit is contained in:
Aaron Culliney 2016-03-25 22:32:53 -07:00
parent 163035b0c7
commit 87ae0f08e0
2 changed files with 356 additions and 20 deletions

369
externals/jsmn/jsmn.c vendored
View File

@ -1,5 +1,30 @@
#include "jsmn.h"
#include <assert.h>
/*
 * Parser-internal view of a token.
 *
 * jsmn_alloc_token() and jsmn_parse() cast jsmntok_t pointers to this type,
 * so the layout here has to overlay jsmntok_t: the public fields come first
 * and the two trailing ints occupy the privdata[2] slots that jsmn.h appends
 * after `parent`.
 * NOTE(review): the head of jsmntok_t is not visible from here -- confirm the
 * field order against jsmn.h whenever either struct changes.
 */
typedef struct {
jsmntype_t type;
int start;
int end;
int size;
int skip;
int parent;
/* private data (overlays jsmntok_t.privdata) */
/*
 * Last syntax element recorded on this token by jsmn_parse(): either a public
 * jsmntype_t value or one of the private _JSMN_* codes. jsmn_alloc_token()
 * initialises it to JSMN_UNDEFINED.
 */
int _lasttype;
/*
 * Nesting depth recorded by jsmn_parse(); jsmn_alloc_token() initialises it
 * to -1.
 */
int _depth;
} jsmntok_priv_t;
/**
 * Private parsing tokens.
 *
 * These codes are stored in jsmntok_priv_t._lasttype next to ordinary
 * jsmntype_t values, and jsmn_parse() switches on them to decide whether the
 * next character is legal after what was seen last.
 * NOTE(review): numbering presumably starts at 201 to stay clear of the
 * jsmntype_t values declared in jsmn.h -- confirm there is no overlap.
 */
enum {
/* a ':' was accepted after an object key */
_JSMN_SEPARATOR_COLON = 201,
/* a ',' was accepted after a value */
_JSMN_SEPARATOR_COMMA,
/* a closing '}' was matched to its object token */
_JSMN_OBJECT_END,
/* a closing ']' was matched to its array token */
_JSMN_ARRAY_END,
/* a string that jsmn_parse() classified as an object key rather than a value */
_JSMN_STRING_KEY,
};
/**
* Allocates a fresh unused token from the token pool.
*/
@ -14,20 +39,190 @@ static jsmntok_t *jsmn_alloc_token(jsmn_parser *parser,
tok->size = 0;
tok->skip = 1;
tok->parent = -1;
((jsmntok_priv_t *)tok)->_lasttype = JSMN_UNDEFINED;
((jsmntok_priv_t *)tok)->_depth = -1;
return tok;
}
/**
* Fills token type and boundaries.
*/
static void jsmn_fill_token(jsmntok_t *token, jsmntype_t type,
int start, int end) {
static void jsmn_fill_token(jsmntok_t *token, jsmntype_t type, int start, int end) {
    /* the child count always starts from zero when a token is (re)filled */
    token->size = 0;

    /* record the token kind and its boundaries exactly as supplied */
    token->type = type;
    token->start = start;
    token->end = end;
}
/**
 * Consume a run of one or more decimal digits.
 *
 * js    - buffer being parsed
 * token - token whose `end` bounds the scan
 * start - index of the first character to examine; it is read without a
 *         bounds check, so the caller must ensure start < token->end
 * end   - on success receives the index just past the last digit consumed;
 *         left untouched on failure
 *
 * Returns 0 on success, or JSMN_ERROR_PRIMITIVE_INVAL when js[start] is not
 * a digit.
 */
static int jsmn_parse_primitive_digits(const char *js, jsmntok_t *token, int start, int *end) {
    int pos = start;

    /* there must be at least one digit */
    if (js[pos] < '0' || js[pos] > '9') {
        return JSMN_ERROR_PRIMITIVE_INVAL;
    }

    /* step over the remaining digits without leaving the token */
    do {
        ++pos;
    } while (pos < token->end && js[pos] >= '0' && js[pos] <= '9');

    *end = pos;
    return 0;
}
/**
 * Parse an exponent: 'e' or 'E', an optional sign, then one or more digits.
 * The exponent must run exactly to token->end.
 *
 * js    - buffer being parsed
 * token - token whose `end` bounds the scan
 * start - index of the expected exponent marker (asserted < token->end)
 * end   - on success receives token->end; left untouched on failure
 *
 * Returns 0 on success, JSMN_ERROR_PART when the token stops right after the
 * marker or the sign, and JSMN_ERROR_PRIMITIVE_INVAL for a missing marker,
 * a missing digit, or characters left over after the digits.
 */
static int jsmn_parse_primitive_exp(const char *js, jsmntok_t *token, int start, int *end) {
    const int limit = token->end;
    int pos = start;
    int status;

    assert(pos < limit);

    /* exponent marker */
    if (!(js[pos] == 'e' || js[pos] == 'E')) {
        return JSMN_ERROR_PRIMITIVE_INVAL;
    }
    pos++;
    if (pos >= limit) {
        return JSMN_ERROR_PART;
    }

    /* optional sign; something has to follow it */
    switch (js[pos]) {
    case '+':
    case '-':
        pos++;
        if (pos >= limit) {
            return JSMN_ERROR_PART;
        }
        break;
    default:
        break;
    }

    status = jsmn_parse_primitive_digits(js, token, pos, &pos);
    if (status != 0) {
        return status;
    }

    /* nothing may trail the exponent digits */
    if (pos != limit) {
        return JSMN_ERROR_PRIMITIVE_INVAL;
    }

    *end = pos;
    return 0;
}
/**
 * Parse what may follow the integer part of a number: either a fraction
 * ('.' plus digits) optionally followed by an exponent, or an exponent alone.
 *
 * js    - buffer being parsed
 * token - token whose `end` bounds the scan
 * start - index of the first character after the integer part (asserted
 *         < token->end)
 * end   - on success receives the index reached; left untouched on failure
 *
 * Returns 0 on success, JSMN_ERROR_PART when the token stops right after the
 * '.', or whatever error the digit/exponent helpers report.
 */
static int jsmn_parse_primitive_frac_or_exp(const char *js, jsmntok_t *token, int start, int *end) {
    const int limit = token->end;
    int pos = start;
    int status;

    assert(pos < limit);

    if (js[pos] == '.') {
        pos++;
        if (pos >= limit) {
            return JSMN_ERROR_PART;
        }

        status = jsmn_parse_primitive_digits(js, token, pos, &pos);
        if (status != 0) {
            return status;
        }

        /* a bare fraction with nothing after it is complete */
        if (pos >= limit) {
            *end = pos;
            return 0;
        }
    }

    /* anything still unread has to be an exponent */
    status = jsmn_parse_primitive_exp(js, token, pos, &pos);
    if (status != 0) {
        return status;
    }

    *end = pos;
    return 0;
}
/**
 * Validate that the characters in [token->start, token->end) form a number.
 *
 * Accepted shape: an optional '-', then either a single '0' or a run of
 * digits, then an optional fraction and/or exponent. A '0' followed by more
 * digits (e.g. "01") is rejected.
 *
 * js    - buffer being parsed
 * token - token delimiting the candidate number
 *
 * Returns 0 when the whole token is a valid number. Otherwise returns
 * JSMN_ERROR_PRIMITIVE_INVAL for an empty or malformed token, or the error
 * reported by the fraction/exponent helper (which may be JSMN_ERROR_PART when
 * the token ends in the middle of a fraction or exponent). Both the
 * leading-zero path and the general path propagate the helper's code.
 */
static int jsmn_parse_primitive_number(const char *js, jsmntok_t *token) {
    int i;
    int ret;

    i = token->start;
    if (token->end - token->start <= 0) {
        return JSMN_ERROR_PRIMITIVE_INVAL;
    }

    /* parse negative */
    if (js[i] == '-') {
        ++i;
        if (i >= token->end) {
            return JSMN_ERROR_PART;
        }
    }

    /* parse beginning zero: only a fraction or exponent may follow it */
    if (js[i] == '0') {
        ++i;
        if (i < token->end) {
            ret = jsmn_parse_primitive_frac_or_exp(js, token, i, &i);
            if (ret) {
                return ret;
            }
            if (i < token->end) {
                return JSMN_ERROR_PRIMITIVE_INVAL;
            }
        }
        return 0;
    }

    /* parse main digits */
    ret = jsmn_parse_primitive_digits(js, token, i, &i);
    if (ret) {
        return ret;
    }
    if (i == token->end) {
        return 0;
    }

    /* parse remaining fraction or exponent */
    assert(i < token->end);
    ret = jsmn_parse_primitive_frac_or_exp(js, token, i, &i);
    if (ret) {
        /*
         * Report the helper's own status. The helper leaves `i` untouched on
         * failure, so testing `i < token->end` first would always replace
         * the real error with JSMN_ERROR_PRIMITIVE_INVAL.
         */
        return ret;
    }
    if (i < token->end) {
        return JSMN_ERROR_PRIMITIVE_INVAL;
    }
    return 0;
}
/**
* Fills next available token with JSON primitive.
*/
@ -49,9 +244,11 @@ static int jsmn_parse_primitive(jsmn_parser *parser, const char *js,
return JSMN_ERROR_INVAL;
}
}
/* In strict mode primitive must be followed by a comma/object/array */
parser->pos = start;
return JSMN_ERROR_PART;
if ( (parser->pos < len) && (js[parser->pos] != '\0') ) {
/* In strict mode primitive must be followed by a comma/object/array or end-of-string */
parser->pos = start;
return JSMN_ERROR_PART;
}
found:
token = jsmn_alloc_token(parser, tokens, num_tokens);
@ -61,6 +258,32 @@ found:
}
jsmn_fill_token(token, JSMN_PRIMITIVE, start, parser->pos);
token->parent = parser->toksuper;
/* parse/validate primitives according */
do {
if (js[start] == 't') {
if ( (token->end - start == 4) && (js[start+1] == 'r') && (js[start+2] == 'u') && (js[start+3] == 'e') ) {
break;
}
} else if (js[start] == 'f') {
if ( (token->end - start == 5) && (js[start+1] == 'a') && (js[start+2] == 'l') && (js[start+3] == 's') && (js[start+4] == 'e') ) {
break;
}
} else if (js[start] == 'n') {
if ( (token->end - start == 4) && (js[start+1] == 'u') && (js[start+2] == 'l') && (js[start+3] == 'l') ) {
break;
}
} else {
if (jsmn_parse_primitive_number(js, token) == 0) {
break;
}
}
/* primitive validation failed */
parser->pos = token->start;
return JSMN_ERROR_PRIMITIVE_INVAL;
} while (0);
parser->pos--;
return 0;
}
@ -148,7 +371,8 @@ int jsmn_parse(jsmn_parser *parser, const char *js, size_t len,
jsmntok_t *tokens, unsigned int num_tokens) {
int r;
int i;
jsmntok_t *token;
jsmntok_priv_t *token;
int depth = 0;
int count = parser->toknext;
if (tokens == NULL) {
@ -162,25 +386,60 @@ int jsmn_parse(jsmn_parser *parser, const char *js, size_t len,
c = js[parser->pos];
switch (c) {
case '{': case '[':
/* check previous token is valid */
if (parser->toknext >= 1) {
token = (jsmntok_priv_t *)&tokens[parser->toknext - 1];
depth = token->_depth;
switch (token->_lasttype) {
case _JSMN_SEPARATOR_COMMA:
case _JSMN_SEPARATOR_COLON:
case JSMN_ARRAY:
break;
default:
return JSMN_ERROR_INVAL;
}
}
/* check parent token is valid */
if (parser->toksuper != -1) {
token = (jsmntok_priv_t *)&tokens[parser->toksuper];
if (token->type == JSMN_OBJECT) {
return JSMN_ERROR_INVAL;
}
}
count++;
token = jsmn_alloc_token(parser, tokens, num_tokens);
if (token == NULL)
return JSMN_ERROR_NOMEM;
token = (jsmntok_priv_t *)jsmn_alloc_token(parser, tokens, num_tokens);
if (token == NULL) return JSMN_ERROR_NOMEM;
if (parser->toksuper != -1) {
tokens[parser->toksuper].size++;
token->parent = parser->toksuper;
jsmn_percolate_skip_counts(tokens, parser->toksuper);
}
token->type = (c == '{' ? JSMN_OBJECT : JSMN_ARRAY);
token->_lasttype = token->type;
token->_depth = depth+1;
token->start = parser->pos;
parser->toksuper = parser->toknext - 1;
break;
case '}': case ']':
type = (c == '}' ? JSMN_OBJECT : JSMN_ARRAY);
if (parser->toksuper == -1) {
return JSMN_ERROR_INVAL;
}
if (parser->toknext < 1) {
return JSMN_ERROR_INVAL;
}
token = &tokens[parser->toknext - 1];
token = (jsmntok_priv_t *)&tokens[parser->toknext - 1];
switch (token->_lasttype) {
case JSMN_PRIMITIVE:
case JSMN_STRING:
case JSMN_OBJECT:
case JSMN_ARRAY:
case _JSMN_OBJECT_END:
case _JSMN_ARRAY_END:
break;
default:
return JSMN_ERROR_INVAL;
}
for (;;) {
if (token->start != -1 && token->end == -1) {
if (token->type != type) {
@ -188,34 +447,83 @@ int jsmn_parse(jsmn_parser *parser, const char *js, size_t len,
}
token->end = parser->pos + 1;
parser->toksuper = token->parent;
token->_depth--;
break;
}
if (token->parent == -1) {
break;
}
token = &tokens[token->parent];
token = (jsmntok_priv_t *)&tokens[token->parent];
}
token->_lasttype = token->type == JSMN_OBJECT ? _JSMN_OBJECT_END : _JSMN_ARRAY_END;
break;
case '\"':
if (parser->toknext >= 1) {
token = (jsmntok_priv_t *)&tokens[parser->toknext - 1];
depth = token->_depth;
switch (token->_lasttype) {
case JSMN_OBJECT:
case JSMN_ARRAY:
case _JSMN_SEPARATOR_COLON:
case _JSMN_SEPARATOR_COMMA:
break;
default:
return JSMN_ERROR_INVAL;
}
}
r = jsmn_parse_string(parser, js, len, tokens, num_tokens);
if (r < 0) return r;
count++;
if (parser->toksuper != -1) {
tokens[parser->toksuper].size++;
jsmn_percolate_skip_counts(tokens, parser->toksuper);
token = (jsmntok_priv_t *)&tokens[parser->toksuper];
type = token->_lasttype == JSMN_OBJECT ? _JSMN_STRING_KEY : JSMN_STRING;
} else {
type = JSMN_STRING;
}
token = (jsmntok_priv_t *)&tokens[parser->toknext - 1];
token->_lasttype = type;
token->_depth = depth;
break;
case '\t' : case '\r' : case '\n' : case ' ':
break;
case ':':
if (parser->toksuper == -1) {
return JSMN_ERROR_INVAL;
}
assert(parser->toknext >= 1);
token = (jsmntok_priv_t *)&tokens[parser->toknext - 1];
switch (token->_lasttype) {
case _JSMN_STRING_KEY:
break;
default:
return JSMN_ERROR_INVAL;
}
parser->toksuper = parser->toknext - 1;
token->_lasttype = _JSMN_SEPARATOR_COLON;
break;
case ',':
if (parser->toksuper != -1 &&
if (parser->toksuper == -1) {
return JSMN_ERROR_INVAL;
}
assert(parser->toknext >= 1);
token = (jsmntok_priv_t *)&tokens[parser->toknext - 1];
switch (token->_lasttype) {
case JSMN_PRIMITIVE:
case JSMN_STRING:
case _JSMN_OBJECT_END:
case _JSMN_ARRAY_END:
break;
default:
return JSMN_ERROR_INVAL;
}
if (
tokens[parser->toksuper].type != JSMN_ARRAY &&
tokens[parser->toksuper].type != JSMN_OBJECT) {
parser->toksuper = tokens[parser->toksuper].parent;
}
token->_lasttype = _JSMN_SEPARATOR_COMMA;
break;
/* In strict mode primitives are: numbers and booleans */
case '-': case '0': case '1' : case '2': case '3' : case '4':
@ -223,11 +531,23 @@ int jsmn_parse(jsmn_parser *parser, const char *js, size_t len,
case 't': case 'f': case 'n' :
/* And they must not be keys of the object */
if (parser->toksuper != -1) {
jsmntok_t *t = &tokens[parser->toksuper];
if (t->type == JSMN_OBJECT ||
(t->type == JSMN_STRING && t->size != 0)) {
token = (jsmntok_priv_t *)&tokens[parser->toksuper];
if (token->type == JSMN_OBJECT ||
(token->type == JSMN_STRING && token->size != 0)) {
return JSMN_ERROR_INVAL;
}
depth = token->_depth;
}
if (parser->toknext >= 1) {
token = (jsmntok_priv_t *)&tokens[parser->toknext - 1];
switch (token->_lasttype) {
case JSMN_ARRAY:
case _JSMN_SEPARATOR_COLON:
case _JSMN_SEPARATOR_COMMA:
break;
default:
return JSMN_ERROR_INVAL;
}
}
r = jsmn_parse_primitive(parser, js, len, tokens, num_tokens);
if (r < 0) return r;
@ -236,6 +556,9 @@ int jsmn_parse(jsmn_parser *parser, const char *js, size_t len,
tokens[parser->toksuper].size++;
jsmn_percolate_skip_counts(tokens, parser->toksuper);
}
token = (jsmntok_priv_t *)&tokens[parser->toknext - 1];
token->_lasttype = JSMN_PRIMITIVE;
token->_depth = depth;
break;
/* Unexpected char in strict mode */
@ -244,13 +567,23 @@ int jsmn_parse(jsmn_parser *parser, const char *js, size_t len,
}
}
for (i = parser->toknext - 1; i >= 0; i--) {
if (tokens[0].type == JSMN_OBJECT || tokens[0].type == JSMN_ARRAY) {
/* Unmatched opened object or array */
if (tokens[i].start != -1 && tokens[i].end == -1) {
return JSMN_ERROR_PART;
if (((jsmntok_priv_t *)(&tokens[0]))->_depth != 0) {
return JSMN_ERROR_INVAL;
}
}
#ifndef NDEBUG
/* sanity checks */
for (i = 0; i < parser->toknext; i++) {
assert(tokens[i].start != -1);
assert(tokens[i].end != -1);
assert(((jsmntok_priv_t *)(&tokens[i]))->_depth != -1);
assert(((jsmntok_priv_t *)(&tokens[i]))->_lasttype != JSMN_UNDEFINED);
}
#endif
return count;
}

View File

@ -25,10 +25,12 @@ typedef enum {
enum jsmnerr {
/* Not enough tokens were provided */
JSMN_ERROR_NOMEM = -1,
/* Invalid character inside JSON string */
/* Generic invalid character JSON string */
JSMN_ERROR_INVAL = -2,
/* The string is not a full JSON packet, more bytes expected */
JSMN_ERROR_PART = -3
JSMN_ERROR_PART = -3,
/* A JSON primitive is invalid */
JSMN_ERROR_PRIMITIVE_INVAL = -202
};
/**
@ -44,6 +46,7 @@ typedef struct {
int size;
int skip;
int parent;
int privdata[2];
} jsmntok_t;
/**