//===--- JSONParser.h - Simple JSON parser ----------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file implements a JSON parser.
//
//  See http://www.json.org/ for an overview.
//  See http://www.ietf.org/rfc/rfc4627.txt for the full standard.
//
//  FIXME: Currently this supports a subset of JSON. Specifically, support
//  for numbers, booleans and null for values is missing.
//
//===----------------------------------------------------------------------===//
|
|
|
|
|
2011-12-20 09:26:26 +00:00
|
|
|
#ifndef LLVM_SUPPORT_JSON_PARSER_H
|
|
|
|
#define LLVM_SUPPORT_JSON_PARSER_H
|
2011-12-16 13:09:10 +00:00
|
|
|
|
|
|
|
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SourceMgr.h"
#include <iterator>
|
2011-12-16 13:09:10 +00:00
|
|
|
|
|
|
|
namespace llvm {
|
|
|
|
|
2012-01-17 09:34:07 +00:00
|
|
|
// Forward declarations of node types that are referenced before their full
// definitions further down in this header.
class JSONValue;
class JSONString;
class JSONKeyValuePair;
class JSONContainer;
|
|
|
|
|
|
|
|
/// \brief Base class for a parsable JSON atom.
///
/// This class has no semantics other than being a unit of JSON data which can
/// be parsed out of a JSON document. It only stores the kind tag that the
/// classof() helpers of the subclasses inspect.
class JSONAtom {
public:
  /// \brief Possible types of JSON objects.
  enum Kind { JK_KeyValuePair, JK_Array, JK_Object, JK_String };

  /// \brief Returns the type of this value.
  Kind getKind() const { return MyKind; }

  /// \brief dyn_cast helper; every atom is trivially a JSONAtom.
  static bool classof(const JSONAtom *) { return true; }

protected:
  /// \brief Creates an atom tagged with the given kind.
  ///
  /// Only reachable from subclasses. Marked explicit so that a bare Kind value
  /// is never converted into an atom implicitly.
  explicit JSONAtom(Kind MyKind) : MyKind(MyKind) {}

private:
  /// \brief The kind tag, fixed at construction.
  Kind MyKind;
};
|
|
|
|
|
|
|
|
/// \brief A parser for JSON text.
|
|
|
|
///
|
|
|
|
/// Use an object of JSONParser to iterate over the values of a JSON text.
|
|
|
|
/// All objects are parsed during the iteration, so you can only iterate once
|
|
|
|
/// over the JSON text, but the cost of partial iteration is minimized.
|
|
|
|
/// Create a new JSONParser if you want to iterate multiple times.
|
|
|
|
class JSONParser {
|
|
|
|
public:
|
|
|
|
/// \brief Create a JSONParser for the given input.
|
|
|
|
///
|
|
|
|
/// Parsing is started via parseRoot(). Access to the object returned from
|
|
|
|
/// parseRoot() will parse the input lazily.
|
2011-12-21 18:16:39 +00:00
|
|
|
JSONParser(StringRef Input, SourceMgr *SM);
|
2011-12-16 13:09:10 +00:00
|
|
|
|
|
|
|
/// \brief Returns the outermost JSON value (either an array or an object).
|
|
|
|
///
|
|
|
|
/// Can return NULL if the input does not start with an array or an object.
|
2011-12-20 10:42:52 +00:00
|
|
|
/// The object is not parsed yet - the caller must iterate over the
|
|
|
|
/// returned object to trigger parsing.
|
2011-12-16 13:09:10 +00:00
|
|
|
///
|
|
|
|
/// A JSONValue can be either a JSONString, JSONObject or JSONArray.
|
|
|
|
JSONValue *parseRoot();
|
|
|
|
|
|
|
|
/// \brief Parses the JSON text and returns whether it is valid JSON.
|
|
|
|
///
|
|
|
|
/// In case validate() return false, failed() will return true and
|
|
|
|
/// getErrorMessage() will return the parsing error.
|
|
|
|
bool validate();
|
|
|
|
|
|
|
|
/// \brief Returns true if an error occurs during parsing.
|
|
|
|
///
|
|
|
|
/// If there was an error while parsing an object that was created by
|
|
|
|
/// iterating over the result of 'parseRoot', 'failed' will return true.
|
|
|
|
bool failed() const;
|
|
|
|
|
|
|
|
private:
|
|
|
|
/// \brief These methods manage the implementation details of parsing new JSON
|
|
|
|
/// atoms.
|
|
|
|
/// @{
|
|
|
|
JSONString *parseString();
|
|
|
|
JSONValue *parseValue();
|
|
|
|
JSONKeyValuePair *parseKeyValuePair();
|
|
|
|
/// @}
|
|
|
|
|
2012-01-17 09:34:07 +00:00
|
|
|
/// \brief Helpers to parse the elements out of both forms of containers.
|
2011-12-16 13:09:10 +00:00
|
|
|
/// @{
|
2012-01-17 09:34:07 +00:00
|
|
|
const JSONAtom *parseElement(JSONAtom::Kind ContainerKind);
|
|
|
|
StringRef::iterator parseFirstElement(JSONAtom::Kind ContainerKind,
|
|
|
|
char StartChar, char EndChar,
|
|
|
|
const JSONAtom *&Element);
|
|
|
|
StringRef::iterator parseNextElement(JSONAtom::Kind ContainerKind,
|
|
|
|
char EndChar,
|
|
|
|
const JSONAtom *&Element);
|
2011-12-16 13:09:10 +00:00
|
|
|
/// @}
|
|
|
|
|
|
|
|
/// \brief Whitespace parsing.
|
|
|
|
/// @{
|
|
|
|
void nextNonWhitespace();
|
|
|
|
bool isWhitespace();
|
|
|
|
/// @}
|
|
|
|
|
|
|
|
/// \brief These methods are used for error handling.
|
|
|
|
/// {
|
|
|
|
void setExpectedError(StringRef Expected, StringRef Found);
|
|
|
|
void setExpectedError(StringRef Expected, char Found);
|
|
|
|
bool errorIfAtEndOfFile(StringRef Message);
|
|
|
|
bool errorIfNotAt(char C, StringRef Message);
|
|
|
|
/// }
|
|
|
|
|
2011-12-20 10:42:52 +00:00
|
|
|
/// \brief Skips all elements in the given container.
|
2012-01-17 09:34:07 +00:00
|
|
|
bool skipContainer(const JSONContainer &Container);
|
2011-12-20 10:42:52 +00:00
|
|
|
|
|
|
|
/// \brief Skips to the next position behind the given JSON atom.
|
|
|
|
bool skip(const JSONAtom &Atom);
|
|
|
|
|
2011-12-16 13:09:10 +00:00
|
|
|
/// All nodes are allocated by the parser and will be deallocated when the
|
|
|
|
/// parser is destroyed.
|
|
|
|
BumpPtrAllocator ValueAllocator;
|
|
|
|
|
|
|
|
/// \brief The original input to the parser.
|
2011-12-21 18:16:39 +00:00
|
|
|
MemoryBuffer *InputBuffer;
|
|
|
|
|
|
|
|
/// \brief The source manager used for diagnostics and buffer management.
|
|
|
|
SourceMgr *SM;
|
2011-12-16 13:09:10 +00:00
|
|
|
|
|
|
|
/// \brief The current position in the parse stream.
|
|
|
|
StringRef::iterator Position;
|
|
|
|
|
2011-12-21 18:16:39 +00:00
|
|
|
/// \brief The end position for fast EOF checks without introducing
|
|
|
|
/// unnecessary dereferences.
|
|
|
|
StringRef::iterator End;
|
|
|
|
|
|
|
|
/// \brief If true, an error has occurred.
|
|
|
|
bool Failed;
|
2011-12-16 13:09:10 +00:00
|
|
|
|
|
|
|
friend class JSONContainer;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/// \brief Base class for JSON value objects.
|
|
|
|
///
|
|
|
|
/// This object represents an abstract JSON value. It is the root node behind
|
|
|
|
/// the group of JSON entities that can represent top-level values in a JSON
|
|
|
|
/// document. It has no API, and is just a placeholder in the type hierarchy of
|
|
|
|
/// nodes.
|
|
|
|
class JSONValue : public JSONAtom {
|
|
|
|
protected:
|
|
|
|
JSONValue(Kind MyKind) : JSONAtom(MyKind) {}
|
|
|
|
|
|
|
|
public:
|
|
|
|
/// \brief dyn_cast helpers
|
|
|
|
///@{
|
|
|
|
static bool classof(const JSONAtom *Atom) {
|
|
|
|
switch (Atom->getKind()) {
|
|
|
|
case JK_Array:
|
|
|
|
case JK_Object:
|
|
|
|
case JK_String:
|
|
|
|
return true;
|
|
|
|
case JK_KeyValuePair:
|
|
|
|
return false;
|
|
|
|
};
|
|
|
|
llvm_unreachable("Invalid JSONAtom kind");
|
|
|
|
}
|
|
|
|
static bool classof(const JSONValue *Value) { return true; }
|
|
|
|
///@}
|
|
|
|
};
|
|
|
|
|
|
|
|
/// \brief Gives access to the text of a JSON string.
|
|
|
|
///
|
|
|
|
/// FIXME: Implement a method to return the unescaped text.
|
|
|
|
class JSONString : public JSONValue {
|
|
|
|
public:
|
|
|
|
/// \brief Returns the underlying parsed text of the string.
|
|
|
|
///
|
|
|
|
/// This is the unescaped content of the JSON text.
|
|
|
|
/// See http://www.ietf.org/rfc/rfc4627.txt for details.
|
|
|
|
StringRef getRawText() const { return RawText; };
|
|
|
|
|
|
|
|
private:
|
|
|
|
JSONString(StringRef RawText) : JSONValue(JK_String), RawText(RawText) {}
|
|
|
|
|
|
|
|
StringRef RawText;
|
|
|
|
|
|
|
|
friend class JSONParser;
|
|
|
|
|
|
|
|
public:
|
|
|
|
/// \brief dyn_cast helpers
|
|
|
|
///@{
|
|
|
|
static bool classof(const JSONAtom *Atom) {
|
|
|
|
return Atom->getKind() == JK_String;
|
|
|
|
}
|
|
|
|
static bool classof(const JSONString *String) { return true; }
|
|
|
|
///@}
|
|
|
|
};
|
|
|
|
|
|
|
|
/// \brief A (key, value) tuple of type (JSONString *, JSONValue *).
|
|
|
|
///
|
|
|
|
/// Note that JSONKeyValuePair is not a JSONValue, it is a bare JSONAtom.
|
|
|
|
/// JSONKeyValuePairs can be elements of a JSONObject, but not of a JSONArray.
|
|
|
|
/// They are not viable as top-level values either.
|
|
|
|
class JSONKeyValuePair : public JSONAtom {
|
|
|
|
public:
|
|
|
|
const JSONString * const Key;
|
|
|
|
const JSONValue * const Value;
|
|
|
|
|
|
|
|
private:
|
|
|
|
JSONKeyValuePair(const JSONString *Key, const JSONValue *Value)
|
|
|
|
: JSONAtom(JK_KeyValuePair), Key(Key), Value(Value) {}
|
|
|
|
|
|
|
|
friend class JSONParser;
|
|
|
|
|
|
|
|
public:
|
|
|
|
/// \brief dyn_cast helpers
|
|
|
|
///@{
|
|
|
|
static bool classof(const JSONAtom *Atom) {
|
|
|
|
return Atom->getKind() == JK_KeyValuePair;
|
|
|
|
}
|
|
|
|
static bool classof(const JSONKeyValuePair *KeyValuePair) { return true; }
|
|
|
|
///@}
|
|
|
|
};
|
|
|
|
|
|
|
|
/// \brief Implementation of JSON containers (arrays and objects).
|
|
|
|
///
|
|
|
|
/// JSONContainers drive the lazy parsing of JSON arrays and objects via
|
2011-12-20 10:42:52 +00:00
|
|
|
/// forward iterators.
|
2011-12-16 13:09:10 +00:00
|
|
|
class JSONContainer : public JSONValue {
|
2012-01-17 09:34:07 +00:00
|
|
|
private:
|
2011-12-16 13:09:10 +00:00
|
|
|
/// \brief An iterator that parses the underlying container during iteration.
|
|
|
|
///
|
|
|
|
/// Iterators on the same collection use shared state, so when multiple copies
|
|
|
|
/// of an iterator exist, only one is allowed to be used for iteration;
|
|
|
|
/// iterating multiple copies of an iterator of the same collection will lead
|
|
|
|
/// to undefined behavior.
|
2012-01-17 09:34:07 +00:00
|
|
|
class AtomIterator {
|
2011-12-16 13:09:10 +00:00
|
|
|
public:
|
2012-01-17 09:34:07 +00:00
|
|
|
AtomIterator(const AtomIterator &I) : Container(I.Container) {}
|
2011-12-16 13:09:10 +00:00
|
|
|
|
2012-01-17 09:34:07 +00:00
|
|
|
/// \brief Iterator interface.
|
|
|
|
///@{
|
|
|
|
bool operator==(const AtomIterator &I) const {
|
2011-12-16 13:09:10 +00:00
|
|
|
if (isEnd() || I.isEnd())
|
|
|
|
return isEnd() == I.isEnd();
|
|
|
|
return Container->Position == I.Container->Position;
|
|
|
|
}
|
2012-01-17 09:34:07 +00:00
|
|
|
bool operator!=(const AtomIterator &I) const {
|
|
|
|
return !(*this == I);
|
|
|
|
}
|
|
|
|
AtomIterator &operator++() {
|
2011-12-16 13:09:10 +00:00
|
|
|
Container->parseNextElement();
|
|
|
|
return *this;
|
|
|
|
}
|
2012-01-17 09:34:07 +00:00
|
|
|
const JSONAtom *operator*() {
|
|
|
|
return Container->Current;
|
|
|
|
}
|
|
|
|
///@}
|
2011-12-16 13:09:10 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
/// \brief Create an iterator for which 'isEnd' returns true.
|
2012-01-17 09:34:07 +00:00
|
|
|
AtomIterator() : Container(0) {}
|
2011-12-16 13:09:10 +00:00
|
|
|
|
|
|
|
/// \brief Create an iterator for the given container.
|
2012-01-17 09:34:07 +00:00
|
|
|
AtomIterator(const JSONContainer *Container) : Container(Container) {}
|
2011-12-16 13:09:10 +00:00
|
|
|
|
|
|
|
bool isEnd() const {
|
|
|
|
return Container == 0 || Container->Position == StringRef::iterator();
|
|
|
|
}
|
|
|
|
|
|
|
|
const JSONContainer * const Container;
|
|
|
|
|
|
|
|
friend class JSONContainer;
|
|
|
|
};
|
|
|
|
|
2012-01-17 09:34:07 +00:00
|
|
|
protected:
|
|
|
|
/// \brief An iterator for the specified AtomT.
|
|
|
|
///
|
|
|
|
/// Used for the implementation of iterators for JSONArray and JSONObject.
|
|
|
|
template <typename AtomT>
|
|
|
|
class IteratorTemplate : public std::iterator<std::forward_iterator_tag,
|
|
|
|
const AtomT*> {
|
|
|
|
public:
|
|
|
|
explicit IteratorTemplate(const AtomIterator& AtomI)
|
|
|
|
: AtomI(AtomI) {}
|
|
|
|
|
|
|
|
bool operator==(const IteratorTemplate &I) const {
|
|
|
|
return AtomI == I.AtomI;
|
|
|
|
}
|
|
|
|
bool operator!=(const IteratorTemplate &I) const { return !(*this == I); }
|
|
|
|
|
|
|
|
IteratorTemplate &operator++() {
|
|
|
|
++AtomI;
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
const AtomT *operator*() { return dyn_cast<AtomT>(*AtomI); }
|
|
|
|
|
|
|
|
private:
|
|
|
|
AtomIterator AtomI;
|
|
|
|
};
|
|
|
|
|
|
|
|
JSONContainer(JSONParser *Parser, char StartChar, char EndChar,
|
|
|
|
JSONAtom::Kind ContainerKind)
|
|
|
|
: JSONValue(ContainerKind), Parser(Parser),
|
|
|
|
Position(), Current(0), Started(false),
|
|
|
|
StartChar(StartChar), EndChar(EndChar) {}
|
|
|
|
|
2011-12-16 13:09:10 +00:00
|
|
|
/// \brief Returns a lazy parsing iterator over the container.
|
|
|
|
///
|
|
|
|
/// As the iterator drives the parse stream, begin() must only be called
|
|
|
|
/// once per container.
|
2012-01-17 09:34:07 +00:00
|
|
|
AtomIterator atom_begin() const {
|
2011-12-16 13:09:10 +00:00
|
|
|
if (Started)
|
|
|
|
report_fatal_error("Cannot parse container twice.");
|
|
|
|
Started = true;
|
|
|
|
// Set up the position and current element when we begin iterating over the
|
|
|
|
// container.
|
2012-01-17 09:34:07 +00:00
|
|
|
Position = Parser->parseFirstElement(getKind(), StartChar, EndChar, Current);
|
|
|
|
return AtomIterator(this);
|
2011-12-16 13:09:10 +00:00
|
|
|
}
|
2012-01-17 09:34:07 +00:00
|
|
|
AtomIterator atom_end() const {
|
|
|
|
return AtomIterator();
|
2011-12-16 13:09:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
2012-01-17 09:34:07 +00:00
|
|
|
AtomIterator atom_current() const {
|
2011-12-16 13:09:10 +00:00
|
|
|
if (!Started)
|
2012-01-17 09:34:07 +00:00
|
|
|
return atom_begin();
|
2011-12-16 13:09:10 +00:00
|
|
|
|
2012-01-17 09:34:07 +00:00
|
|
|
return AtomIterator(this);
|
2011-12-16 13:09:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// \brief Parse the next element in the container into the Current element.
|
|
|
|
///
|
|
|
|
/// This routine is called as an iterator into this container walks through
|
|
|
|
/// its elements. It mutates the container's internal current node to point to
|
|
|
|
/// the next atom of the container.
|
|
|
|
void parseNextElement() const {
|
2011-12-20 10:42:52 +00:00
|
|
|
Parser->skip(*Current);
|
2012-01-17 09:34:07 +00:00
|
|
|
Position = Parser->parseNextElement(getKind(), EndChar, Current);
|
2011-12-16 13:09:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// For parsing, JSONContainers call back into the JSONParser.
|
|
|
|
JSONParser * const Parser;
|
|
|
|
|
|
|
|
// 'Position', 'Current' and 'Started' store the state of the parse stream
|
|
|
|
// for iterators on the container, they don't change the container's elements
|
|
|
|
// and are thus marked as mutable.
|
|
|
|
mutable StringRef::iterator Position;
|
2012-01-17 09:34:07 +00:00
|
|
|
mutable const JSONAtom *Current;
|
2011-12-16 13:09:10 +00:00
|
|
|
mutable bool Started;
|
|
|
|
|
2012-01-17 09:34:07 +00:00
|
|
|
const char StartChar;
|
|
|
|
const char EndChar;
|
|
|
|
|
2011-12-16 13:09:10 +00:00
|
|
|
friend class JSONParser;
|
|
|
|
|
|
|
|
public:
|
|
|
|
/// \brief dyn_cast helpers
|
|
|
|
///@{
|
|
|
|
static bool classof(const JSONAtom *Atom) {
|
2012-01-17 09:34:07 +00:00
|
|
|
switch (Atom->getKind()) {
|
|
|
|
case JK_Array:
|
|
|
|
case JK_Object:
|
|
|
|
return true;
|
|
|
|
case JK_KeyValuePair:
|
|
|
|
case JK_String:
|
|
|
|
return false;
|
|
|
|
};
|
|
|
|
llvm_unreachable("Invalid JSONAtom kind");
|
2011-12-16 13:09:10 +00:00
|
|
|
}
|
|
|
|
static bool classof(const JSONContainer *Container) { return true; }
|
|
|
|
///@}
|
|
|
|
};
|
|
|
|
|
|
|
|
/// \brief A simple JSON array.
|
2012-01-17 09:34:07 +00:00
|
|
|
class JSONArray : public JSONContainer {
|
|
|
|
public:
|
|
|
|
typedef IteratorTemplate<JSONValue> const_iterator;
|
|
|
|
|
|
|
|
/// \brief Returns a lazy parsing iterator over the container.
|
|
|
|
///
|
|
|
|
/// As the iterator drives the parse stream, begin() must only be called
|
|
|
|
/// once per container.
|
|
|
|
const_iterator begin() const { return const_iterator(atom_begin()); }
|
|
|
|
const_iterator end() const { return const_iterator(atom_end()); }
|
|
|
|
|
|
|
|
private:
|
|
|
|
JSONArray(JSONParser *Parser)
|
|
|
|
: JSONContainer(Parser, '[', ']', JSONAtom::JK_Array) {}
|
|
|
|
|
|
|
|
public:
|
|
|
|
/// \brief dyn_cast helpers
|
|
|
|
///@{
|
|
|
|
static bool classof(const JSONAtom *Atom) {
|
|
|
|
return Atom->getKind() == JSONAtom::JK_Array;
|
|
|
|
}
|
|
|
|
static bool classof(const JSONArray *Array) { return true; }
|
|
|
|
///@}
|
|
|
|
|
|
|
|
friend class JSONParser;
|
|
|
|
};
|
2011-12-16 13:09:10 +00:00
|
|
|
|
|
|
|
/// \brief A JSON object: an iterable list of JSON key-value pairs.
|
2012-01-17 09:34:07 +00:00
|
|
|
class JSONObject : public JSONContainer {
|
|
|
|
public:
|
|
|
|
typedef IteratorTemplate<JSONKeyValuePair> const_iterator;
|
2011-12-16 13:09:10 +00:00
|
|
|
|
2012-01-17 09:34:07 +00:00
|
|
|
/// \brief Returns a lazy parsing iterator over the container.
|
|
|
|
///
|
|
|
|
/// As the iterator drives the parse stream, begin() must only be called
|
|
|
|
/// once per container.
|
|
|
|
const_iterator begin() const { return const_iterator(atom_begin()); }
|
|
|
|
const_iterator end() const { return const_iterator(atom_end()); }
|
2011-12-16 13:09:10 +00:00
|
|
|
|
2012-01-17 09:34:07 +00:00
|
|
|
private:
|
|
|
|
JSONObject(JSONParser *Parser)
|
|
|
|
: JSONContainer(Parser, '{', '}', JSONAtom::JK_Object) {}
|
2011-12-16 13:09:10 +00:00
|
|
|
|
2012-01-17 09:34:07 +00:00
|
|
|
public:
|
|
|
|
/// \brief dyn_cast helpers
|
|
|
|
///@{
|
|
|
|
static bool classof(const JSONAtom *Atom) {
|
|
|
|
return Atom->getKind() == JSONAtom::JK_Object;
|
2011-12-16 13:09:10 +00:00
|
|
|
}
|
2012-01-17 09:34:07 +00:00
|
|
|
static bool classof(const JSONObject *Object) { return true; }
|
|
|
|
///@}
|
|
|
|
|
|
|
|
friend class JSONParser;
|
|
|
|
};
|
2011-12-16 13:09:10 +00:00
|
|
|
|
|
|
|
} // end namespace llvm
|
|
|
|
|
2011-12-20 09:26:26 +00:00
|
|
|
#endif // LLVM_SUPPORT_JSON_PARSER_H
|