[Support][YAML] Add support for accessing tags and tag handle substitution.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193004 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Michael J. Spencer 2013-10-18 22:38:04 +00:00
parent 630c3264a6
commit 44a4cfb63d
6 changed files with 169 additions and 47 deletions

View File

@ -43,6 +43,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/SMLoc.h"
#include <map>
#include <limits>
#include <utility>
@ -99,9 +101,6 @@ private:
OwningPtr<Document> CurrentDoc;
friend class Document;
/// @brief Validate a %YAML x.x directive.
void handleYAMLDirective(const Token &);
};
/// @brief Abstract base class for all Nodes.
@ -116,12 +115,21 @@ public:
NK_Alias
};
Node(unsigned int Type, OwningPtr<Document>&, StringRef Anchor);
Node(unsigned int Type, OwningPtr<Document> &, StringRef Anchor,
StringRef Tag);
/// @brief Get the value of the anchor attached to this node. If it does not
/// have one, getAnchor().size() will be 0.
StringRef getAnchor() const { return Anchor; }
/// \brief Get the tag as it was written in the document. This does not
/// perform tag resolution.
StringRef getRawTag() const { return Tag; }
/// \brief Get the verbatim tag for a given Node. This performs tag resolution
/// and substitution.
std::string getVerbatimTag() const;
SMRange getSourceRange() const { return SourceRange; }
void setSourceRange(SMRange SR) { SourceRange = SR; }
@ -158,6 +166,8 @@ protected:
private:
unsigned int TypeID;
StringRef Anchor;
/// \brief The tag as typed in the document.
StringRef Tag;
};
/// @brief A null value.
@ -166,7 +176,8 @@ private:
/// !!null null
class NullNode : public Node {
public:
NullNode(OwningPtr<Document> &D) : Node(NK_Null, D, StringRef()) {}
NullNode(OwningPtr<Document> &D)
: Node(NK_Null, D, StringRef(), StringRef()) {}
static inline bool classof(const Node *N) {
return N->getType() == NK_Null;
@ -180,9 +191,9 @@ public:
/// Adena
class ScalarNode : public Node {
public:
ScalarNode(OwningPtr<Document> &D, StringRef Anchor, StringRef Val)
: Node(NK_Scalar, D, Anchor)
, Value(Val) {
ScalarNode(OwningPtr<Document> &D, StringRef Anchor, StringRef Tag,
StringRef Val)
: Node(NK_Scalar, D, Anchor, Tag), Value(Val) {
SMLoc Start = SMLoc::getFromPointer(Val.begin());
SMLoc End = SMLoc::getFromPointer(Val.end());
SourceRange = SMRange(Start, End);
@ -222,7 +233,7 @@ private:
class KeyValueNode : public Node {
public:
KeyValueNode(OwningPtr<Document> &D)
: Node(NK_KeyValue, D, StringRef())
: Node(NK_KeyValue, D, StringRef(), StringRef())
, Key(0)
, Value(0)
{}
@ -338,13 +349,10 @@ public:
MT_Inline ///< An inline mapping node is used for "[key: value]".
};
MappingNode(OwningPtr<Document> &D, StringRef Anchor, MappingType MT)
: Node(NK_Mapping, D, Anchor)
, Type(MT)
, IsAtBeginning(true)
, IsAtEnd(false)
, CurrentEntry(0)
{}
MappingNode(OwningPtr<Document> &D, StringRef Anchor, StringRef Tag,
MappingType MT)
: Node(NK_Mapping, D, Anchor, Tag), Type(MT), IsAtBeginning(true),
IsAtEnd(false), CurrentEntry(0) {}
friend class basic_collection_iterator<MappingNode, KeyValueNode>;
typedef basic_collection_iterator<MappingNode, KeyValueNode> iterator;
@ -397,14 +405,12 @@ public:
ST_Indentless
};
SequenceNode(OwningPtr<Document> &D, StringRef Anchor, SequenceType ST)
: Node(NK_Sequence, D, Anchor)
, SeqType(ST)
, IsAtBeginning(true)
, IsAtEnd(false)
, WasPreviousTokenFlowEntry(true) // Start with an imaginary ','.
, CurrentEntry(0)
{}
SequenceNode(OwningPtr<Document> &D, StringRef Anchor, StringRef Tag,
SequenceType ST)
: Node(NK_Sequence, D, Anchor, Tag), SeqType(ST), IsAtBeginning(true),
IsAtEnd(false),
WasPreviousTokenFlowEntry(true), // Start with an imaginary ','.
CurrentEntry(0) {}
friend class basic_collection_iterator<SequenceNode, Node>;
typedef basic_collection_iterator<SequenceNode, Node> iterator;
@ -442,7 +448,7 @@ private:
class AliasNode : public Node {
public:
AliasNode(OwningPtr<Document> &D, StringRef Val)
: Node(NK_Alias, D, StringRef()), Name(Val) {}
: Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {}
StringRef getName() const { return Name; }
Node *getTarget();
@ -475,6 +481,10 @@ public:
return Root = parseBlockNode();
}
/// \brief Get read-only access to this document's tag map, which associates
/// each tag handle (e.g. "!" or "!!") with the prefix it expands to.
const std::map<StringRef, StringRef> &getTagMap() const {
return TagMap;
}
private:
friend class Node;
friend class document_iterator;
@ -490,18 +500,23 @@ private:
/// document.
Node *Root;
/// \brief Maps tag handles to the prefixes they expand to.
std::map<StringRef, StringRef> TagMap;
Token &peekNext();
Token getNext();
void setError(const Twine &Message, Token &Location) const;
bool failed() const;
void handleTagDirective(const Token &Tag) {
// TODO: Track tags.
}
/// @brief Parse %BLAH directives and return true if any were encountered.
bool parseDirectives();
/// \brief Parse %YAML
void parseYAMLDirective();
/// \brief Parse %TAG
void parseTAGDirective();
/// @brief Consume the next token and error if it is not \a TK.
bool expectToken(int TK);
};

View File

@ -1071,13 +1071,21 @@ bool Scanner::scanDirective() {
StringRef Name(NameStart, Current - NameStart);
Current = skip_while(&Scanner::skip_s_white, Current);
Token T;
if (Name == "YAML") {
Current = skip_while(&Scanner::skip_ns_char, Current);
Token T;
T.Kind = Token::TK_VersionDirective;
T.Range = StringRef(Start, Current - Start);
TokenQueue.push_back(T);
return true;
} else if(Name == "TAG") {
Current = skip_while(&Scanner::skip_ns_char, Current);
Current = skip_while(&Scanner::skip_s_white, Current);
Current = skip_while(&Scanner::skip_ns_char, Current);
T.Kind = Token::TK_TagDirective;
T.Range = StringRef(Start, Current - Start);
TokenQueue.push_back(T);
return true;
}
return false;
}
@ -1564,10 +1572,6 @@ void Stream::printError(Node *N, const Twine &Msg) {
, Ranges);
}
void Stream::handleYAMLDirective(const Token &t) {
// TODO: Ensure version is 1.x.
}
document_iterator Stream::begin() {
if (CurrentDoc)
report_fatal_error("Can only iterate over the stream once");
@ -1588,14 +1592,59 @@ void Stream::skip() {
i->skip();
}
Node::Node(unsigned int Type, OwningPtr<Document> &D, StringRef A)
Node::Node(unsigned int Type, OwningPtr<Document> &D, StringRef A, StringRef T)
: Doc(D)
, TypeID(Type)
, Anchor(A) {
, Anchor(A)
, Tag(T) {
SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin());
SourceRange = SMRange(Start, Start);
}
std::string Node::getVerbatimTag() const {
StringRef Raw = getRawTag();
if (!Raw.empty() && Raw != "!") {
std::string Ret;
if (Raw.find_last_of('!') == 0) {
Ret = Doc->getTagMap().find("!")->second;
Ret += Raw.substr(1);
return std::move(Ret);
} else if (Raw.startswith("!!")) {
Ret = Doc->getTagMap().find("!!")->second;
Ret += Raw.substr(2);
return std::move(Ret);
} else {
StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1);
std::map<StringRef, StringRef>::const_iterator It =
Doc->getTagMap().find(TagHandle);
if (It != Doc->getTagMap().end())
Ret = It->second;
else {
Token T;
T.Kind = Token::TK_Tag;
T.Range = TagHandle;
setError(Twine("Unknown tag handle ") + TagHandle, T);
}
Ret += Raw.substr(Raw.find_last_of('!') + 1);
return std::move(Ret);
}
}
switch (getType()) {
case NK_Null:
return "tag:yaml.org,2002:null";
case NK_Scalar:
// TODO: Tag resolution.
return "tag:yaml.org,2002:str";
case NK_Mapping:
return "tag:yaml.org,2002:map";
case NK_Sequence:
return "tag:yaml.org,2002:seq";
}
return "";
}
/// Forward to the owning document's peekNext() and return its result.
Token &Node::peekNext() {
return Doc->peekNext();
}
@ -1999,6 +2048,10 @@ void SequenceNode::increment() {
}
Document::Document(Stream &S) : stream(S), Root(0) {
// The tag map starts with two default mappings.
TagMap["!"] = "!";
TagMap["!!"] = "tag:yaml.org,2002:";
if (parseDirectives())
expectToken(Token::TK_DocumentStart);
Token &T = peekNext();
@ -2042,6 +2095,7 @@ Node *Document::parseBlockNode() {
Token T = peekNext();
// Handle properties.
Token AnchorInfo;
Token TagInfo;
parse_property:
switch (T.Kind) {
case Token::TK_Alias:
@ -2056,7 +2110,11 @@ parse_property:
T = peekNext();
goto parse_property;
case Token::TK_Tag:
getNext(); // Skip TK_Tag.
if (TagInfo.Kind == Token::TK_Tag) {
setError("Already encountered a tag for this node!", T);
return 0;
}
TagInfo = getNext(); // Consume TK_Tag.
T = peekNext();
goto parse_property;
default:
@ -2070,42 +2128,49 @@ parse_property:
// Don't eat the TK_BlockEntry, SequenceNode needs it.
return new (NodeAllocator) SequenceNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
, TagInfo.Range
, SequenceNode::ST_Indentless);
case Token::TK_BlockSequenceStart:
getNext();
return new (NodeAllocator)
SequenceNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
, TagInfo.Range
, SequenceNode::ST_Block);
case Token::TK_BlockMappingStart:
getNext();
return new (NodeAllocator)
MappingNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
, TagInfo.Range
, MappingNode::MT_Block);
case Token::TK_FlowSequenceStart:
getNext();
return new (NodeAllocator)
SequenceNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
, TagInfo.Range
, SequenceNode::ST_Flow);
case Token::TK_FlowMappingStart:
getNext();
return new (NodeAllocator)
MappingNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
, TagInfo.Range
, MappingNode::MT_Flow);
case Token::TK_Scalar:
getNext();
return new (NodeAllocator)
ScalarNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
, TagInfo.Range
, T.Range);
case Token::TK_Key:
// Don't eat the TK_Key, KeyValueNode expects it.
return new (NodeAllocator)
MappingNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
, TagInfo.Range
, MappingNode::MT_Inline);
case Token::TK_DocumentStart:
case Token::TK_DocumentEnd:
@ -2126,10 +2191,10 @@ bool Document::parseDirectives() {
while (true) {
Token T = peekNext();
if (T.Kind == Token::TK_TagDirective) {
handleTagDirective(getNext());
parseTAGDirective();
isDirective = true;
} else if (T.Kind == Token::TK_VersionDirective) {
stream.handleYAMLDirective(getNext());
parseYAMLDirective();
isDirective = true;
} else
break;
@ -2137,6 +2202,21 @@ bool Document::parseDirectives() {
return isDirective;
}
/// Consume a %YAML directive token. The token is discarded; the version it
/// carries is not inspected here.
void Document::parseYAMLDirective() {
getNext(); // Eat %YAML <version>
}
void Document::parseTAGDirective() {
Token Tag = getNext(); // %TAG <handle> <prefix>
StringRef T = Tag.Range;
// Strip %TAG
T = T.substr(T.find_first_of(" \t")).ltrim(" \t");
std::size_t HandleEnd = T.find_first_of(" \t");
StringRef TagHandle = T.substr(0, HandleEnd);
StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t");
TagMap[TagHandle] = TagPrefix;
}
bool Document::expectToken(int TK) {
Token T = getNext();
if (T.Kind != TK) {

View File

@ -1,4 +1,4 @@
# RUN: yaml-bench -canonical %s
# RUN: yaml-bench -canonical %s | FileCheck %s
%TAG ! tag:clarkevans.com,2002:
--- !shape
@ -14,3 +14,8 @@
start: *ORIGIN
color: 0xFFEEBB
text: Pretty vector drawing.
#CHECK: !<tag:clarkevans.com,2002:shape>
#CHECK: !<tag:clarkevans.com,2002:circle>
#CHECK: !<tag:clarkevans.com,2002:line>
#CHECK: !<tag:clarkevans.com,2002:label>

View File

@ -1,5 +1,7 @@
# RUN: yaml-bench -canonical %s
# RUN: yaml-bench -canonical %s | FileCheck %s
%TAG !yaml! tag:yaml.org,2002:
---
!yaml!str "foo"
#CHECK: !!str "foo"

View File

@ -1,5 +1,11 @@
# RUN: yaml-bench -canonical %s
# RUN: yaml-bench -canonical %s | FileCheck %s
- !!yaml '!'
- !!yaml '&'
- !!yaml '*'
# CHECK: !!seq [
# CHECK: !!yaml "!",
# CHECK: !!yaml "&",
# CHECK: !!yaml "*",
# CHECK: ]

View File

@ -63,6 +63,20 @@ static raw_ostream &operator <<(raw_ostream &os, const indent &in) {
return os;
}
/// \brief Pretty print the verbatim tag of \p N.
///
/// A tag that starts with "tag:yaml.org,2002:" is abbreviated by replacing
/// that prefix with "!!". Any other tag is wrapped as "!<tag>".
static std::string prettyTag(yaml::Node *N) {
  std::string Tag = N->getVerbatimTag();
  // Derive the amount to strip from the prefix itself so the literal and its
  // length cannot drift apart.
  const StringRef CorePrefix("tag:yaml.org,2002:");
  if (StringRef(Tag).startswith(CorePrefix)) {
    std::string Ret = "!!";
    Ret += StringRef(Tag).substr(CorePrefix.size());
    // Return the local by name; wrapping it in std::move would prevent copy
    // elision.
    return Ret;
  }
  std::string Ret = "!<";
  Ret += Tag;
  Ret += ">";
  return Ret;
}
static void dumpNode( yaml::Node *n
, unsigned Indent = 0
, bool SuppressFirstIndent = false) {
@ -76,9 +90,9 @@ static void dumpNode( yaml::Node *n
if (yaml::ScalarNode *sn = dyn_cast<yaml::ScalarNode>(n)) {
SmallString<32> Storage;
StringRef Val = sn->getValue(Storage);
outs() << "!!str \"" << yaml::escape(Val) << "\"";
outs() << prettyTag(n) << " \"" << yaml::escape(Val) << "\"";
} else if (yaml::SequenceNode *sn = dyn_cast<yaml::SequenceNode>(n)) {
outs() << "!!seq [\n";
outs() << prettyTag(n) << " [\n";
++Indent;
for (yaml::SequenceNode::iterator i = sn->begin(), e = sn->end();
i != e; ++i) {
@ -88,7 +102,7 @@ static void dumpNode( yaml::Node *n
--Indent;
outs() << indent(Indent) << "]";
} else if (yaml::MappingNode *mn = dyn_cast<yaml::MappingNode>(n)) {
outs() << "!!map {\n";
outs() << prettyTag(n) << " {\n";
++Indent;
for (yaml::MappingNode::iterator i = mn->begin(), e = mn->end();
i != e; ++i) {
@ -104,7 +118,7 @@ static void dumpNode( yaml::Node *n
} else if (yaml::AliasNode *an = dyn_cast<yaml::AliasNode>(n)){
outs() << "*" << an->getName();
} else if (dyn_cast<yaml::NullNode>(n)) {
outs() << "!!null null";
outs() << prettyTag(n) << " null";
}
}