mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-06-15 21:24:00 +00:00
YAML: Implement block scalar parsing.
This commit implements the parsing of YAML block scalars. Some code existed for it before, but it couldn't parse block scalars. This commit adds a new yaml node type to represent the block scalar values. This commit also deletes the 'spec-09-27' and 'spec-09-28' tests as they are identical to the test file 'spec-09-26'. This commit introduces 3 new utility functions to the YAML scanner class: `skip_s_space`, `advanceWhile` and `consumeLineBreakIfPresent`. Reviewers: Duncan P. N. Exon Smith Differential Revision: http://reviews.llvm.org/D9503 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237314 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@ -101,6 +101,7 @@ namespace yaml {
|
||||
void Node::anchor() {}
|
||||
void NullNode::anchor() {}
|
||||
void ScalarNode::anchor() {}
|
||||
void BlockScalarNode::anchor() {}
|
||||
void KeyValueNode::anchor() {}
|
||||
void MappingNode::anchor() {}
|
||||
void SequenceNode::anchor() {}
|
||||
@ -128,6 +129,7 @@ struct Token : ilist_node<Token> {
|
||||
TK_Key,
|
||||
TK_Value,
|
||||
TK_Scalar,
|
||||
TK_BlockScalar,
|
||||
TK_Alias,
|
||||
TK_Anchor,
|
||||
TK_Tag
|
||||
@ -137,6 +139,9 @@ struct Token : ilist_node<Token> {
|
||||
/// of the token in the input.
|
||||
StringRef Range;
|
||||
|
||||
/// The value of a block scalar node.
|
||||
std::string Value;
|
||||
|
||||
Token() : Kind(TK_Error) {}
|
||||
};
|
||||
}
|
||||
@ -348,6 +353,14 @@ private:
|
||||
/// b-break.
|
||||
StringRef::iterator skip_b_break(StringRef::iterator Position);
|
||||
|
||||
/// Skip a single s-space[31] starting at Position.
|
||||
///
|
||||
/// An s-space is 0x20
|
||||
///
|
||||
/// @returns The code unit after the s-space, or Position if it's not a
|
||||
/// s-space.
|
||||
StringRef::iterator skip_s_space(StringRef::iterator Position);
|
||||
|
||||
/// @brief Skip a single s-white[33] starting at Position.
|
||||
///
|
||||
/// A s-white is 0x20 | 0x9
|
||||
@ -373,6 +386,10 @@ private:
|
||||
StringRef::iterator skip_while( SkipWhileFunc Func
|
||||
, StringRef::iterator Position);
|
||||
|
||||
/// Skip minimal well-formed code unit subsequences until Func returns its
|
||||
/// input.
|
||||
void advanceWhile(SkipWhileFunc Func);
|
||||
|
||||
/// @brief Scan ns-uri-char[39]s starting at Cur.
|
||||
///
|
||||
/// This updates Cur and Column while scanning.
|
||||
@ -393,6 +410,11 @@ private:
|
||||
/// Pos is whitespace or a new line
|
||||
bool isBlankOrBreak(StringRef::iterator Position);
|
||||
|
||||
/// Consume a single b-break[28] if it's present at the current position.
|
||||
///
|
||||
/// Return false if the code unit at the current position isn't a line break.
|
||||
bool consumeLineBreakIfPresent();
|
||||
|
||||
/// @brief If IsSimpleKeyAllowed, create and push_back a new SimpleKey.
|
||||
void saveSimpleKeyCandidate( TokenQueueT::iterator Tok
|
||||
, unsigned AtColumn
|
||||
@ -466,6 +488,30 @@ private:
|
||||
/// @brief Scan a block scalar starting with | or >.
|
||||
bool scanBlockScalar(bool IsLiteral);
|
||||
|
||||
/// Scan a chomping indicator in a block scalar header.
|
||||
char scanBlockChompingIndicator();
|
||||
|
||||
/// Scan an indentation indicator in a block scalar header.
|
||||
unsigned scanBlockIndentationIndicator();
|
||||
|
||||
/// Scan a block scalar header.
|
||||
///
|
||||
/// Return false if an error occurred.
|
||||
bool scanBlockScalarHeader(char &ChompingIndicator, unsigned &IndentIndicator,
|
||||
bool &IsDone);
|
||||
|
||||
/// Look for the indentation level of a block scalar.
|
||||
///
|
||||
/// Return false if an error occurred.
|
||||
bool findBlockScalarIndent(unsigned &BlockIndent, unsigned BlockExitIndent,
|
||||
unsigned &LineBreaks, bool &IsDone);
|
||||
|
||||
/// Scan the indentation of a text line in a block scalar.
|
||||
///
|
||||
/// Return false if an error occurred.
|
||||
bool scanBlockScalarIndent(unsigned BlockIndent, unsigned BlockExitIndent,
|
||||
bool &IsDone);
|
||||
|
||||
/// @brief Scan a tag of the form !stuff.
|
||||
bool scanTag();
|
||||
|
||||
@ -612,6 +658,9 @@ bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) {
|
||||
case Token::TK_Scalar:
|
||||
OS << "Scalar: ";
|
||||
break;
|
||||
case Token::TK_BlockScalar:
|
||||
OS << "Block Scalar: ";
|
||||
break;
|
||||
case Token::TK_Alias:
|
||||
OS << "Alias: ";
|
||||
break;
|
||||
@ -816,6 +865,13 @@ StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) {
|
||||
return Position;
|
||||
}
|
||||
|
||||
StringRef::iterator Scanner::skip_s_space(StringRef::iterator Position) {
|
||||
if (Position == End)
|
||||
return Position;
|
||||
if (*Position == ' ')
|
||||
return Position + 1;
|
||||
return Position;
|
||||
}
|
||||
|
||||
StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) {
|
||||
if (Position == End)
|
||||
@ -844,6 +900,12 @@ StringRef::iterator Scanner::skip_while( SkipWhileFunc Func
|
||||
return Position;
|
||||
}
|
||||
|
||||
void Scanner::advanceWhile(SkipWhileFunc Func) {
|
||||
auto Final = skip_while(Func, Current);
|
||||
Column += Final - Current;
|
||||
Current = Final;
|
||||
}
|
||||
|
||||
static bool is_ns_hex_digit(const char C) {
|
||||
return (C >= '0' && C <= '9')
|
||||
|| (C >= 'a' && C <= 'z')
|
||||
@ -906,6 +968,16 @@ bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Scanner::consumeLineBreakIfPresent() {
|
||||
auto Next = skip_b_break(Current);
|
||||
if (Next == Current)
|
||||
return false;
|
||||
Column = 0;
|
||||
++Line;
|
||||
Current = Next;
|
||||
return true;
|
||||
}
|
||||
|
||||
void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok
|
||||
, unsigned AtColumn
|
||||
, bool IsRequired) {
|
||||
@ -1374,38 +1446,204 @@ bool Scanner::scanAliasOrAnchor(bool IsAlias) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Scanner::scanBlockScalar(bool IsLiteral) {
|
||||
StringRef::iterator Start = Current;
|
||||
skip(1); // Eat | or >
|
||||
while(true) {
|
||||
StringRef::iterator i = skip_nb_char(Current);
|
||||
if (i == Current) {
|
||||
if (Column == 0)
|
||||
break;
|
||||
i = skip_b_break(Current);
|
||||
if (i != Current) {
|
||||
// We got a line break.
|
||||
Column = 0;
|
||||
++Line;
|
||||
Current = i;
|
||||
continue;
|
||||
} else {
|
||||
// There was an error, which should already have been printed out.
|
||||
char Scanner::scanBlockChompingIndicator() {
|
||||
char Indicator = ' ';
|
||||
if (Current != End && (*Current == '+' || *Current == '-')) {
|
||||
Indicator = *Current;
|
||||
skip(1);
|
||||
}
|
||||
return Indicator;
|
||||
}
|
||||
|
||||
/// Get the number of line breaks after chomping.
|
||||
///
|
||||
/// Return the number of trailing line breaks to emit, depending on
|
||||
/// \p ChompingIndicator.
|
||||
static unsigned getChompedLineBreaks(char ChompingIndicator,
|
||||
unsigned LineBreaks, StringRef Str) {
|
||||
if (ChompingIndicator == '-') // Strip all line breaks.
|
||||
return 0;
|
||||
if (ChompingIndicator == '+') // Keep all line breaks.
|
||||
return LineBreaks;
|
||||
// Clip trailing lines.
|
||||
return Str.empty() ? 0 : 1;
|
||||
}
|
||||
|
||||
unsigned Scanner::scanBlockIndentationIndicator() {
|
||||
unsigned Indent = 0;
|
||||
if (Current != End && (*Current >= '1' && *Current <= '9')) {
|
||||
Indent = unsigned(*Current - '0');
|
||||
skip(1);
|
||||
}
|
||||
return Indent;
|
||||
}
|
||||
|
||||
bool Scanner::scanBlockScalarHeader(char &ChompingIndicator,
|
||||
unsigned &IndentIndicator, bool &IsDone) {
|
||||
auto Start = Current;
|
||||
|
||||
ChompingIndicator = scanBlockChompingIndicator();
|
||||
IndentIndicator = scanBlockIndentationIndicator();
|
||||
// Check for the chomping indicator once again.
|
||||
if (ChompingIndicator == ' ')
|
||||
ChompingIndicator = scanBlockChompingIndicator();
|
||||
Current = skip_while(&Scanner::skip_s_white, Current);
|
||||
skipComment();
|
||||
|
||||
if (Current == End) { // EOF, we have an empty scalar.
|
||||
Token T;
|
||||
T.Kind = Token::TK_BlockScalar;
|
||||
T.Range = StringRef(Start, Current - Start);
|
||||
TokenQueue.push_back(T);
|
||||
IsDone = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!consumeLineBreakIfPresent()) {
|
||||
setError("Expected a line break after block scalar header", Current);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Scanner::findBlockScalarIndent(unsigned &BlockIndent,
|
||||
unsigned BlockExitIndent,
|
||||
unsigned &LineBreaks, bool &IsDone) {
|
||||
unsigned MaxAllSpaceLineCharacters = 0;
|
||||
StringRef::iterator LongestAllSpaceLine;
|
||||
|
||||
while (true) {
|
||||
advanceWhile(&Scanner::skip_s_space);
|
||||
if (skip_nb_char(Current) != Current) {
|
||||
// This line isn't empty, so try and find the indentation.
|
||||
if (Column <= BlockExitIndent) { // End of the block literal.
|
||||
IsDone = true;
|
||||
return true;
|
||||
}
|
||||
// We found the block's indentation.
|
||||
BlockIndent = Column;
|
||||
if (MaxAllSpaceLineCharacters > BlockIndent) {
|
||||
setError(
|
||||
"Leading all-spaces line must be smaller than the block indent",
|
||||
LongestAllSpaceLine);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
Current = i;
|
||||
if (skip_b_break(Current) != Current &&
|
||||
Column > MaxAllSpaceLineCharacters) {
|
||||
// Record the longest all-space line in case it's longer than the
|
||||
// discovered block indent.
|
||||
MaxAllSpaceLineCharacters = Column;
|
||||
LongestAllSpaceLine = Current;
|
||||
}
|
||||
|
||||
// Check for EOF.
|
||||
if (Current == End) {
|
||||
IsDone = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!consumeLineBreakIfPresent()) {
|
||||
IsDone = true;
|
||||
return true;
|
||||
}
|
||||
++LineBreaks;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Scanner::scanBlockScalarIndent(unsigned BlockIndent,
|
||||
unsigned BlockExitIndent, bool &IsDone) {
|
||||
// Skip the indentation.
|
||||
while (Column < BlockIndent) {
|
||||
auto I = skip_s_space(Current);
|
||||
if (I == Current)
|
||||
break;
|
||||
Current = I;
|
||||
++Column;
|
||||
}
|
||||
|
||||
if (Start == Current) {
|
||||
setError("Got empty block scalar", Start);
|
||||
return false;
|
||||
if (skip_nb_char(Current) == Current)
|
||||
return true;
|
||||
|
||||
if (Column <= BlockExitIndent) { // End of the block literal.
|
||||
IsDone = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (Column < BlockIndent) {
|
||||
if (Current != End && *Current == '#') { // Trailing comment.
|
||||
IsDone = true;
|
||||
return true;
|
||||
}
|
||||
setError("A text line is less indented than the block scalar", Current);
|
||||
return false;
|
||||
}
|
||||
return true; // A normal text line.
|
||||
}
|
||||
|
||||
bool Scanner::scanBlockScalar(bool IsLiteral) {
|
||||
// Eat '|' or '>'
|
||||
assert(*Current == '|' || *Current == '>');
|
||||
skip(1);
|
||||
|
||||
char ChompingIndicator;
|
||||
unsigned BlockIndent;
|
||||
bool IsDone = false;
|
||||
if (!scanBlockScalarHeader(ChompingIndicator, BlockIndent, IsDone))
|
||||
return false;
|
||||
if (IsDone)
|
||||
return true;
|
||||
|
||||
auto Start = Current;
|
||||
unsigned BlockExitIndent = Indent < 0 ? 0 : (unsigned)Indent;
|
||||
unsigned LineBreaks = 0;
|
||||
if (BlockIndent == 0) {
|
||||
if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks,
|
||||
IsDone))
|
||||
return false;
|
||||
}
|
||||
|
||||
// Scan the block's scalars body.
|
||||
SmallString<256> Str;
|
||||
while (!IsDone) {
|
||||
if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone))
|
||||
return false;
|
||||
if (IsDone)
|
||||
break;
|
||||
|
||||
// Parse the current line.
|
||||
auto LineStart = Current;
|
||||
advanceWhile(&Scanner::skip_nb_char);
|
||||
if (LineStart != Current) {
|
||||
Str.append(LineBreaks, '\n');
|
||||
Str.append(StringRef(LineStart, Current - LineStart));
|
||||
LineBreaks = 0;
|
||||
}
|
||||
|
||||
// Check for EOF.
|
||||
if (Current == End)
|
||||
break;
|
||||
|
||||
if (!consumeLineBreakIfPresent())
|
||||
break;
|
||||
++LineBreaks;
|
||||
}
|
||||
|
||||
if (Current == End && !LineBreaks)
|
||||
// Ensure that there is at least one line break before the end of file.
|
||||
LineBreaks = 1;
|
||||
Str.append(getChompedLineBreaks(ChompingIndicator, LineBreaks, Str), '\n');
|
||||
|
||||
// New lines may start a simple key.
|
||||
if (!FlowLevel)
|
||||
IsSimpleKeyAllowed = true;
|
||||
|
||||
Token T;
|
||||
T.Kind = Token::TK_Scalar;
|
||||
T.Kind = Token::TK_BlockScalar;
|
||||
T.Range = StringRef(Start, Current - Start);
|
||||
T.Value = Str.str().str();
|
||||
TokenQueue.push_back(T);
|
||||
return true;
|
||||
}
|
||||
@ -1607,6 +1845,7 @@ std::string Node::getVerbatimTag() const {
|
||||
case NK_Null:
|
||||
return "tag:yaml.org,2002:null";
|
||||
case NK_Scalar:
|
||||
case NK_BlockScalar:
|
||||
// TODO: Tag resolution.
|
||||
return "tag:yaml.org,2002:str";
|
||||
case NK_Mapping:
|
||||
@ -2138,6 +2377,11 @@ parse_property:
|
||||
, AnchorInfo.Range.substr(1)
|
||||
, TagInfo.Range
|
||||
, T.Range);
|
||||
case Token::TK_BlockScalar:
|
||||
getNext();
|
||||
return new (NodeAllocator)
|
||||
BlockScalarNode(stream.CurrentDoc, AnchorInfo.Range.substr(1),
|
||||
TagInfo.Range, T.Value, T.Range);
|
||||
case Token::TK_Key:
|
||||
// Don't eat the TK_Key, KeyValueNode expects it.
|
||||
return new (NodeAllocator)
|
||||
|
Reference in New Issue
Block a user