// Copyright 2010 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package html import ( "bytes" "io" "strings" "testing" ) type tokenTest struct { // A short description of the test case. desc string // The HTML to parse. html string // The string representations of the expected tokens, joined by '$'. golden string } var tokenTests = []tokenTest{ { "empty", "", "", }, // A single text node. The tokenizer should not break text nodes on whitespace, // nor should it normalize whitespace within a text node. { "text", "foo bar", "foo bar", }, // An entity. { "entity", "one < two", "one < two", }, // A start, self-closing and end tag. The tokenizer does not care if the start // and end tokens don't match; that is the job of the parser. { "tags", "bd", "$b$$d$", }, // Angle brackets that aren't a tag. { "not a tag #0", "<", "<", }, { "not a tag #1", "", "", }, { "not a tag #3", "ab", "a$b", }, { "not a tag #4", "", "", }, { "not a tag #5", "", }, { "not a tag #6", "", "", }, { "not a tag #7", "a < b", "a < b", }, { "not a tag #8", "<.>", "<.>", }, { "not a tag #9", "a<<>>c", "a<<$$>>c", }, { "not a tag #10", "if x<0 and y < 0 then x*y>0", "if x<0 and y < 0 then x*y>0", }, // EOF in a tag name. { "tag name eof #0", "", }, { "tag name eof #4", ``, }, // Some malformed tags that are missing a '>'. { "malformed tag #0", ``, ``, }, { "malformed tag #1", `

`, `

`, }, { "malformed tag #2", `

`, }, { "malformed tag #3", `

`, }, { "malformed tag #4", `

`, `

`, }, { "malformed tag #5", `

`, }, { "malformed tag #6", `

`, `

`, }, { "malformed tag #7", `

`, }, { "malformed tag #8", `

`, `

`, }, // Raw text and RCDATA. { "basic raw text", "", "", }, { "unfinished script end tag", "$a$", }, { "'>' completes script end tag", "", "", }, { "self-closing script end tag", "", "", }, { "nested script tag", "", "", }, { "script/style mismatched tags", "