// Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package template import ( "bytes" "fmt" "html" "io" "text/template" "text/template/parse" ) // escapeTemplate rewrites the named template, which must be // associated with t, to guarantee that the output of any of the named // templates is properly escaped. If no error is returned, then the named templates have // been modified. Otherwise the named templates have been rendered // unusable. func escapeTemplate(tmpl *Template, node parse.Node, name string) error { e := newEscaper(tmpl) c, _ := e.escapeTree(context{}, node, name, 0) var err error if c.err != nil { err, c.err.Name = c.err, name } else if c.state != stateText { err = &Error{ErrEndContext, nil, name, 0, fmt.Sprintf("ends in a non-text context: %v", c)} } if err != nil { // Prevent execution of unsafe templates. if t := tmpl.set[name]; t != nil { t.escapeErr = err t.text.Tree = nil t.Tree = nil } return err } e.commit() if t := tmpl.set[name]; t != nil { t.escapeErr = escapeOK t.Tree = t.text.Tree } return nil } // funcMap maps command names to functions that render their inputs safe. var funcMap = template.FuncMap{ "html_template_attrescaper": attrEscaper, "html_template_commentescaper": commentEscaper, "html_template_cssescaper": cssEscaper, "html_template_cssvaluefilter": cssValueFilter, "html_template_htmlnamefilter": htmlNameFilter, "html_template_htmlescaper": htmlEscaper, "html_template_jsregexpescaper": jsRegexpEscaper, "html_template_jsstrescaper": jsStrEscaper, "html_template_jsvalescaper": jsValEscaper, "html_template_nospaceescaper": htmlNospaceEscaper, "html_template_rcdataescaper": rcdataEscaper, "html_template_urlescaper": urlEscaper, "html_template_urlfilter": urlFilter, "html_template_urlnormalizer": urlNormalizer, } // equivEscapers matches contextual escapers to equivalent template builtins. 
var equivEscapers = map[string]string{ "html_template_attrescaper": "html", "html_template_htmlescaper": "html", "html_template_nospaceescaper": "html", "html_template_rcdataescaper": "html", "html_template_urlescaper": "urlquery", "html_template_urlnormalizer": "urlquery", } // escaper collects type inferences about templates and changes needed to make // templates injection safe. type escaper struct { tmpl *Template // output[templateName] is the output context for a templateName that // has been mangled to include its input context. output map[string]context // derived[c.mangle(name)] maps to a template derived from the template // named name templateName for the start context c. derived map[string]*template.Template // called[templateName] is a set of called mangled template names. called map[string]bool // xxxNodeEdits are the accumulated edits to apply during commit. // Such edits are not applied immediately in case a template set // executes a given template in different escaping contexts. actionNodeEdits map[*parse.ActionNode][]string templateNodeEdits map[*parse.TemplateNode]string textNodeEdits map[*parse.TextNode][]byte } // newEscaper creates a blank escaper for the given set. func newEscaper(t *Template) *escaper { return &escaper{ t, map[string]context{}, map[string]*template.Template{}, map[string]bool{}, map[*parse.ActionNode][]string{}, map[*parse.TemplateNode]string{}, map[*parse.TextNode][]byte{}, } } // filterFailsafe is an innocuous word that is emitted in place of unsafe values // by sanitizer functions. It is not a keyword in any programming language, // contains no special characters, is not empty, and when it appears in output // it is distinct enough that a developer can find the source of the problem // via a search engine. const filterFailsafe = "ZgotmplZ" // escape escapes a template node. 
// It dispatches on the concrete type of n to the matching escapeXxx method
// and returns the context that method reports. It panics for any node type
// that is not listed in the switch.
func (e *escaper) escape(c context, n parse.Node) context {
	switch n := n.(type) {
	case *parse.ActionNode:
		return e.escapeAction(c, n)
	case *parse.IfNode:
		return e.escapeBranch(c, &n.BranchNode, "if")
	case *parse.ListNode:
		return e.escapeList(c, n)
	case *parse.RangeNode:
		return e.escapeBranch(c, &n.BranchNode, "range")
	case *parse.TemplateNode:
		return e.escapeTemplate(c, n)
	case *parse.TextNode:
		return e.escapeText(c, n)
	case *parse.WithNode:
		return e.escapeBranch(c, &n.BranchNode, "with")
	}
	panic("escaping " + n.String() + " is unimplemented")
}

// escapeAction escapes an action template node.
// It picks the escaper function names appropriate for the context c the
// action appears in, records them for n via e.editActionNode, and returns
// the context that follows the action. Actions that declare variables are
// left untouched, and an error context is returned unchanged.
func (e *escaper) escapeAction(c context, n *parse.ActionNode) context {
	if len(n.Pipe.Decl) != 0 {
		// A local variable assignment, not an interpolation.
		return c
	}
	c = nudge(c)
	// s accumulates, in order, the names of the escapers to record for n.
	s := make([]string, 0, 3)
	switch c.state {
	case stateError:
		return c
	case stateURL, stateCSSDqStr, stateCSSSqStr, stateCSSDqURL, stateCSSSqURL, stateCSSURL:
		switch c.urlPart {
		case urlPartNone:
			// At the very start of a URL the value is filtered first, and
			// then also gets the escaper chosen by the pre-query case below.
			s = append(s, "html_template_urlfilter")
			fallthrough
		case urlPartPreQuery:
			switch c.state {
			case stateCSSDqStr, stateCSSSqStr:
				s = append(s, "html_template_cssescaper")
			default:
				s = append(s, "html_template_urlnormalizer")
			}
		case urlPartQueryOrFrag:
			s = append(s, "html_template_urlescaper")
		case urlPartUnknown:
			// The URL part cannot be determined, so report an error context
			// instead of guessing which escaper applies.
			return context{
				state: stateError,
				err:   errorf(ErrAmbigContext, n, n.Line, "%s appears in an ambiguous URL context", n),
			}
		default:
			panic(c.urlPart.String())
		}
	case stateJS:
		s = append(s, "html_template_jsvalescaper")
		// A slash after a value starts a div operator.
		c.jsCtx = jsCtxDivOp
	case stateJSDqStr, stateJSSqStr:
		s = append(s, "html_template_jsstrescaper")
	case stateJSRegexp:
		s = append(s, "html_template_jsregexpescaper")
	case stateCSS:
		s = append(s, "html_template_cssvaluefilter")
	case stateText:
		s = append(s, "html_template_htmlescaper")
	case stateRCDATA:
		s = append(s, "html_template_rcdataescaper")
	case stateAttr:
		// Handled below in delim check.
	case stateAttrName, stateTag:
		// An action in a tag is treated as producing an attribute name.
		c.state = stateAttrName
		s = append(s, "html_template_htmlnamefilter")
	default:
		if isComment(c.state) {
			s = append(s, "html_template_commentescaper")
		} else {
			panic("unexpected state " + c.state.String())
		}
	}
	// A second escaper may be needed depending on how the enclosing
	// attribute value, if any, is delimited.
	switch c.delim {
	case delimNone:
		// No extra-escaping needed for raw text content.
	case delimSpaceOrTagEnd:
		s = append(s, "html_template_nospaceescaper")
	default:
		s = append(s, "html_template_attrescaper")
	}
	e.editActionNode(n, s)
	return c
}

// allIdents returns the names of the identifiers under the Ident field of the node,
// which might be a singleton (Identifier) or a slice (Field or Chain).
// It returns nil for any other node type.
func allIdents(node parse.Node) []string {
	switch node := node.(type) {
	case *parse.IdentifierNode:
		return []string{node.Ident}
	case *parse.FieldNode:
		return node.Ident
	case *parse.ChainNode:
		return node.Field
	}
	return nil
}

// ensurePipelineContains ensures that the pipeline has commands with
// the identifiers in s in order.
// If the pipeline already has some of the sanitizers, do not interfere.
// For example, if p is (.X | html) and s is ["escapeJSVal", "html"] then it
// has one matching, "html", and one to insert, "escapeJSVal", to produce
// (.X | escapeJSVal | html).
// The pipeline is modified in place by replacing p.Cmds; nothing is changed
// when s is empty or when all of s is already present.
func ensurePipelineContains(p *parse.PipeNode, s []string) {
	if len(s) == 0 {
		return
	}
	n := len(p.Cmds)
	// Find the identifiers at the end of the command chain.
	// NOTE(review): the loop has no break, so idents ends up as the commands
	// after the leftmost command whose first argument is not an
	// IdentifierNode, which can be wider than the trailing run of identifier
	// commands. allIdents tolerates the non-identifier nodes this lets
	// through; confirm whether that is intended.
	idents := p.Cmds
	for i := n - 1; i >= 0; i-- {
		if cmd := p.Cmds[i]; len(cmd.Args) != 0 {
			if _, ok := cmd.Args[0].(*parse.IdentifierNode); ok {
				continue
			}
		}
		idents = p.Cmds[i+1:]
	}
	// Count how many leading entries of s already appear, in order, among
	// the existing identifiers. If all of them do, there is nothing to add.
	dups := 0
	for _, idNode := range idents {
		for _, ident := range allIdents(idNode.Args[0]) {
			if escFnsEq(s[dups], ident) {
				dups++
				if dups == len(s) {
					return
				}
			}
		}
	}
	// Start from the commands that precede idents; capacity leaves room for
	// the existing commands plus the sanitizers that still have to be added.
	newCmds := make([]*parse.CommandNode, n-len(idents), n+len(s)-dups)
	copy(newCmds, p.Cmds)
	// Merge existing identifier commands with the sanitizers needed.
	for _, idNode := range idents {
		pos := idNode.Args[0].Position()
		for _, ident := range allIdents(idNode.Args[0]) {
			i := indexOfStr(ident, s, escFnsEq)
			if i != -1 {
				// Insert the sanitizers that must run before the matched
				// one, then drop them and the match from the pending list.
				for _, name := range s[:i] {
					newCmds = appendCmd(newCmds, newIdentCmd(name, pos))
				}
				s = s[i+1:]
			}
		}
		newCmds = appendCmd(newCmds, idNode)
	}
	// Create any remaining sanitizers.
	for _, name := range s {
		newCmds = appendCmd(newCmds, newIdentCmd(name, p.Position()))
	}
	p.Cmds = newCmds
}

// redundantFuncs[a][b] implies that funcMap[b](funcMap[a](x)) == funcMap[a](x)
// for all x.
var redundantFuncs = map[string]map[string]bool{
	"html_template_commentescaper": {
		"html_template_attrescaper":    true,
		"html_template_nospaceescaper": true,
		"html_template_htmlescaper":    true,
	},
	"html_template_cssescaper": {
		"html_template_attrescaper": true,
	},
	"html_template_jsregexpescaper": {
		"html_template_attrescaper": true,
	},
	"html_template_jsstrescaper": {
		"html_template_attrescaper": true,
	},
	"html_template_urlescaper": {
		"html_template_urlnormalizer": true,
	},
}

// appendCmd appends the given command to the end of the command pipeline
// unless it is redundant with the last command.
// Redundancy is only considered when the first argument of both commands is
// an identifier and redundantFuncs lists the pair; otherwise cmd is appended.
func appendCmd(cmds []*parse.CommandNode, cmd *parse.CommandNode) []*parse.CommandNode {
	if n := len(cmds); n != 0 {
		last, okLast := cmds[n-1].Args[0].(*parse.IdentifierNode)
		next, okNext := cmd.Args[0].(*parse.IdentifierNode)
		if okLast && okNext && redundantFuncs[last.Ident][next.Ident] {
			return cmds
		}
	}
	return append(cmds, cmd)
}

// indexOfStr is the first i such that eq(s, strs[i]) or -1 if s was not found.
func indexOfStr(s string, strs []string, eq func(a, b string) bool) int {
	for i, t := range strs {
		if eq(s, t) {
			return i
		}
	}
	return -1
}

// escFnsEq reports whether the two escaping functions are equivalent.
// Each name that has an entry in equivEscapers is replaced by that entry
// before the two names are compared.
func escFnsEq(a, b string) bool {
	if e := equivEscapers[a]; e != "" {
		a = e
	}
	if e := equivEscapers[b]; e != "" {
		b = e
	}
	return a == b
}

// newIdentCmd produces a command containing a single identifier node.
func newIdentCmd(identifier string, pos parse.Pos) *parse.CommandNode { return &parse.CommandNode{ NodeType: parse.NodeCommand, Args: []parse.Node{parse.NewIdentifier(identifier).SetTree(nil).SetPos(pos)}, // TODO: SetTree. } } // nudge returns the context that would result from following empty string // transitions from the input context. // For example, parsing: // `90% of the time. e.output[t.Name()] = c return e.escapeListConditionally(c, t.Tree.Root, filter) } // delimEnds maps each delim to a string of characters that terminate it. var delimEnds = [...]string{ delimDoubleQuote: `"`, delimSingleQuote: "'", // Determined empirically by running the below in various browsers. // var div = document.createElement("DIV"); // for (var i = 0; i < 0x10000; ++i) { // div.innerHTML = ""; // if (div.getElementsByTagName("SPAN")[0].title.indexOf("bar") < 0) // document.write("

U+" + i.toString(16)); // } delimSpaceOrTagEnd: " \t\n\f\r>", } var doctypeBytes = []byte("= i; j-- { if s[j] == '<' { end = j break } } } for j := i; j < end; j++ { if s[j] == '<' && !bytes.HasPrefix(bytes.ToUpper(s[j:]), doctypeBytes) { b.Write(s[written:j]) b.WriteString("<") written = j + 1 } } } else if isComment(c.state) && c.delim == delimNone { switch c.state { case stateJSBlockCmt: // http://es5.github.com/#x7.4: // "Comments behave like white space and are // discarded except that, if a MultiLineComment // contains a line terminator character, then // the entire comment is considered to be a // LineTerminator for purposes of parsing by // the syntactic grammar." if bytes.IndexAny(s[written:i1], "\n\r\u2028\u2029") != -1 { b.WriteByte('\n') } else { b.WriteByte(' ') } case stateCSSBlockCmt: b.WriteByte(' ') } written = i1 } if c.state != c1.state && isComment(c1.state) && c1.delim == delimNone { // Preserve the portion between written and the comment start. cs := i1 - 2 if c1.state == stateHTMLCmt { // "