// Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package template import ( "bytes" "encoding/json" "fmt" "reflect" "strings" "unicode/utf8" ) // nextJSCtx returns the context that determines whether a slash after the // given run of tokens starts a regular expression instead of a division // operator: / or /=. // // This assumes that the token run does not include any string tokens, comment // tokens, regular expression literal tokens, or division operators. // // This fails on some valid but nonsensical JavaScript programs like // "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to // fail on any known useful programs. It is based on the draft // JavaScript 2.0 lexical grammar and requires one token of lookbehind: // http://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html func nextJSCtx(s []byte, preceding jsCtx) jsCtx { s = bytes.TrimRight(s, "\t\n\f\r \u2028\u2029") if len(s) == 0 { return preceding } // All cases below are in the single-byte UTF-8 group. switch c, n := s[len(s)-1], len(s); c { case '+', '-': // ++ and -- are not regexp preceders, but + and - are whether // they are used as infix or prefix operators. start := n - 1 // Count the number of adjacent dashes or pluses. for start > 0 && s[start-1] == c { start-- } if (n-start)&1 == 1 { // Reached for trailing minus signs since "---" is the // same as "-- -". return jsCtxRegexp } return jsCtxDivOp case '.': // Handle "42." if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' { return jsCtxDivOp } return jsCtxRegexp // Suffixes for all punctuators from section 7.7 of the language spec // that only end binary operators not handled above. case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?': return jsCtxRegexp // Suffixes for all punctuators from section 7.7 of the language spec // that are prefix operators not handled above. case '!', '~': return jsCtxRegexp // Matches all the punctuators from section 7.7 of the language spec // that are open brackets not handled above. case '(', '[': return jsCtxRegexp // Matches all the punctuators from section 7.7 of the language spec // that precede expression starts. case ':', ';', '{': return jsCtxRegexp // CAVEAT: the close punctuators ('}', ']', ')') precede div ops and // are handled in the default except for '}' which can precede a // division op as in // ({ valueOf: function () { return 42 } } / 2 // which is valid, but, in practice, developers don't divide object // literals, so our heuristic works well for code like // function () { ... } /foo/.test(x) && sideEffect(); // The ')' punctuator can precede a regular expression as in // if (b) /foo/.test(x) && ... // but this is much less likely than // (a + b) / c case '}': return jsCtxRegexp default: // Look for an IdentifierName and see if it is a keyword that // can precede a regular expression. j := n for j > 0 && isJSIdentPart(rune(s[j-1])) { j-- } if regexpPrecederKeywords[string(s[j:])] { return jsCtxRegexp } } // Otherwise is a punctuator not listed above, or // a string which precedes a div op, or an identifier // which precedes a div op. return jsCtxDivOp } // regexpPrecederKeywords is a set of reserved JS keywords that can precede a // regular expression in JS source. var regexpPrecederKeywords = map[string]bool{ "break": true, "case": true, "continue": true, "delete": true, "do": true, "else": true, "finally": true, "in": true, "instanceof": true, "return": true, "throw": true, "try": true, "typeof": true, "void": true, } var jsonMarshalType = reflect.TypeOf((*json.Marshaler)(nil)).Elem() // indirectToJSONMarshaler returns the value, after dereferencing as many times // as necessary to reach the base type (or nil) or an implementation of json.Marshal. func indirectToJSONMarshaler(a interface{}) interface{} { v := reflect.ValueOf(a) for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Ptr && !v.IsNil() { v = v.Elem() } return v.Interface() } // jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has // neither side-effects nor free variables outside (NaN, Infinity). func jsValEscaper(args ...interface{}) string { var a interface{} if len(args) == 1 { a = indirectToJSONMarshaler(args[0]) switch t := a.(type) { case JS: return string(t) case JSStr: // TODO: normalize quotes. return `"` + string(t) + `"` case json.Marshaler: // Do not treat as a Stringer. case fmt.Stringer: a = t.String() } } else { for i, arg := range args { args[i] = indirectToJSONMarshaler(arg) } a = fmt.Sprint(args...) } // TODO: detect cycles before calling Marshal which loops infinitely on // cyclic data. This may be an unacceptable DoS risk. b, err := json.Marshal(a) if err != nil { // Put a space before comment so that if it is flush against // a division operator it is not turned into a line comment: // x/{{y}} // turning into // x//* error marshaling y: // second line of error message */null return fmt.Sprintf(" /* %s */null ", strings.Replace(err.Error(), "*/", "* /", -1)) } // TODO: maybe post-process output to prevent it from containing // "", "", or " element, // or in an HTML5 event handler attribute such as onclick. func jsStrEscaper(args ...interface{}) string { s, t := stringify(args...) if t == contentTypeJSStr { return replace(s, jsStrNormReplacementTable) } return replace(s, jsStrReplacementTable) } // jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression // specials so the result is treated literally when included in a regular // expression literal. /foo{{.X}}bar/ matches the string "foo" followed by // the literal text of {{.X}} followed by the string "bar". func jsRegexpEscaper(args ...interface{}) string { s, _ := stringify(args...) s = replace(s, jsRegexpReplacementTable) if s == "" { // /{{.X}}/ should not produce a line comment when .X == "". return "(?:)" } return s } // replace replaces each rune r of s with replacementTable[r], provided that // r < len(replacementTable). If replacementTable[r] is the empty string then // no replacement is made. // It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and // `\u2029`. func replace(s string, replacementTable []string) string { var b bytes.Buffer r, w, written := rune(0), 0, 0 for i := 0; i < len(s); i += w { // See comment in htmlEscaper. r, w = utf8.DecodeRuneInString(s[i:]) var repl string switch { case int(r) < len(replacementTable) && replacementTable[r] != "": repl = replacementTable[r] case r == '\u2028': repl = `\u2028` case r == '\u2029': repl = `\u2029` default: continue } b.WriteString(s[written:i]) b.WriteString(repl) written = i + w } if written == 0 { return s } b.WriteString(s[written:]) return b.String() } var jsStrReplacementTable = []string{ 0: `\0`, '\t': `\t`, '\n': `\n`, '\v': `\x0b`, // "\v" == "v" on IE 6. '\f': `\f`, '\r': `\r`, // Encode HTML specials as hex so the output can be embedded // in HTML attributes without further encoding. '"': `\x22`, '&': `\x26`, '\'': `\x27`, '+': `\x2b`, '/': `\/`, '<': `\x3c`, '>': `\x3e`, '\\': `\\`, } // jsStrNormReplacementTable is like jsStrReplacementTable but does not // overencode existing escapes since this table has no entry for `\`. var jsStrNormReplacementTable = []string{ 0: `\0`, '\t': `\t`, '\n': `\n`, '\v': `\x0b`, // "\v" == "v" on IE 6. '\f': `\f`, '\r': `\r`, // Encode HTML specials as hex so the output can be embedded // in HTML attributes without further encoding. '"': `\x22`, '&': `\x26`, '\'': `\x27`, '+': `\x2b`, '/': `\/`, '<': `\x3c`, '>': `\x3e`, } var jsRegexpReplacementTable = []string{ 0: `\0`, '\t': `\t`, '\n': `\n`, '\v': `\x0b`, // "\v" == "v" on IE 6. '\f': `\f`, '\r': `\r`, // Encode HTML specials as hex so the output can be embedded // in HTML attributes without further encoding. '"': `\x22`, '$': `\$`, '&': `\x26`, '\'': `\x27`, '(': `\(`, ')': `\)`, '*': `\*`, '+': `\x2b`, '-': `\-`, '.': `\.`, '/': `\/`, '<': `\x3c`, '>': `\x3e`, '?': `\?`, '[': `\[`, '\\': `\\`, ']': `\]`, '^': `\^`, '{': `\{`, '|': `\|`, '}': `\}`, } // isJSIdentPart reports whether the given rune is a JS identifier part. // It does not handle all the non-Latin letters, joiners, and combining marks, // but it does handle every codepoint that can occur in a numeric literal or // a keyword. func isJSIdentPart(r rune) bool { switch { case r == '$': return true case '0' <= r && r <= '9': return true case 'A' <= r && r <= 'Z': return true case r == '_': return true case 'a' <= r && r <= 'z': return true } return false } // isJSType returns true if the given MIME type should be considered JavaScript. // // It is used to determine whether a script tag with a type attribute is a javascript container. func isJSType(mimeType string) bool { // per // https://www.w3.org/TR/html5/scripting-1.html#attr-script-type // https://tools.ietf.org/html/rfc7231#section-3.1.1 // https://tools.ietf.org/html/rfc4329#section-3 // https://www.ietf.org/rfc/rfc4627.txt // discard parameters if i := strings.Index(mimeType, ";"); i >= 0 { mimeType = mimeType[:i] } mimeType = strings.TrimSpace(mimeType) switch mimeType { case "application/ecmascript", "application/javascript", "application/json", "application/x-ecmascript", "application/x-javascript", "text/ecmascript", "text/javascript", "text/javascript1.0", "text/javascript1.1", "text/javascript1.2", "text/javascript1.3", "text/javascript1.4", "text/javascript1.5", "text/jscript", "text/livescript", "text/x-ecmascript", "text/x-javascript": return true default: return false } }