diff options
Diffstat (limited to 'tests/html5lib-tests/tokenizer/test4.test')
| -rw-r--r-- | tests/html5lib-tests/tokenizer/test4.test | 532 | 
1 files changed, 532 insertions, 0 deletions
| diff --git a/tests/html5lib-tests/tokenizer/test4.test b/tests/html5lib-tests/tokenizer/test4.test new file mode 100644 index 0000000..8963c74 --- /dev/null +++ b/tests/html5lib-tests/tokenizer/test4.test @@ -0,0 +1,532 @@ +{"tests": [ + +{"description":"< in attribute name", +"input":"<z/0  <>", +"output":[["StartTag", "z", {"0": "", "<": ""}]], +"errors":[ +    { "code": "unexpected-solidus-in-tag", "line": 1, "col": 4 }, +    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 7 } +]}, + +{"description":"< in unquoted attribute value", +"input":"<z x=<>", +"output":[["StartTag", "z", {"x": "<"}]], +"errors":[ +    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 } +]}, + +{"description":"= in unquoted attribute value", +"input":"<z z=z=z>", +"output":[["StartTag", "z", {"z": "z=z"}]], +"errors":[ +    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 } +]}, + +{"description":"= attribute", +"input":"<z =>", +"output":[["StartTag", "z", {"=": ""}]], +"errors":[ +    { "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 } +]}, + +{"description":"== attribute", +"input":"<z ==>", +"output":[["StartTag", "z", {"=": ""}]], +"errors":[ +    { "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 }, +    { "code": "missing-attribute-value", "line": 1, "col": 6 } +]}, + +{"description":"=== attribute", +"input":"<z ===>", +"output":[["StartTag", "z", {"=": "="}]], +"errors":[ +    { "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 }, +    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 } +]}, + +{"description":"==== attribute", +"input":"<z ====>", +"output":[["StartTag", "z", {"=": "=="}]], +"errors":[ +    { "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 }, +    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 }, +    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 } +]}, + +{"description":"\" after ampersand in double-quoted attribute value", +"input":"<z z=\"&\">", +"output":[["StartTag", "z", {"z": "&"}]]}, + +{"description":"' after ampersand in double-quoted attribute value", +"input":"<z z=\"&'\">", +"output":[["StartTag", "z", {"z": "&'"}]]}, + +{"description":"' after ampersand in single-quoted attribute value", +"input":"<z z='&'>", +"output":[["StartTag", "z", {"z": "&"}]]}, + +{"description":"\" after ampersand in single-quoted attribute value", +"input":"<z z='&\"'>", +"output":[["StartTag", "z", {"z": "&\""}]]}, + +{"description":"Text after bogus character reference", +"input":"<z z='&xlink_xmlns;'>bar<z>", +"output":[["StartTag","z",{"z":"&xlink_xmlns;"}],["Character","bar"],["StartTag","z",{}]]}, + +{"description":"Text after hex character reference", +"input":"<z z='  foo'>bar<z>", +"output":[["StartTag","z",{"z":"  foo"}],["Character","bar"],["StartTag","z",{}]]}, + +{"description":"Attribute name starting with \"", +"input":"<foo \"='bar'>", +"output":[["StartTag", "foo", {"\"": "bar"}]], +"errors":[ +    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 6 } +]}, + +{"description":"Attribute name starting with '", +"input":"<foo '='bar'>", +"output":[["StartTag", "foo", {"'": "bar"}]], +"errors":[ +    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 6 } +]}, + +{"description":"Attribute name containing \"", +"input":"<foo a\"b='bar'>", +"output":[["StartTag", "foo", {"a\"b": "bar"}]], +"errors":[ +    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 7 } +]}, + +{"description":"Attribute name containing '", +"input":"<foo a'b='bar'>", +"output":[["StartTag", "foo", {"a'b": "bar"}]], +"errors":[ +    { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 7 } +]}, + +{"description":"Unquoted attribute value containing '", +"input":"<foo a=b'c>", +"output":[["StartTag", "foo", {"a": "b'c"}]], +"errors":[ +    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 9 } +]}, + + +{"description":"Unquoted attribute value containing \"", +"input":"<foo a=b\"c>", +"output":[["StartTag", "foo", {"a": "b\"c"}]], +"errors":[ +    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 9 } +]}, + +{"description":"Double-quoted attribute value not followed by whitespace", +"input":"<foo a=\"b\"c>", +"output":[["StartTag", "foo", {"a": "b", "c": ""}]], +"errors":[ +    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 11 } +]}, + +{"description":"Single-quoted attribute value not followed by whitespace", +"input":"<foo a='b'c>", +"output":[["StartTag", "foo", {"a": "b", "c": ""}]], +"errors":[ +    { "code": "missing-whitespace-between-attributes", "line": 1, "col": 11 } +]}, + +{"description":"Quoted attribute followed by permitted /", +"input":"<br a='b'/>", +"output":[["StartTag","br",{"a":"b"},true]]}, + +{"description":"Quoted attribute followed by non-permitted /", +"input":"<bar a='b'/>", +"output":[["StartTag","bar",{"a":"b"},true]]}, + +{"description":"CR EOF after doctype name", +"input":"<!doctype html \r", +"output":[["DOCTYPE", "html", null, null, false]], +"errors":[ +    { "code": "eof-in-doctype", "line": 2, "col": 1 } +]}, + +{"description":"CR EOF in tag name", +"input":"<z\r", +"output":[], +"errors":[ +    { "code": "eof-in-tag", "line": 2, "col": 1 } +]}, + +{"description":"Slash EOF in tag name", +"input":"<z/", +"output":[], +"errors":[ +    { "code": "eof-in-tag", "line": 1, "col": 4 } +]}, + +{"description":"Zero hex numeric entity", +"input":"�", +"output":[["Character", "\uFFFD"]], +"errors":[ +    { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 }, +    { "code": "null-character-reference", "line": 1, "col": 5 } +]}, + +{"description":"Zero decimal numeric entity", +"input":"�", +"output":[["Character", "\uFFFD"]], +"errors":[ +    { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 4 }, +    { "code": "null-character-reference", "line": 1, "col": 4 } +]}, + +{"description":"Zero-prefixed hex numeric entity", +"input":"A", +"output":[["Character", "A"]]}, + +{"description":"Zero-prefixed decimal numeric entity", +"input":"A", +"output":[["Character", "A"]]}, + +{"description":"Empty hex numeric entities", +"input":"&#x &#X ", +"output":[["Character", "&#x &#X "]], +"errors":[ +    { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 4 }, +    { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 8 } +]}, + +{"description":"Invalid digit in hex numeric entity", +"input":"&#xZ", +"output":[["Character", "&#xZ"]], +"errors":[ +    { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 4 } +]}, + +{"description":"Empty decimal numeric entities", +"input":"&# &#; ", +"output":[["Character", "&# &#; "]], +"errors":[ +    { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 3 }, +    { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 6 } +]}, + +{"description":"Invalid digit in decimal numeric entity", +"input":"&#A", +"output":[["Character", "&#A"]], +"errors":[ +    { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 3 } +]}, + +{"description":"Non-BMP numeric entity", +"input":"𐀀", +"output":[["Character", "\uD800\uDC00"]]}, + +{"description":"Maximum non-BMP numeric entity", +"input":"", +"output":[["Character", "\uDBFF\uDFFF"]], +"errors":[ +    { "code": "noncharacter-character-reference", "line": 1, "col": 11 } +]}, + + +{"description":"Above maximum numeric entity", +"input":"�", +"output":[["Character", "\uFFFD"]], +"errors":[ +    { "code": "character-reference-outside-unicode-range", "line": 1, "col": 11 } +]}, + +{"description":"32-bit hex numeric entity", +"input":"�", +"output":[["Character", "\uFFFD"]], +"errors":[ +    { "code": "character-reference-outside-unicode-range", "line": 1, "col": 13 } +]}, + +{"description":"33-bit hex numeric entity", +"input":"�", +"output":[["Character", "\uFFFD"]], +"errors":[ +    { "code": "character-reference-outside-unicode-range", "line": 1, "col": 14 } +]}, + +{"description":"33-bit decimal numeric entity", +"input":"�", +"output":[["Character", "\uFFFD"]], +"errors":[ +    { "code": "character-reference-outside-unicode-range", "line": 1, "col": 14 } +]}, + +{"description":"65-bit hex numeric entity", +"input":"�", +"output":[["Character", "\uFFFD"]], +"errors":[ +    { "code": "character-reference-outside-unicode-range", "line": 1, "col": 22 } +]}, + +{"description":"65-bit decimal numeric entity", +"input":"�", +"output":[["Character", "\uFFFD"]], +"errors":[ +    { "code": "character-reference-outside-unicode-range", "line": 1, "col": 24 } +]}, + +{"description":"Surrogate code point edge cases", +"input":"퟿����", +"output":[["Character", "\uD7FF\uFFFD\uFFFD\uFFFD\uFFFD\uE000"]], +"errors":[ +    { "code": "surrogate-character-reference", "line": 1, "col": 17 }, +    { "code": "surrogate-character-reference", "line": 1, "col": 25 }, +    { "code": "surrogate-character-reference", "line": 1, "col": 33 }, +    { "code": "surrogate-character-reference", "line": 1, "col": 41 } +]}, + +{"description":"Uppercase start tag name", +"input":"<X>", +"output":[["StartTag", "x", {}]]}, + +{"description":"Uppercase end tag name", +"input":"</X>", +"output":[["EndTag", "x"]]}, + +{"description":"Uppercase attribute name", +"input":"<x X>", +"output":[["StartTag", "x", { "x":"" }]]}, + +{"description":"Tag/attribute name case edge values", +"input":"<x@AZ[`az{ @AZ[`az{>", +"output":[["StartTag", "x@az[`az{", { "@az[`az{":"" }]]}, + +{"description":"Duplicate different-case attributes", +"input":"<x x=1 x=2 X=3>", +"output":[["StartTag", "x", { "x":"1" }]], +"errors":[ +    { "code": "duplicate-attribute", "line": 1, "col": 9 }, +    { "code": "duplicate-attribute", "line": 1, "col": 13 } +]}, + +{"description":"Uppercase close tag attributes", +"input":"</x X>", +"output":[["EndTag", "x"]], +"errors":[ +    { "code": "end-tag-with-attributes", "line": 1, "col": 6 } +]}, + +{"description":"Duplicate close tag attributes", +"input":"</x x x>", +"output":[["EndTag", "x"]], +"errors":[ +    { "code": "duplicate-attribute", "line": 1, "col": 8 }, +    { "code": "end-tag-with-attributes", "line": 1, "col": 8 } +]}, + +{"description":"Permitted slash", +"input":"<br/>", +"output":[["StartTag","br",{},true]]}, + +{"description":"Non-permitted slash", +"input":"<xr/>", +"output":[["StartTag","xr",{},true]]}, + +{"description":"Permitted slash but in close tag", +"input":"</br/>", +"output":[["EndTag", "br"]], +"errors":[ +    { "code": "end-tag-with-trailing-solidus", "line": 1, "col": 6 } +]}, + +{"description":"Doctype public case-sensitivity (1)", +"input":"<!DoCtYpE HtMl PuBlIc \"AbC\" \"XyZ\">", +"output":[["DOCTYPE", "html", "AbC", "XyZ", true]]}, + +{"description":"Doctype public case-sensitivity (2)", +"input":"<!dOcTyPe hTmL pUbLiC \"aBc\" \"xYz\">", +"output":[["DOCTYPE", "html", "aBc", "xYz", true]]}, + +{"description":"Doctype system case-sensitivity (1)", +"input":"<!DoCtYpE HtMl SyStEm \"XyZ\">", +"output":[["DOCTYPE", "html", null, "XyZ", true]]}, + +{"description":"Doctype system case-sensitivity (2)", +"input":"<!dOcTyPe hTmL sYsTeM \"xYz\">", +"output":[["DOCTYPE", "html", null, "xYz", true]]}, + +{"description":"U+0000 in lookahead region after non-matching character", +"input":"<!doc>\u0000", +"output":[["Comment", "doc"], ["Character", "\u0000"]], +"errors":[ +    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }, +    { "code": "unexpected-null-character", "line": 1, "col": 7 } +]}, + +{"description":"U+0000 in lookahead region", +"input":"<!doc\u0000", +"output":[["Comment", "doc\uFFFD"]], +"errors":[ +    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }, +    { "code": "unexpected-null-character", "line": 1, "col": 6 } +]}, + +{"description":"U+0080 in lookahead region", +"input":"<!doc\u0080", +"output":[["Comment", "doc\u0080"]], +"errors":[ +    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }, +    { "code": "control-character-in-input-stream", "line": 1, "col": 6 } +]}, + +{"description":"U+FDD1 in lookahead region", +"input":"<!doc\uFDD1", +"output":[["Comment", "doc\uFDD1"]], +"errors":[ +    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }, +    { "code": "noncharacter-in-input-stream", "line": 1, "col": 6 } +]}, + +{"description":"U+1FFFF in lookahead region", +"input":"<!doc\uD83F\uDFFF", +"output":[["Comment", "doc\uD83F\uDFFF"]], +"errors":[ +    { "code": "incorrectly-opened-comment", "line": 1, "col": 3 }, +    { "code": "noncharacter-in-input-stream", "line": 1, "col": 6 } +]}, + +{"description":"CR followed by non-LF", +"input":"\r?", +"output":[["Character", "\n?"]]}, + +{"description":"CR at EOF", +"input":"\r", +"output":[["Character", "\n"]]}, + +{"description":"LF at EOF", +"input":"\n", +"output":[["Character", "\n"]]}, + +{"description":"CR LF", +"input":"\r\n", +"output":[["Character", "\n"]]}, + +{"description":"CR CR", +"input":"\r\r", +"output":[["Character", "\n\n"]]}, + +{"description":"LF LF", +"input":"\n\n", +"output":[["Character", "\n\n"]]}, + +{"description":"LF CR", +"input":"\n\r", +"output":[["Character", "\n\n"]]}, + +{"description":"text CR CR CR text", +"input":"text\r\r\rtext", +"output":[["Character", "text\n\n\ntext"]]}, + +{"description":"Doctype publik", +"input":"<!DOCTYPE html PUBLIK \"AbC\" \"XyZ\">", +"output":[["DOCTYPE", "html", null, null, false]], +"errors":[ +    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 } +]}, + +{"description":"Doctype publi", +"input":"<!DOCTYPE html PUBLI", +"output":[["DOCTYPE", "html", null, null, false]], +"errors":[ +    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 } +]}, + +{"description":"Doctype sistem", +"input":"<!DOCTYPE html SISTEM \"AbC\">", +"output":[["DOCTYPE", "html", null, null, false]], +"errors":[ +    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 } +]}, + +{"description":"Doctype sys", +"input":"<!DOCTYPE html SYS", +"output":[["DOCTYPE", "html", null, null, false]], +"errors":[ +    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 } +]}, + +{"description":"Doctype html x>text", +"input":"<!DOCTYPE html x>text", +"output":[["DOCTYPE", "html", null, null, false], ["Character", "text"]], +"errors":[ +    { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 } +]}, + +{"description":"Grave accent in unquoted attribute", +"input":"<a a=aa`>", +"output":[["StartTag", "a", {"a":"aa`"}]], +"errors":[ +    { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 8 } +]}, + +{"description":"EOF in tag name state ", +"input":"<a", +"output":[], +"errors": [ +    { "code": "eof-in-tag", "line": 1, "col": 3 } +]}, + +{"description":"EOF in before attribute name state", +"input":"<a ", +"output":[], +"errors":[ +    { "code": "eof-in-tag", "line": 1, "col": 4 } +]}, + +{"description":"EOF in attribute name state", +"input":"<a a", +"output":[], +"errors":[ +    { "code": "eof-in-tag", "line": 1, "col": 5 } +]}, + +{"description":"EOF in after attribute name state", +"input":"<a a ", +"output":[], +"errors":[ +    { "code": "eof-in-tag", "line": 1, "col": 6 } +]}, + +{"description":"EOF in before attribute value state", +"input":"<a a =", +"output":[], +"errors":[ +    { "code": "eof-in-tag", "line": 1, "col": 7 } +]}, + +{"description":"EOF in attribute value (double quoted) state", +"input":"<a a =\"a", +"output":[], +"errors":[ +    { "code": "eof-in-tag", "line": 1, "col": 9 } +]}, + +{"description":"EOF in attribute value (single quoted) state", +"input":"<a a ='a", +"output":[], +"errors":[ +    { "code": "eof-in-tag", "line": 1, "col": 9 } +]}, + +{"description":"EOF in attribute value (unquoted) state", +"input":"<a a =a", +"output":[], +"errors":[ +    { "code": "eof-in-tag", "line": 1, "col": 8 } +]}, + +{"description":"EOF in after attribute value state", +"input":"<a a ='a'", +"output":[], +"errors":[ +    { "code": "eof-in-tag", "line": 1, "col": 10 } +]} + +]} | 
