diff options
Diffstat (limited to 'tests/html5lib-tests/tokenizer/entities.test')
-rw-r--r-- | tests/html5lib-tests/tokenizer/entities.test | 542 |
1 files changed, 542 insertions, 0 deletions
diff --git a/tests/html5lib-tests/tokenizer/entities.test b/tests/html5lib-tests/tokenizer/entities.test new file mode 100644 index 0000000..a6469cd --- /dev/null +++ b/tests/html5lib-tests/tokenizer/entities.test @@ -0,0 +1,542 @@ +{"tests": [ + +{"description": "Undefined named entity in a double-quoted attribute value ending in semicolon and whose name starts with a known entity name.", +"input":"<h a=\"¬i;\">", +"output": [["StartTag", "h", {"a": "¬i;"}]]}, + +{"description": "Entity name requiring semicolon instead followed by the equals sign in a double-quoted attribute value.", +"input":"<h a=\"&lang=\">", +"output": [["StartTag", "h", {"a": "&lang="}]]}, + +{"description": "Valid entity name followed by the equals sign in a double-quoted attribute value.", +"input":"<h a=\"¬=\">", +"output": [["StartTag", "h", {"a": "¬="}]]}, + +{"description": "Undefined named entity in a single-quoted attribute value ending in semicolon and whose name starts with a known entity name.", +"input":"<h a='¬i;'>", +"output": [["StartTag", "h", {"a": "¬i;"}]]}, + +{"description": "Entity name requiring semicolon instead followed by the equals sign in a single-quoted attribute value.", +"input":"<h a='&lang='>", +"output": [["StartTag", "h", {"a": "&lang="}]]}, + +{"description": "Valid entity name followed by the equals sign in a single-quoted attribute value.", +"input":"<h a='¬='>", +"output": [["StartTag", "h", {"a": "¬="}]]}, + +{"description": "Undefined named entity in an unquoted attribute value ending in semicolon and whose name starts with a known entity name.", +"input":"<h a=¬i;>", +"output": [["StartTag", "h", {"a": "¬i;"}]]}, + +{"description": "Entity name requiring semicolon instead followed by the equals sign in an unquoted attribute value.", +"input":"<h a=&lang=>", +"output": [["StartTag", "h", {"a": "&lang="}]], +"errors":[ + { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 11 } +]}, + +{"description": "Valid entity name 
followed by the equals sign in an unquoted attribute value.", +"input":"<h a=¬=>", +"output": [["StartTag", "h", {"a": "¬="}]], +"errors":[ + { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 10 } +]}, + +{"description": "Ambiguous ampersand.", +"input":"&rrrraannddom;", +"output": [["Character", "&rrrraannddom;"]], +"errors":[ + { "code": "unknown-named-character-reference", "line": 1, "col": 14 } +]}, + +{"description": "Semicolonless named entity 'not' followed by 'i;' in body", +"input":"¬i;", +"output": [["Character", "\u00ACi;"]], +"errors":[ + { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 } +]}, + +{"description": "Very long undefined named entity in body", +"input":"&ammmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmp;", +"output": [["Character", 
"&ammmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmp;"]], +"errors":[ + { "code": "unknown-named-character-reference", "line": 1, "col": 950 } +]}, + +{"description": "CR as numeric entity", +"input":"
", +"output": [["Character", "\r"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 7 } +]}, + +{"description": "CR as hexadecimal numeric entity", +"input":"
", +"output": [["Character", "\r"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 EURO SIGN numeric entity.", +"input":"€", +"output": [["Character", "\u20AC"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", +"input":"", +"output": [["Character", "\u0081"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK numeric entity.", +"input":"‚", +"output": [["Character", "\u201A"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK numeric entity.", +"input":"ƒ", +"output": [["Character", "\u0192"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK numeric entity.", +"input":"„", +"output": [["Character", "\u201E"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 HORIZONTAL ELLIPSIS numeric entity.", +"input":"…", +"output": [["Character", "\u2026"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 DAGGER numeric entity.", +"input":"†", +"output": [["Character", "\u2020"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 DOUBLE DAGGER numeric entity.", +"input":"‡", +"output": [["Character", "\u2021"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT numeric entity.", +"input":"ˆ", +"output": [["Character", "\u02C6"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": 
"Windows-1252 PER MILLE SIGN numeric entity.", +"input":"‰", +"output": [["Character", "\u2030"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON numeric entity.", +"input":"Š", +"output": [["Character", "\u0160"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK numeric entity.", +"input":"‹", +"output": [["Character", "\u2039"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE numeric entity.", +"input":"Œ", +"output": [["Character", "\u0152"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", +"input":"", +"output": [["Character", "\u008D"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON numeric entity.", +"input":"Ž", +"output": [["Character", "\u017D"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", +"input":"", +"output": [["Character", "\u008F"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", +"input":"", +"output": [["Character", "\u0090"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK numeric entity.", +"input":"‘", +"output": [["Character", "\u2018"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK numeric entity.", +"input":"’", +"output": [["Character", 
"\u2019"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK numeric entity.", +"input":"“", +"output": [["Character", "\u201C"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK numeric entity.", +"input":"”", +"output": [["Character", "\u201D"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 BULLET numeric entity.", +"input":"•", +"output": [["Character", "\u2022"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 EN DASH numeric entity.", +"input":"–", +"output": [["Character", "\u2013"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 EM DASH numeric entity.", +"input":"—", +"output": [["Character", "\u2014"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 SMALL TILDE numeric entity.", +"input":"˜", +"output": [["Character", "\u02DC"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 TRADE MARK SIGN numeric entity.", +"input":"™", +"output": [["Character", "\u2122"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON numeric entity.", +"input":"š", +"output": [["Character", "\u0161"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK numeric entity.", +"input":"›", +"output": [["Character", "\u203A"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 LATIN SMALL LIGATURE OE 
numeric entity.", +"input":"œ", +"output": [["Character", "\u0153"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", +"input":"", +"output": [["Character", "\u009D"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 EURO SIGN hexadecimal numeric entity.", +"input":"€", +"output": [["Character", "\u20AC"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", +"input":"", +"output": [["Character", "\u0081"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK hexadecimal numeric entity.", +"input":"‚", +"output": [["Character", "\u201A"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK hexadecimal numeric entity.", +"input":"ƒ", +"output": [["Character", "\u0192"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK hexadecimal numeric entity.", +"input":"„", +"output": [["Character", "\u201E"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 HORIZONTAL ELLIPSIS hexadecimal numeric entity.", +"input":"…", +"output": [["Character", "\u2026"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 DAGGER hexadecimal numeric entity.", +"input":"†", +"output": [["Character", "\u2020"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 DOUBLE DAGGER hexadecimal numeric entity.", +"input":"‡", +"output": [["Character", 
"\u2021"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT hexadecimal numeric entity.", +"input":"ˆ", +"output": [["Character", "\u02C6"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 PER MILLE SIGN hexadecimal numeric entity.", +"input":"‰", +"output": [["Character", "\u2030"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON hexadecimal numeric entity.", +"input":"Š", +"output": [["Character", "\u0160"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.", +"input":"‹", +"output": [["Character", "\u2039"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE hexadecimal numeric entity.", +"input":"Œ", +"output": [["Character", "\u0152"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", +"input":"", +"output": [["Character", "\u008D"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON hexadecimal numeric entity.", +"input":"Ž", +"output": [["Character", "\u017D"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", +"input":"", +"output": [["Character", "\u008F"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", +"input":"", +"output": 
[["Character", "\u0090"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK hexadecimal numeric entity.", +"input":"‘", +"output": [["Character", "\u2018"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK hexadecimal numeric entity.", +"input":"’", +"output": [["Character", "\u2019"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK hexadecimal numeric entity.", +"input":"“", +"output": [["Character", "\u201C"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK hexadecimal numeric entity.", +"input":"”", +"output": [["Character", "\u201D"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 BULLET hexadecimal numeric entity.", +"input":"•", +"output": [["Character", "\u2022"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 EN DASH hexadecimal numeric entity.", +"input":"–", +"output": [["Character", "\u2013"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 EM DASH hexadecimal numeric entity.", +"input":"—", +"output": [["Character", "\u2014"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 SMALL TILDE hexadecimal numeric entity.", +"input":"˜", +"output": [["Character", "\u02DC"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 TRADE MARK SIGN hexadecimal numeric entity.", +"input":"™", +"output": [["Character", "\u2122"]], +"errors":[ + { "code": 
"control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON hexadecimal numeric entity.", +"input":"š", +"output": [["Character", "\u0161"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.", +"input":"›", +"output": [["Character", "\u203A"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 LATIN SMALL LIGATURE OE hexadecimal numeric entity.", +"input":"œ", +"output": [["Character", "\u0153"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", +"input":"", +"output": [["Character", "\u009D"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 LATIN SMALL LETTER Z WITH CARON hexadecimal numeric entity.", +"input":"ž", +"output": [["Character", "\u017E"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Windows-1252 LATIN CAPITAL LETTER Y WITH DIAERESIS hexadecimal numeric entity.", +"input":"Ÿ", +"output": [["Character", "\u0178"]], +"errors":[ + { "code": "control-character-reference", "line": 1, "col": 8 } +]}, + +{"description": "Decimal numeric entity followed by hex character a.", +"input":"aa", +"output": [["Character", "aa"]], +"errors":[ + { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 } +]}, + +{"description": "Decimal numeric entity followed by hex character A.", +"input":"aA", +"output": [["Character", "aA"]], +"errors":[ + { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 } +]}, + +{"description": "Decimal numeric entity followed by hex character f.", +"input":"af", +"output": [["Character", "af"]], 
+"errors":[ + { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 } +]}, + +{"description": "Decimal numeric entity followed by hex character F.", +"input":"&#97F", +"output": [["Character", "aF"]], +"errors":[ + { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 } +]} + +]} |