{"tests": [ {"description": "Undefined named entity in a double-quoted attribute value ending in semicolon and whose name starts with a known entity name.", "input":"<h a=\"&noti;\">", "output": [["StartTag", "h", {"a": "&noti;"}]]}, {"description": "Entity name requiring semicolon instead followed by the equals sign in a double-quoted attribute value.", "input":"<h a=\"&lang=\">", "output": [["StartTag", "h", {"a": "&lang="}]]}, {"description": "Valid entity name followed by the equals sign in a double-quoted attribute value.", "input":"<h a=\"&not=\">", "output": [["StartTag", "h", {"a": "&not="}]]}, {"description": "Undefined named entity in a single-quoted attribute value ending in semicolon and whose name starts with a known entity name.", "input":"<h a='&noti;'>", "output": [["StartTag", "h", {"a": "&noti;"}]]}, {"description": "Entity name requiring semicolon instead followed by the equals sign in a single-quoted attribute value.", "input":"<h a='&lang='>", "output": [["StartTag", "h", {"a": "&lang="}]]}, {"description": "Valid entity name followed by the equals sign in a single-quoted attribute value.", "input":"<h a='&not='>", "output": [["StartTag", "h", {"a": "&not="}]]}, {"description": "Undefined named entity in an unquoted attribute value ending in semicolon and whose name starts with a known entity name.", "input":"<h a=&noti;>", "output": [["StartTag", "h", {"a": "&noti;"}]]}, {"description": "Entity name requiring semicolon instead followed by the equals sign in an unquoted attribute value.", "input":"<h a=&lang=>", "output": [["StartTag", "h", {"a": "&lang="}]], "errors":[ { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 11 } ]}, {"description": "Valid entity name followed by the equals sign in an unquoted attribute value.", "input":"<h a=&not=>", "output": [["StartTag", "h", {"a": "&not="}]], "errors":[ { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 10 } ]}, {"description": "Ambiguous ampersand.", "input":"&rrrraannddom;", "output": [["Character", "&rrrraannddom;"]], "errors":[ { "code": 
"unknown-named-character-reference", "line": 1, "col": 14 } ]}, {"description": "Semicolonless named entity 'not' followed by 'i;' in body", "input":"¬i;", "output": [["Character", "\u00ACi;"]], "errors":[ { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 } ]}, {"description": "Very long undefined named entity in body", "input":"&ammmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmp;", "output": [["Character", 
"&ammmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmp;"]], "errors":[ { "code": "unknown-named-character-reference", "line": 1, "col": 950 } ]}, {"description": "CR as numeric entity", "input":" ", "output": [["Character", "\r"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 7 } ]}, {"description": "CR as hexadecimal numeric entity", "input":" ", "output": [["Character", "\r"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 EURO SIGN numeric entity.", "input":"€", "output": [["Character", "\u20AC"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", "input":"", "output": [["Character", "\u0081"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK numeric entity.", "input":"‚", "output": [["Character", "\u201A"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 
LATIN SMALL LETTER F WITH HOOK numeric entity.", "input":"ƒ", "output": [["Character", "\u0192"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK numeric entity.", "input":"„", "output": [["Character", "\u201E"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 HORIZONTAL ELLIPSIS numeric entity.", "input":"…", "output": [["Character", "\u2026"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 DAGGER numeric entity.", "input":"†", "output": [["Character", "\u2020"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 DOUBLE DAGGER numeric entity.", "input":"‡", "output": [["Character", "\u2021"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT numeric entity.", "input":"ˆ", "output": [["Character", "\u02C6"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 PER MILLE SIGN numeric entity.", "input":"‰", "output": [["Character", "\u2030"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON numeric entity.", "input":"Š", "output": [["Character", "\u0160"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK numeric entity.", "input":"‹", "output": [["Character", "\u2039"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN CAPITAL LIGATURE OE numeric entity.", "input":"Œ", "output": [["Character", "\u0152"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": 
"Windows-1252 REPLACEMENT CHAR numeric entity.", "input":"", "output": [["Character", "\u008D"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON numeric entity.", "input":"Ž", "output": [["Character", "\u017D"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", "input":"", "output": [["Character", "\u008F"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", "input":"", "output": [["Character", "\u0090"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LEFT SINGLE QUOTATION MARK numeric entity.", "input":"‘", "output": [["Character", "\u2018"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK numeric entity.", "input":"’", "output": [["Character", "\u2019"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK numeric entity.", "input":"“", "output": [["Character", "\u201C"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK numeric entity.", "input":"”", "output": [["Character", "\u201D"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 BULLET numeric entity.", "input":"•", "output": [["Character", "\u2022"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 EN DASH numeric entity.", "input":"–", "output": [["Character", "\u2013"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 EM DASH numeric 
entity.", "input":"—", "output": [["Character", "\u2014"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 SMALL TILDE numeric entity.", "input":"˜", "output": [["Character", "\u02DC"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 TRADE MARK SIGN numeric entity.", "input":"™", "output": [["Character", "\u2122"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON numeric entity.", "input":"š", "output": [["Character", "\u0161"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK numeric entity.", "input":"›", "output": [["Character", "\u203A"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN SMALL LIGATURE OE numeric entity.", "input":"œ", "output": [["Character", "\u0153"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", "input":"", "output": [["Character", "\u009D"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 EURO SIGN hexadecimal numeric entity.", "input":"€", "output": [["Character", "\u20AC"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", "input":"", "output": [["Character", "\u0081"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK hexadecimal numeric entity.", "input":"‚", "output": [["Character", "\u201A"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN SMALL 
LETTER F WITH HOOK hexadecimal numeric entity.", "input":"ƒ", "output": [["Character", "\u0192"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK hexadecimal numeric entity.", "input":"„", "output": [["Character", "\u201E"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 HORIZONTAL ELLIPSIS hexadecimal numeric entity.", "input":"…", "output": [["Character", "\u2026"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 DAGGER hexadecimal numeric entity.", "input":"†", "output": [["Character", "\u2020"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 DOUBLE DAGGER hexadecimal numeric entity.", "input":"‡", "output": [["Character", "\u2021"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT hexadecimal numeric entity.", "input":"ˆ", "output": [["Character", "\u02C6"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 PER MILLE SIGN hexadecimal numeric entity.", "input":"‰", "output": [["Character", "\u2030"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON hexadecimal numeric entity.", "input":"Š", "output": [["Character", "\u0160"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.", "input":"‹", "output": [["Character", "\u2039"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN CAPITAL LIGATURE OE hexadecimal numeric entity.", "input":"Œ", "output": [["Character", 
"\u0152"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", "input":"", "output": [["Character", "\u008D"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON hexadecimal numeric entity.", "input":"Ž", "output": [["Character", "\u017D"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", "input":"", "output": [["Character", "\u008F"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", "input":"", "output": [["Character", "\u0090"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LEFT SINGLE QUOTATION MARK hexadecimal numeric entity.", "input":"‘", "output": [["Character", "\u2018"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK hexadecimal numeric entity.", "input":"’", "output": [["Character", "\u2019"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK hexadecimal numeric entity.", "input":"“", "output": [["Character", "\u201C"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK hexadecimal numeric entity.", "input":"”", "output": [["Character", "\u201D"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 BULLET hexadecimal numeric entity.", "input":"•", "output": [["Character", "\u2022"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": 
"Windows-1252 EN DASH hexadecimal numeric entity.", "input":"–", "output": [["Character", "\u2013"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 EM DASH hexadecimal numeric entity.", "input":"—", "output": [["Character", "\u2014"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 SMALL TILDE hexadecimal numeric entity.", "input":"˜", "output": [["Character", "\u02DC"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 TRADE MARK SIGN hexadecimal numeric entity.", "input":"™", "output": [["Character", "\u2122"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON hexadecimal numeric entity.", "input":"š", "output": [["Character", "\u0161"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.", "input":"›", "output": [["Character", "\u203A"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN SMALL LIGATURE OE hexadecimal numeric entity.", "input":"œ", "output": [["Character", "\u0153"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", "input":"", "output": [["Character", "\u009D"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN SMALL LETTER Z WITH CARON hexadecimal numeric entity.", "input":"ž", "output": [["Character", "\u017E"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN CAPITAL LETTER Y WITH DIAERESIS hexadecimal numeric entity.", "input":"Ÿ", "output": 
[["Character", "\u0178"]], "errors":[ { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Decimal numeric entity followed by hex character a.", "input":"&#97a", "output": [["Character", "aa"]], "errors":[ { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 } ]}, {"description": "Decimal numeric entity followed by hex character A.", "input":"&#97A", "output": [["Character", "aA"]], "errors":[ { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 } ]}, {"description": "Decimal numeric entity followed by hex character f.", "input":"&#97f", "output": [["Character", "af"]], "errors":[ { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 } ]}, {"description": "Decimal numeric entity followed by hex character F.", "input":"&#97F", "output": [["Character", "aF"]], "errors":[ { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 } ]} ]}