Diffstat (limited to 'src/tokenizer/mod.rs')
-rw-r--r--  src/tokenizer/mod.rs  160
1 file changed, 78 insertions(+), 82 deletions(-)
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs
index a58e388..bcbc6b7 100644
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@@ -9,9 +9,9 @@
//! The HTML5 tokenizer.
+pub use self::interface::{Attribute, Doctype, EndTag, StartTag, Tag, TagKind};
pub use self::interface::{CharacterTokens, EOFToken, NullCharacterToken, ParseError};
pub use self::interface::{CommentToken, DoctypeToken, TagToken, Token};
-pub use self::interface::{Doctype, EndTag, StartTag, Tag, TagKind, Attribute};
pub use self::interface::{TokenSink, TokenSinkResult};
use self::states::{DoctypeIdKind, Public, System};
@@ -168,9 +168,7 @@ pub struct Tokenizer<Sink> {
impl<Sink: TokenSink> Tokenizer<Sink> {
/// Create a new tokenizer which feeds tokens to a particular `TokenSink`.
pub fn new(sink: Sink, mut opts: TokenizerOpts) -> Tokenizer<Sink> {
- let start_tag_name = opts
- .last_start_tag_name
- .take();
+ let start_tag_name = opts.last_start_tag_name.take();
let state = opts.initial_state.unwrap_or(states::Data);
let discard_bom = opts.discard_bom;
Tokenizer {
@@ -259,8 +257,8 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
self.current_line += 1;
}
- if self.opts.exact_errors &&
- match c as u32 {
+ if self.opts.exact_errors
+ && match c as u32 {
0x01..=0x08 | 0x0B | 0x0E..=0x1F | 0x7F..=0x9F | 0xFDD0..=0xFDEF => true,
n if (n & 0xFFFE) == 0xFFFE => true,
_ => false,
@@ -326,7 +324,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
self.temp_buf.push(c);
}
None
- },
+ }
Some(matched) => Some(matched),
}
}
@@ -343,7 +341,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
Some(x) => {
*x += dt;
false
- },
+ }
None => true,
};
if new {
@@ -410,7 +408,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
match self.current_tag_kind {
StartTag => {
self.last_start_tag_name = Some(name.clone());
- },
+ }
EndTag => {
if !self.current_tag_attrs.is_empty() {
self.emit_error(Borrowed("Attributes on an end tag"));
@@ -418,7 +416,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
if self.current_tag_self_closing {
self.emit_error(Borrowed("Self-closing end tag"));
}
- },
+ }
}
let token = TagToken(Tag {
@@ -433,15 +431,15 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
TokenSinkResult::Plaintext => {
self.state = states::Plaintext;
ProcessResult::Continue
- },
+ }
TokenSinkResult::Script(node) => {
self.state = states::Data;
ProcessResult::Script(node)
- },
+ }
TokenSinkResult::RawData(kind) => {
self.state = states::RawData(kind);
ProcessResult::Continue
- },
+ }
}
}
@@ -496,9 +494,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
// FIXME: linear time search, do we care?
let dup = {
let name = &*self.current_attr_name;
- self.current_tag_attrs
- .iter()
- .any(|a| &*a.name == name)
+ self.current_tag_attrs.iter().any(|a| &*a.name == name)
};
if dup {
@@ -740,7 +736,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
FromSet('-') => go!(self: emit '-'; to ScriptDataEscapedDash DoubleEscaped),
FromSet('<') => {
go!(self: emit '<'; to RawLessThanSign ScriptDataEscaped DoubleEscaped)
- },
+ }
FromSet(c) => go!(self: emit c),
NotFromSet(b) => self.emit_chars(b),
}
@@ -774,7 +770,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
'>' => go!(self: error; to Data),
'\0' => {
go!(self: error; clear_comment; push_comment '\u{fffd}'; to BogusComment)
- },
+ }
c => match lower_ascii_letter(c) {
Some(cl) => go!(self: create_tag EndTag cl; to TagName),
None => go!(self: error; clear_comment; push_comment c; to BogusComment),
@@ -820,7 +816,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
'/' => go!(self: clear_temp; to RawEndTagOpen kind),
'!' if kind == ScriptData => {
go!(self: emit '<'; emit '!'; to ScriptDataEscapeStart Escaped)
- },
+ }
_ => go!(self: emit '<'; reconsume RawData kind),
}
},
@@ -850,7 +846,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
Some(cl) => go!(self: push_tag cl; push_temp c),
None => {
go!(self: discard_tag; emit '<'; emit '/'; emit_temp; reconsume RawData kind)
- },
+ }
}
},
@@ -865,7 +861,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
Escaped
};
go!(self: emit c; to RawData ScriptDataEscaped esc);
- },
+ }
_ => match lower_ascii_letter(c) {
Some(cl) => go!(self: push_temp cl; emit c),
None => go!(self: reconsume RawData ScriptDataEscaped Escaped),
@@ -898,7 +894,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
go!(self: emit '<');
}
go!(self: to RawLessThanSign ScriptDataEscaped kind);
- },
+ }
'\0' => go!(self: error; emit '\u{fffd}'; to RawData ScriptDataEscaped kind),
c => go!(self: emit c; to RawData ScriptDataEscaped kind),
}
@@ -913,7 +909,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
go!(self: emit '<');
}
go!(self: to RawLessThanSign ScriptDataEscaped kind);
- },
+ }
'>' => go!(self: emit '>'; to RawData ScriptData),
'\0' => go!(self: error; emit '\u{fffd}'; to RawData ScriptDataEscaped kind),
c => go!(self: emit c; to RawData ScriptDataEscaped kind),
@@ -931,7 +927,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
DoubleEscaped
};
go!(self: emit c; to RawData ScriptDataEscaped esc);
- },
+ }
_ => match lower_ascii_letter(c) {
Some(cl) => go!(self: push_temp cl; emit c),
None => go!(self: reconsume RawData ScriptDataEscaped DoubleEscaped),
@@ -952,7 +948,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
go_match!(self: c,
'"' , '\'' , '<' , '=' => error);
go!(self: create_attr c; to AttributeName);
- },
+ }
},
}
},
@@ -971,7 +967,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
go_match!(self: c,
'"' , '\'' , '<' => error);
go!(self: push_name c);
- },
+ }
},
}
},
@@ -990,7 +986,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
go_match!(self: c,
'"' , '\'' , '<' => error);
go!(self: create_attr c; to AttributeName);
- },
+ }
},
}
},
@@ -1005,7 +1001,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
'\'' => go!(self: discard_char input; to AttributeValue SingleQuoted),
'\0' => {
go!(self: discard_char input; error; push_value '\u{fffd}'; to AttributeValue Unquoted)
- },
+ }
'>' => go!(self: discard_char input; error; emit_tag Data),
_ => go!(self: to AttributeValue Unquoted),
}
@@ -1042,7 +1038,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
) {
FromSet('\t') | FromSet('\n') | FromSet('\x0C') | FromSet(' ') => {
go!(self: to BeforeAttributeName)
- },
+ }
FromSet('&') => go!(self: consume_char_ref '>'),
FromSet('>') => go!(self: emit_tag Data),
FromSet('\0') => go!(self: error; push_value '\u{fffd}'),
@@ -1050,7 +1046,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
go_match!(self: c,
'"' , '\'' , '<' , '=' , '`' => error);
go!(self: push_value c);
- },
+ }
NotFromSet(ref b) => go!(self: append_value b),
}
},
@@ -1071,7 +1067,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
'>' => {
self.current_tag_self_closing = true;
go!(self: emit_tag Data);
- },
+ }
_ => go!(self: error; reconsume BeforeAttributeName),
}
},
@@ -1149,7 +1145,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
'\t' | '\n' | '\x0C' | ' ' => (),
'\0' => {
go!(self: error; create_doctype; push_doctype_name '\u{fffd}'; to DoctypeName)
- },
+ }
'>' => go!(self: error; create_doctype; force_quirks; emit_doctype; to Data),
c => go!(self: create_doctype; push_doctype_name (c.to_ascii_lowercase());
to DoctypeName),
@@ -1187,10 +1183,10 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
'\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeDoctypeIdentifier kind),
'"' => {
go!(self: error; clear_doctype_id kind; to DoctypeIdentifierDoubleQuoted kind)
- },
+ }
'\'' => {
go!(self: error; clear_doctype_id kind; to DoctypeIdentifierSingleQuoted kind)
- },
+ }
'>' => go!(self: error; force_quirks; emit_doctype; to Data),
_ => go!(self: error; force_quirks; to BogusDoctype),
}
@@ -1232,14 +1228,14 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
match get_char!(self, input) {
'\t' | '\n' | '\x0C' | ' ' => {
go!(self: to BetweenDoctypePublicAndSystemIdentifiers)
- },
+ }
'>' => go!(self: emit_doctype; to Data),
'"' => {
go!(self: error; clear_doctype_id System; to DoctypeIdentifierDoubleQuoted System)
- },
+ }
'\'' => {
go!(self: error; clear_doctype_id System; to DoctypeIdentifierSingleQuoted System)
- },
+ }
_ => go!(self: error; force_quirks; to BogusDoctype),
}
},
@@ -1260,10 +1256,10 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
'>' => go!(self: emit_doctype; to Data),
'"' => {
go!(self: clear_doctype_id System; to DoctypeIdentifierDoubleQuoted System)
- },
+ }
'\'' => {
go!(self: clear_doctype_id System; to DoctypeIdentifierSingleQuoted System)
- },
+ }
_ => go!(self: error; force_quirks; to BogusDoctype),
}
},
@@ -1341,7 +1337,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
char_ref::Done => {
self.process_char_ref(tok.get_result());
return ProcessResult::Continue;
- },
+ }
char_ref::Stuck => ProcessResult::Suspend,
char_ref::Progress => ProcessResult::Continue,
@@ -1387,7 +1383,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
Some(mut tok) => {
tok.end_of_file(self, &mut input);
self.process_char_ref(tok.get_result());
- },
+ }
}
// Process all remaining buffered input.
@@ -1432,23 +1428,23 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
fn eof_step(&mut self) -> ProcessResult<Sink::Handle> {
match self.state {
- states::Data |
- states::RawData(Rcdata) |
- states::RawData(Rawtext) |
- states::RawData(ScriptData) |
- states::Plaintext => go!(self: eof),
-
- states::TagName |
- states::RawData(ScriptDataEscaped(_)) |
- states::BeforeAttributeName |
- states::AttributeName |
- states::AfterAttributeName |
- states::BeforeAttributeValue |
- states::AttributeValue(_) |
- states::AfterAttributeValueQuoted |
- states::SelfClosingStartTag |
- states::ScriptDataEscapedDash(_) |
- states::ScriptDataEscapedDashDash(_) => go!(self: error_eof; to Data),
+ states::Data
+ | states::RawData(Rcdata)
+ | states::RawData(Rawtext)
+ | states::RawData(ScriptData)
+ | states::Plaintext => go!(self: eof),
+
+ states::TagName
+ | states::RawData(ScriptDataEscaped(_))
+ | states::BeforeAttributeName
+ | states::AttributeName
+ | states::AfterAttributeName
+ | states::BeforeAttributeValue
+ | states::AttributeValue(_)
+ | states::AfterAttributeValueQuoted
+ | states::SelfClosingStartTag
+ | states::ScriptDataEscapedDash(_)
+ | states::ScriptDataEscapedDashDash(_) => go!(self: error_eof; to Data),
states::TagOpen => go!(self: error_eof; emit '<'; to Data),
@@ -1456,7 +1452,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
states::RawLessThanSign(ScriptDataEscaped(DoubleEscaped)) => {
go!(self: to RawData ScriptDataEscaped DoubleEscaped)
- },
+ }
states::RawLessThanSign(kind) => go!(self: emit '<'; to RawData kind),
@@ -1464,7 +1460,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
states::RawEndTagName(kind) => {
go!(self: emit '<'; emit '/'; emit_temp; to RawData kind)
- },
+ }
states::ScriptDataEscapeStart(kind) => go!(self: to RawData ScriptDataEscaped kind),
@@ -1472,29 +1468,29 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
states::ScriptDataDoubleEscapeEnd => {
go!(self: to RawData ScriptDataEscaped DoubleEscaped)
- },
+ }
- states::CommentStart |
- states::CommentStartDash |
- states::Comment |
- states::CommentEndDash |
- states::CommentEnd |
- states::CommentEndBang => go!(self: error_eof; emit_comment; to Data),
+ states::CommentStart
+ | states::CommentStartDash
+ | states::Comment
+ | states::CommentEndDash
+ | states::CommentEnd
+ | states::CommentEndBang => go!(self: error_eof; emit_comment; to Data),
states::Doctype | states::BeforeDoctypeName => {
go!(self: error_eof; create_doctype; force_quirks; emit_doctype; to Data)
- },
+ }
- states::DoctypeName |
- states::AfterDoctypeName |
- states::AfterDoctypeKeyword(_) |
- states::BeforeDoctypeIdentifier(_) |
- states::DoctypeIdentifierDoubleQuoted(_) |
- states::DoctypeIdentifierSingleQuoted(_) |
- states::AfterDoctypeIdentifier(_) |
- states::BetweenDoctypePublicAndSystemIdentifiers => {
+ states::DoctypeName
+ | states::AfterDoctypeName
+ | states::AfterDoctypeKeyword(_)
+ | states::BeforeDoctypeIdentifier(_)
+ | states::DoctypeIdentifierDoubleQuoted(_)
+ | states::DoctypeIdentifierSingleQuoted(_)
+ | states::AfterDoctypeIdentifier(_)
+ | states::BetweenDoctypePublicAndSystemIdentifiers => {
go!(self: error_eof; force_quirks; emit_doctype; to Data)
- },
+ }
states::BogusDoctype => go!(self: emit_doctype; to Data),
@@ -1567,15 +1563,15 @@ mod test {
match token {
CharacterTokens(b) => {
self.current_str.push_str(&b);
- },
+ }
NullCharacterToken => {
self.current_str.push('\0');
- },
+ }
ParseError(_) => {
panic!("unexpected parse error");
- },
+ }
TagToken(mut t) => {
// The spec seems to indicate that one can emit
@@ -1585,11 +1581,11 @@ mod test {
EndTag => {
t.self_closing = false;
t.attrs = vec![];
- },
+ }
_ => t.attrs.sort_by(|a1, a2| a1.name.cmp(&a2.name)),
}
self.push(TagToken(t), line_number);
- },
+ }
EOFToken => (),