cheep-crator-2/vendor/xml5ever/src/driver.rs

90 lines
2.7 KiB
Rust
Raw Permalink Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

// Copyright 2014-2017 The html5ever Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use crate::tokenizer::{XmlTokenizer, XmlTokenizerOpts};
use crate::tree_builder::{TreeSink, XmlTreeBuilder, XmlTreeBuilderOpts};
use std::borrow::Cow;
use crate::tendril;
use crate::tendril::stream::{TendrilSink, Utf8LossyDecoder};
use crate::tendril::StrTendril;
use markup5ever::buffer_queue::BufferQueue;
/// All-encompassing parser settings structure.
///
/// Groups the options of both parsing stages so that they can be handed to
/// `parse_document` as a single value.
#[derive(Default, Clone)]
pub struct XmlParseOpts {
    /// Options passed to the XML tokenizer.
    pub tokenizer: XmlTokenizerOpts,

    /// Options passed to the XML tree builder.
    pub tree_builder: XmlTreeBuilderOpts,
}
/// Parse and send results to a `TreeSink`.
///
/// ## Example
///
/// ```ignore
/// let mut sink = MySink;
/// parse_document(&mut sink, iter::once(my_str), Default::default());
/// ```
pub fn parse_document<Sink>(sink: Sink, opts: XmlParseOpts) -> XmlParser<Sink>
where
Sink: TreeSink,
{
let tb = XmlTreeBuilder::new(sink, opts.tree_builder);
let tok = XmlTokenizer::new(tb, opts.tokenizer);
XmlParser {
tokenizer: tok,
input_buffer: BufferQueue::new(),
}
}
/// An XML parser, ready to receive Unicode input through the methods of the
/// `tendril::TendrilSink` trait.
pub struct XmlParser<Sink: TreeSink> {
    /// Tokenizer used by the parser; it wraps the tree builder for `Sink`.
    pub tokenizer: XmlTokenizer<XmlTreeBuilder<Sink::Handle, Sink>>,

    /// Input buffer used by the parser.
    pub input_buffer: BufferQueue,
}
impl<Sink> TendrilSink<tendril::fmt::UTF8> for XmlParser<Sink>
where
    Sink: TreeSink,
{
    type Output = Sink::Output;

    /// Queue the chunk `t` on the input buffer, then let the tokenizer feed
    /// from that buffer.
    fn process(&mut self, t: StrTendril) {
        let Self {
            tokenizer,
            input_buffer,
        } = self;
        input_buffer.push_back(t);
        tokenizer.feed(input_buffer);
    }

    /// Forward the error description to the sink as a parse error.
    // FIXME: Is it too noisy to report every character decoding error?
    fn error(&mut self, desc: Cow<'static, str>) {
        let tree_sink = &mut self.tokenizer.sink.sink;
        tree_sink.parse_error(desc)
    }

    /// End tokenization, then finish the sink and return its output.
    fn finish(mut self) -> Self::Output {
        self.tokenizer.end();
        // The sink sits two levels down: tokenizer -> tree builder -> sink.
        let tree_sink = self.tokenizer.sink.sink;
        tree_sink.finish()
    }
}
impl<Sink> XmlParser<Sink>
where
    Sink: TreeSink,
{
    /// Wrap this parser into a `TendrilSink` that accepts UTF-8 bytes.
    ///
    /// Intended for input that is bytes known to be UTF-8 encoded.
    /// Decoding is lossy, like `String::from_utf8_lossy`.
    // The `from_*` name takes `self` here because the method wraps the parser
    // rather than constructing it, hence the lint exemption.
    #[allow(clippy::wrong_self_convention)]
    pub fn from_utf8(self) -> Utf8LossyDecoder<Self> {
        Utf8LossyDecoder::new(self)
    }
}