// Copyright 2014-2017 The html5ever Project Developers. See the // COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! A simple reference-counted DOM. //! //! This is sufficient as a static parse tree, but don't build a //! web browser using it. :) //! //! A DOM is a [tree structure] with ordered children that can be represented in an XML-like //! format. For example, the following graph //! //! ```text //! div //! +- "text node" //! +- span //! ``` //! in HTML would be serialized as //! //! ```html //!
text node
//! ``` //! //! See the [document object model article on wikipedia][dom wiki] for more information. //! //! This implementation stores the information associated with each node once, and then hands out //! refs to children. The nodes themselves are reference-counted to avoid copying - you can create //! a new ref and then a node will outlive the document. Nodes own their children, but only have //! weak references to their parents. //! //! [tree structure]: https://en.wikipedia.org/wiki/Tree_(data_structure) //! [dom wiki]: https://en.wikipedia.org/wiki/Document_Object_Model extern crate markup5ever; extern crate tendril; use std::borrow::Cow; use std::cell::{Cell, RefCell}; use std::collections::HashSet; use std::default::Default; use std::fmt; use std::io; use std::mem; use std::rc::{Rc, Weak}; use tendril::StrTendril; use markup5ever::interface::tree_builder; use markup5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; use markup5ever::serialize::TraversalScope; use markup5ever::serialize::TraversalScope::{ChildrenOnly, IncludeNode}; use markup5ever::serialize::{Serialize, Serializer}; use markup5ever::Attribute; use markup5ever::ExpandedName; use markup5ever::QualName; /// The different kinds of nodes in the DOM. #[derive(Debug)] pub enum NodeData { /// The `Document` itself - the root node of a HTML document. Document, /// A `DOCTYPE` with name, public id, and system id. See /// [document type declaration on wikipedia][dtd wiki]. /// /// [dtd wiki]: https://en.wikipedia.org/wiki/Document_type_declaration Doctype { name: StrTendril, public_id: StrTendril, system_id: StrTendril, }, /// A text node. Text { contents: RefCell }, /// A comment. Comment { contents: StrTendril }, /// An element with attributes. Element { name: QualName, attrs: RefCell>, /// For HTML \ elements, the [template contents]. /// /// [template contents]: https://html.spec.whatwg.org/multipage/#template-contents template_contents: Option, /// Whether the node is a [HTML integration point]. /// /// [HTML integration point]: https://html.spec.whatwg.org/multipage/#html-integration-point mathml_annotation_xml_integration_point: bool, }, /// A Processing instruction. ProcessingInstruction { target: StrTendril, contents: StrTendril, }, } /// A DOM node. pub struct Node { /// Parent node. pub parent: Cell>, /// Child nodes of this node. pub children: RefCell>, /// Represents this node's data. pub data: NodeData, } impl Node { /// Create a new node from its contents pub fn new(data: NodeData) -> Rc { Rc::new(Node { data: data, parent: Cell::new(None), children: RefCell::new(Vec::new()), }) } } impl Drop for Node { fn drop(&mut self) { let mut nodes = mem::replace(&mut *self.children.borrow_mut(), vec![]); while let Some(node) = nodes.pop() { let children = mem::replace(&mut *node.children.borrow_mut(), vec![]); nodes.extend(children.into_iter()); } } } impl fmt::Debug for Node { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { fmt.debug_struct("Node") .field("data", &self.data) .field("children", &self.children) .finish() } } /// Reference to a DOM node. pub type Handle = Rc; /// Weak reference to a DOM node, used for parent pointers. pub type WeakHandle = Weak; /// Append a parentless node to another nodes' children fn append(new_parent: &Handle, child: Handle) { let previous_parent = child.parent.replace(Some(Rc::downgrade(new_parent))); // Invariant: child cannot have existing parent assert!(previous_parent.is_none()); new_parent.children.borrow_mut().push(child); } /// If the node has a parent, get it and this node's position in its children fn get_parent_and_index(target: &Handle) -> Option<(Handle, usize)> { if let Some(weak) = target.parent.take() { let parent = weak.upgrade().expect("dangling weak pointer"); target.parent.set(Some(weak)); let i = match parent .children .borrow() .iter() .enumerate() .find(|&(_, child)| Rc::ptr_eq(&child, &target)) { Some((i, _)) => i, None => panic!("have parent but couldn't find in parent's children!"), }; Some((parent, i)) } else { None } } fn append_to_existing_text(prev: &Handle, text: &str) -> bool { match prev.data { NodeData::Text { ref contents } => { contents.borrow_mut().push_slice(text); true }, _ => false, } } fn remove_from_parent(target: &Handle) { if let Some((parent, i)) = get_parent_and_index(target) { parent.children.borrow_mut().remove(i); target.parent.set(None); } } /// The DOM itself; the result of parsing. pub struct RcDom { /// The `Document` itself. pub document: Handle, /// Errors that occurred during parsing. pub errors: Vec>, /// The document's quirks mode. pub quirks_mode: QuirksMode, } impl TreeSink for RcDom { type Output = Self; fn finish(self) -> Self { self } type Handle = Handle; fn parse_error(&mut self, msg: Cow<'static, str>) { self.errors.push(msg); } fn get_document(&mut self) -> Handle { self.document.clone() } fn get_template_contents(&mut self, target: &Handle) -> Handle { if let NodeData::Element { template_contents: Some(ref contents), .. } = target.data { contents.clone() } else { panic!("not a template element!") } } fn set_quirks_mode(&mut self, mode: QuirksMode) { self.quirks_mode = mode; } fn same_node(&self, x: &Handle, y: &Handle) -> bool { Rc::ptr_eq(x, y) } fn elem_name<'a>(&self, target: &'a Handle) -> ExpandedName<'a> { return match target.data { NodeData::Element { ref name, .. } => name.expanded(), _ => panic!("not an element!"), }; } fn create_element( &mut self, name: QualName, attrs: Vec, flags: ElementFlags, ) -> Handle { Node::new(NodeData::Element { name: name, attrs: RefCell::new(attrs), template_contents: if flags.template { Some(Node::new(NodeData::Document)) } else { None }, mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point, }) } fn create_comment(&mut self, text: StrTendril) -> Handle { Node::new(NodeData::Comment { contents: text }) } fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Handle { Node::new(NodeData::ProcessingInstruction { target: target, contents: data, }) } fn append(&mut self, parent: &Handle, child: NodeOrText) { // Append to an existing Text node if we have one. match child { NodeOrText::AppendText(ref text) => match parent.children.borrow().last() { Some(h) => { if append_to_existing_text(h, &text) { return; } }, _ => (), }, _ => (), } append( &parent, match child { NodeOrText::AppendText(text) => Node::new(NodeData::Text { contents: RefCell::new(text), }), NodeOrText::AppendNode(node) => node, }, ); } fn append_before_sibling(&mut self, sibling: &Handle, child: NodeOrText) { let (parent, i) = get_parent_and_index(&sibling) .expect("append_before_sibling called on node without parent"); let child = match (child, i) { // No previous node. (NodeOrText::AppendText(text), 0) => Node::new(NodeData::Text { contents: RefCell::new(text), }), // Look for a text node before the insertion point. (NodeOrText::AppendText(text), i) => { let children = parent.children.borrow(); let prev = &children[i - 1]; if append_to_existing_text(prev, &text) { return; } Node::new(NodeData::Text { contents: RefCell::new(text), }) }, // The tree builder promises we won't have a text node after // the insertion point. // Any other kind of node. (NodeOrText::AppendNode(node), _) => node, }; remove_from_parent(&child); child.parent.set(Some(Rc::downgrade(&parent))); parent.children.borrow_mut().insert(i, child); } fn append_based_on_parent_node( &mut self, element: &Self::Handle, prev_element: &Self::Handle, child: NodeOrText, ) { let parent = element.parent.take(); let has_parent = parent.is_some(); element.parent.set(parent); if has_parent { self.append_before_sibling(element, child); } else { self.append(prev_element, child); } } fn append_doctype_to_document( &mut self, name: StrTendril, public_id: StrTendril, system_id: StrTendril, ) { append( &self.document, Node::new(NodeData::Doctype { name: name, public_id: public_id, system_id: system_id, }), ); } fn add_attrs_if_missing(&mut self, target: &Handle, attrs: Vec) { let mut existing = if let NodeData::Element { ref attrs, .. } = target.data { attrs.borrow_mut() } else { panic!("not an element") }; let existing_names = existing .iter() .map(|e| e.name.clone()) .collect::>(); existing.extend( attrs .into_iter() .filter(|attr| !existing_names.contains(&attr.name)), ); } fn remove_from_parent(&mut self, target: &Handle) { remove_from_parent(&target); } fn reparent_children(&mut self, node: &Handle, new_parent: &Handle) { let mut children = node.children.borrow_mut(); let mut new_children = new_parent.children.borrow_mut(); for child in children.iter() { let previous_parent = child.parent.replace(Some(Rc::downgrade(&new_parent))); assert!(Rc::ptr_eq( &node, &previous_parent.unwrap().upgrade().expect("dangling weak") )) } new_children.extend(mem::replace(&mut *children, Vec::new())); } fn is_mathml_annotation_xml_integration_point(&self, target: &Handle) -> bool { if let NodeData::Element { mathml_annotation_xml_integration_point, .. } = target.data { mathml_annotation_xml_integration_point } else { panic!("not an element!") } } } impl Default for RcDom { fn default() -> RcDom { RcDom { document: Node::new(NodeData::Document), errors: vec![], quirks_mode: tree_builder::NoQuirks, } } } enum SerializeOp { Open(Handle), Close(QualName), } pub struct SerializableHandle(Handle); impl From for SerializableHandle { fn from(h: Handle) -> SerializableHandle { SerializableHandle(h) } } impl Serialize for SerializableHandle { fn serialize(&self, serializer: &mut S, traversal_scope: TraversalScope) -> io::Result<()> where S: Serializer, { let mut ops = match traversal_scope { IncludeNode => vec![SerializeOp::Open(self.0.clone())], ChildrenOnly(_) => self .0 .children .borrow() .iter() .map(|h| SerializeOp::Open(h.clone())) .collect(), }; while !ops.is_empty() { match ops.remove(0) { SerializeOp::Open(handle) => match &handle.data { &NodeData::Element { ref name, ref attrs, .. } => { serializer.start_elem( name.clone(), attrs.borrow().iter().map(|at| (&at.name, &at.value[..])), )?; ops.insert(0, SerializeOp::Close(name.clone())); for child in handle.children.borrow().iter().rev() { ops.insert(0, SerializeOp::Open(child.clone())); } }, &NodeData::Doctype { ref name, .. } => serializer.write_doctype(&name)?, &NodeData::Text { ref contents } => { serializer.write_text(&contents.borrow())? }, &NodeData::Comment { ref contents } => serializer.write_comment(&contents)?, &NodeData::ProcessingInstruction { ref target, ref contents, } => serializer.write_processing_instruction(target, contents)?, &NodeData::Document => panic!("Can't serialize Document node itself"), }, SerializeOp::Close(name) => { serializer.end_elem(name)?; }, } } Ok(()) } }