Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove mutability from parser/tokenizer APIs. #548

Merged
merged 8 commits into from
Aug 7, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -47,7 +47,7 @@ jobs:
- name: Install stable toolchain
run: |
rustup set profile minimal
rustup override set 1.61.0
rustup override set 1.65.0

- run: cargo check --lib --all-features

4 changes: 2 additions & 2 deletions html5ever/benches/html5ever.rs
Original file line number Diff line number Diff line change
@@ -15,7 +15,7 @@ struct Sink;
impl TokenSink for Sink {
type Handle = ();

fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
fn process_token(&self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
// Don't use the token, but make sure we don't get
// optimized out entirely.
black_box(token);
@@ -53,7 +53,7 @@ fn run_bench(c: &mut Criterion, name: &str) {

c.bench_function(&test_name, move |b| {
b.iter(|| {
let mut tok = Tokenizer::new(Sink, Default::default());
let tok = Tokenizer::new(Sink, Default::default());
let buffer = BufferQueue::default();
// We are doing clone inside the bench function, this is not ideal, but possibly
// necessary since our iterator consumes the underlying buffer.
34 changes: 17 additions & 17 deletions html5ever/examples/arena.rs
Original file line number Diff line number Diff line change
@@ -24,7 +24,7 @@ fn html5ever_parse_slice_into_arena<'a>(bytes: &[u8], arena: Arena<'a>) -> Ref<'
let sink = Sink {
arena,
document: arena.alloc(Node::new(NodeData::Document)),
quirks_mode: QuirksMode::NoQuirks,
quirks_mode: Cell::new(QuirksMode::NoQuirks),
};

parse_document(sink, Default::default())
@@ -41,7 +41,7 @@ type Link<'arena> = Cell<Option<Ref<'arena>>>;
struct Sink<'arena> {
arena: Arena<'arena>,
document: Ref<'arena>,
quirks_mode: QuirksMode,
quirks_mode: Cell<QuirksMode>,
}

/// DOM node which contains links to other nodes in the tree.
@@ -188,14 +188,14 @@ impl<'arena> TreeSink for Sink<'arena> {
self.document
}

fn parse_error(&mut self, _: Cow<'static, str>) {}
fn parse_error(&self, _: Cow<'static, str>) {}

fn get_document(&mut self) -> Ref<'arena> {
fn get_document(&self) -> Ref<'arena> {
self.document
}

fn set_quirks_mode(&mut self, mode: QuirksMode) {
self.quirks_mode = mode;
fn set_quirks_mode(&self, mode: QuirksMode) {
self.quirks_mode.set(mode);
}

fn same_node(&self, x: &Ref<'arena>, y: &Ref<'arena>) -> bool {
@@ -209,7 +209,7 @@ impl<'arena> TreeSink for Sink<'arena> {
}
}

fn get_template_contents(&mut self, target: &Ref<'arena>) -> Ref<'arena> {
fn get_template_contents(&self, target: &Ref<'arena>) -> Ref<'arena> {
if let NodeData::Element {
template_contents: Some(contents),
..
@@ -234,7 +234,7 @@ impl<'arena> TreeSink for Sink<'arena> {
}

fn create_element(
&mut self,
&self,
name: QualName,
attrs: Vec<Attribute>,
flags: ElementFlags,
@@ -251,26 +251,26 @@ impl<'arena> TreeSink for Sink<'arena> {
})
}

fn create_comment(&mut self, text: StrTendril) -> Ref<'arena> {
fn create_comment(&self, text: StrTendril) -> Ref<'arena> {
self.new_node(NodeData::Comment { contents: text })
}

fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Ref<'arena> {
fn create_pi(&self, target: StrTendril, data: StrTendril) -> Ref<'arena> {
self.new_node(NodeData::ProcessingInstruction {
target,
contents: data,
})
}

fn append(&mut self, parent: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
fn append(&self, parent: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
self.append_common(
child,
|| parent.last_child.get(),
|new_node| parent.append(new_node),
)
}

fn append_before_sibling(&mut self, sibling: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
fn append_before_sibling(&self, sibling: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
self.append_common(
child,
|| sibling.previous_sibling.get(),
@@ -279,7 +279,7 @@ impl<'arena> TreeSink for Sink<'arena> {
}

fn append_based_on_parent_node(
&mut self,
&self,
element: &Ref<'arena>,
prev_element: &Ref<'arena>,
child: NodeOrText<Ref<'arena>>,
@@ -292,7 +292,7 @@ impl<'arena> TreeSink for Sink<'arena> {
}

fn append_doctype_to_document(
&mut self,
&self,
name: StrTendril,
public_id: StrTendril,
system_id: StrTendril,
@@ -304,7 +304,7 @@ impl<'arena> TreeSink for Sink<'arena> {
}))
}

fn add_attrs_if_missing(&mut self, target: &Ref<'arena>, attrs: Vec<Attribute>) {
fn add_attrs_if_missing(&self, target: &Ref<'arena>, attrs: Vec<Attribute>) {
let mut existing = if let NodeData::Element { ref attrs, .. } = target.data {
attrs.borrow_mut()
} else {
@@ -322,11 +322,11 @@ impl<'arena> TreeSink for Sink<'arena> {
);
}

fn remove_from_parent(&mut self, target: &Ref<'arena>) {
fn remove_from_parent(&self, target: &Ref<'arena>) {
target.detach()
}

fn reparent_children(&mut self, node: &Ref<'arena>, new_parent: &Ref<'arena>) {
fn reparent_children(&self, node: &Ref<'arena>, new_parent: &Ref<'arena>) {
let mut next_child = node.first_child.get();
while let Some(child) = next_child {
debug_assert!(ptr::eq::<Node>(child.parent.get().unwrap(), *node));
9 changes: 5 additions & 4 deletions html5ever/examples/noop-tokenize.rs
Original file line number Diff line number Diff line change
@@ -11,20 +11,21 @@

extern crate html5ever;

use std::cell::RefCell;
use std::io;

use html5ever::tendril::*;
use html5ever::tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer};

/// In our case, our sink only contains a tokens vector
struct Sink(Vec<Token>);
struct Sink(RefCell<Vec<Token>>);

impl TokenSink for Sink {
type Handle = ();

/// Each processed token will be handled by this method
fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
self.0.push(token);
fn process_token(&self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
self.0.borrow_mut().push(token);
TokenSinkResult::Continue
}
}
@@ -39,7 +40,7 @@ fn main() {
let input = BufferQueue::default();
input.push_back(chunk.try_reinterpret().unwrap());

let mut tok = Tokenizer::new(Sink(Vec::new()), Default::default());
let tok = Tokenizer::new(Sink(RefCell::new(Vec::new())), Default::default());
let _ = tok.feed(&input);
assert!(input.is_empty());
tok.end();
64 changes: 38 additions & 26 deletions html5ever/examples/noop-tree-builder.rs
Original file line number Diff line number Diff line change
@@ -11,6 +11,7 @@
extern crate html5ever;

use std::borrow::Cow;
use std::cell::{Cell, RefCell};
use std::collections::HashMap;
use std::io;

@@ -20,14 +21,14 @@ use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
use html5ever::{Attribute, ExpandedName, QualName};

struct Sink {
next_id: usize,
names: HashMap<usize, QualName>,
next_id: Cell<usize>,
names: RefCell<HashMap<usize, &'static QualName>>,
}

impl Sink {
fn get_id(&mut self) -> usize {
let id = self.next_id;
self.next_id += 2;
fn get_id(&self) -> usize {
let id = self.next_id.get();
self.next_id.set(id + 2);
id
}
}
@@ -43,12 +44,13 @@ impl TreeSink for Sink {
self
}

fn get_document(&mut self) -> usize {
fn get_document(&self) -> usize {
0
}

fn get_template_contents(&mut self, target: &usize) -> usize {
if let Some(expanded_name!(html "template")) = self.names.get(target).map(|n| n.expanded())
fn get_template_contents(&self, target: &usize) -> usize {
if let Some(expanded_name!(html "template")) =
self.names.borrow().get(target).map(|n| n.expanded())
{
target + 1
} else {
@@ -61,53 +63,63 @@ impl TreeSink for Sink {
}

fn elem_name(&self, target: &usize) -> ExpandedName {
self.names.get(target).expect("not an element").expanded()
self.names
.borrow()
.get(target)
.expect("not an element")
.expanded()
}

fn create_element(&mut self, name: QualName, _: Vec<Attribute>, _: ElementFlags) -> usize {
fn create_element(&self, name: QualName, _: Vec<Attribute>, _: ElementFlags) -> usize {
let id = self.get_id();
self.names.insert(id, name);
// N.B. We intentionally leak memory here to minimize the implementation complexity
// of this example code. A real implementation would either want to use a real
// real DOM tree implentation, or else use an arena as the backing store for
// memory used by the parser.
self.names
.borrow_mut()
.insert(id, Box::leak(Box::new(name)));
id
}

fn create_comment(&mut self, _text: StrTendril) -> usize {
fn create_comment(&self, _text: StrTendril) -> usize {
self.get_id()
}

#[allow(unused_variables)]
fn create_pi(&mut self, target: StrTendril, value: StrTendril) -> usize {
fn create_pi(&self, target: StrTendril, value: StrTendril) -> usize {
unimplemented!()
}

fn append_before_sibling(&mut self, _sibling: &usize, _new_node: NodeOrText<usize>) {}
fn append_before_sibling(&self, _sibling: &usize, _new_node: NodeOrText<usize>) {}

fn append_based_on_parent_node(
&mut self,
&self,
_element: &usize,
_prev_element: &usize,
_new_node: NodeOrText<usize>,
) {
}

fn parse_error(&mut self, _msg: Cow<'static, str>) {}
fn set_quirks_mode(&mut self, _mode: QuirksMode) {}
fn append(&mut self, _parent: &usize, _child: NodeOrText<usize>) {}
fn parse_error(&self, _msg: Cow<'static, str>) {}
fn set_quirks_mode(&self, _mode: QuirksMode) {}
fn append(&self, _parent: &usize, _child: NodeOrText<usize>) {}

fn append_doctype_to_document(&mut self, _: StrTendril, _: StrTendril, _: StrTendril) {}
fn add_attrs_if_missing(&mut self, target: &usize, _attrs: Vec<Attribute>) {
assert!(self.names.contains_key(target), "not an element");
fn append_doctype_to_document(&self, _: StrTendril, _: StrTendril, _: StrTendril) {}
fn add_attrs_if_missing(&self, target: &usize, _attrs: Vec<Attribute>) {
assert!(self.names.borrow().contains_key(target), "not an element");
}
fn remove_from_parent(&mut self, _target: &usize) {}
fn reparent_children(&mut self, _node: &usize, _new_parent: &usize) {}
fn mark_script_already_started(&mut self, _node: &usize) {}
fn remove_from_parent(&self, _target: &usize) {}
fn reparent_children(&self, _node: &usize, _new_parent: &usize) {}
fn mark_script_already_started(&self, _node: &usize) {}
}

/// In this example we implement the TreeSink trait which takes each parsed elements and insert
/// it to a hashmap, while each element is given a numeric id.
fn main() {
let sink = Sink {
next_id: 1,
names: HashMap::new(),
next_id: Cell::new(1),
names: RefCell::new(HashMap::new()),
};

// Read HTML from the standard input and parse it
Loading