Skip to content

Commit 600824b

Browse files
authored
Merge pull request #2308 from ehuss/pulldown_cmark-0.10
Update pulldown_cmark to 0.10
2 parents d48810f + 42e635b commit 600824b

File tree

8 files changed

+366
-54
lines changed

8 files changed

+366
-54
lines changed

Cargo.lock

+10-3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ handlebars = "5.0"
2727
log = "0.4.17"
2828
memchr = "2.5.0"
2929
opener = "0.6.1"
30-
pulldown-cmark = { version = "0.9.3", default-features = false }
30+
pulldown-cmark = { version = "0.10.0", default-features = false, features = ["html"] }
3131
regex = "1.8.1"
3232
serde = { version = "1.0.163", features = ["derive"] }
3333
serde_json = "1.0.96"

src/book/summary.rs

+36-19
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use crate::errors::*;
22
use log::{debug, trace, warn};
3-
use memchr::{self, Memchr};
4-
use pulldown_cmark::{self, Event, HeadingLevel, Tag};
3+
use memchr::Memchr;
4+
use pulldown_cmark::{DefaultBrokenLinkCallback, Event, HeadingLevel, Tag, TagEnd};
55
use serde::{Deserialize, Serialize};
66
use std::fmt::{self, Display, Formatter};
77
use std::iter::FromIterator;
@@ -163,7 +163,7 @@ impl From<Link> for SummaryItem {
163163
/// > match the following regex: "[^<>\n[]]+".
164164
struct SummaryParser<'a> {
165165
src: &'a str,
166-
stream: pulldown_cmark::OffsetIter<'a, 'a>,
166+
stream: pulldown_cmark::OffsetIter<'a, DefaultBrokenLinkCallback>,
167167
offset: usize,
168168

169169
/// We can't actually put an event back into the `OffsetIter` stream, so instead we store it
@@ -210,7 +210,7 @@ macro_rules! collect_events {
210210
}
211211

212212
impl<'a> SummaryParser<'a> {
213-
fn new(text: &str) -> SummaryParser<'_> {
213+
fn new(text: &'a str) -> SummaryParser<'a> {
214214
let pulldown_parser = pulldown_cmark::Parser::new(text).into_offset_iter();
215215

216216
SummaryParser {
@@ -265,7 +265,12 @@ impl<'a> SummaryParser<'a> {
265265
loop {
266266
match self.next_event() {
267267
Some(ev @ Event::Start(Tag::List(..)))
268-
| Some(ev @ Event::Start(Tag::Heading(HeadingLevel::H1, ..))) => {
268+
| Some(
269+
ev @ Event::Start(Tag::Heading {
270+
level: HeadingLevel::H1,
271+
..
272+
}),
273+
) => {
269274
if is_prefix {
270275
// we've finished prefix chapters and are at the start
271276
// of the numbered section.
@@ -275,8 +280,8 @@ impl<'a> SummaryParser<'a> {
275280
bail!(self.parse_error("Suffix chapters cannot be followed by a list"));
276281
}
277282
}
278-
Some(Event::Start(Tag::Link(_type, href, _title))) => {
279-
let link = self.parse_link(href.to_string());
283+
Some(Event::Start(Tag::Link { dest_url, .. })) => {
284+
let link = self.parse_link(dest_url.to_string());
280285
items.push(SummaryItem::Link(link));
281286
}
282287
Some(Event::Rule) => items.push(SummaryItem::Separator),
@@ -304,10 +309,13 @@ impl<'a> SummaryParser<'a> {
304309
break;
305310
}
306311

307-
Some(Event::Start(Tag::Heading(HeadingLevel::H1, ..))) => {
312+
Some(Event::Start(Tag::Heading {
313+
level: HeadingLevel::H1,
314+
..
315+
})) => {
308316
debug!("Found a h1 in the SUMMARY");
309317

310-
let tags = collect_events!(self.stream, end Tag::Heading(HeadingLevel::H1, ..));
318+
let tags = collect_events!(self.stream, end TagEnd::Heading(HeadingLevel::H1));
311319
Some(stringify_events(tags))
312320
}
313321

@@ -336,7 +344,7 @@ impl<'a> SummaryParser<'a> {
336344
/// Finishes parsing a link once the `Event::Start(Tag::Link { .. })` has been opened.
337345
fn parse_link(&mut self, href: String) -> Link {
338346
let href = href.replace("%20", " ");
339-
let link_content = collect_events!(self.stream, end Tag::Link(..));
347+
let link_content = collect_events!(self.stream, end TagEnd::Link);
340348
let name = stringify_events(link_content);
341349

342350
let path = if href.is_empty() {
@@ -377,7 +385,12 @@ impl<'a> SummaryParser<'a> {
377385
}
378386
// The expectation is that pulldown cmark will terminate a paragraph before a new
379387
// heading, so we can always count on this to return without skipping headings.
380-
Some(ev @ Event::Start(Tag::Heading(HeadingLevel::H1, ..))) => {
388+
Some(
389+
ev @ Event::Start(Tag::Heading {
390+
level: HeadingLevel::H1,
391+
..
392+
}),
393+
) => {
381394
// we're starting a new part
382395
self.back(ev);
383396
break;
@@ -398,7 +411,7 @@ impl<'a> SummaryParser<'a> {
398411

399412
// Skip over the contents of this tag
400413
while let Some(event) = self.next_event() {
401-
if event == Event::End(other_tag.clone()) {
414+
if event == Event::End(other_tag.clone().into()) {
402415
break;
403416
}
404417
}
@@ -469,7 +482,7 @@ impl<'a> SummaryParser<'a> {
469482

470483
last_item.nested_items = sub_items;
471484
}
472-
Some(Event::End(Tag::List(..))) => break,
485+
Some(Event::End(TagEnd::List(..))) => break,
473486
Some(_) => {}
474487
None => break,
475488
}
@@ -486,8 +499,8 @@ impl<'a> SummaryParser<'a> {
486499
loop {
487500
match self.next_event() {
488501
Some(Event::Start(Tag::Paragraph)) => continue,
489-
Some(Event::Start(Tag::Link(_type, href, _title))) => {
490-
let mut link = self.parse_link(href.to_string());
502+
Some(Event::Start(Tag::Link { dest_url, .. })) => {
503+
let mut link = self.parse_link(dest_url.to_string());
491504

492505
let mut number = parent.clone();
493506
number.0.push(num_existing_items as u32 + 1);
@@ -529,14 +542,18 @@ impl<'a> SummaryParser<'a> {
529542
fn parse_title(&mut self) -> Option<String> {
530543
loop {
531544
match self.next_event() {
532-
Some(Event::Start(Tag::Heading(HeadingLevel::H1, ..))) => {
545+
Some(Event::Start(Tag::Heading {
546+
level: HeadingLevel::H1,
547+
..
548+
})) => {
533549
debug!("Found a h1 in the SUMMARY");
534550

535-
let tags = collect_events!(self.stream, end Tag::Heading(HeadingLevel::H1, ..));
551+
let tags = collect_events!(self.stream, end TagEnd::Heading(HeadingLevel::H1));
536552
return Some(stringify_events(tags));
537553
}
538554
// Skip an HTML element such as a comment line.
539-
Some(Event::Html(_)) => {}
555+
Some(Event::Html(_) | Event::InlineHtml(_))
556+
| Some(Event::Start(Tag::HtmlBlock) | Event::End(TagEnd::HtmlBlock)) => {}
540557
// Otherwise, no title.
541558
Some(ev) => {
542559
self.back(ev);
@@ -744,7 +761,7 @@ mod tests {
744761
let _ = parser.stream.next(); // Discard opening paragraph
745762

746763
let href = match parser.stream.next() {
747-
Some((Event::Start(Tag::Link(_type, href, _title)), _range)) => href.to_string(),
764+
Some((Event::Start(Tag::Link { dest_url, .. }), _range)) => dest_url.to_string(),
748765
other => panic!("Unreachable, {:?}", other),
749766
};
750767

src/renderer/html_handlebars/search.rs

+41-13
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,23 @@ fn add_doc(
6666
index: &mut Index,
6767
doc_urls: &mut Vec<String>,
6868
anchor_base: &str,
69-
section_id: &Option<String>,
69+
heading: &str,
70+
id_counter: &mut HashMap<String, usize>,
71+
section_id: &Option<CowStr<'_>>,
7072
items: &[&str],
7173
) {
72-
let url = if let Some(ref id) = *section_id {
74+
// Either use the explicit section id the user specified, or generate one
75+
// from the heading content.
76+
let section_id = section_id.as_ref().map(|id| id.to_string()).or_else(|| {
77+
if heading.is_empty() {
78+
// In the case where a chapter has no heading, don't set a section id.
79+
None
80+
} else {
81+
Some(utils::unique_id_from_content(heading, id_counter))
82+
}
83+
});
84+
85+
let url = if let Some(id) = section_id {
7386
Cow::Owned(format!("{}#{}", anchor_base, id))
7487
} else {
7588
Cow::Borrowed(anchor_base)
@@ -119,30 +132,29 @@ fn render_item(
119132
let mut id_counter = HashMap::new();
120133
while let Some(event) = p.next() {
121134
match event {
122-
Event::Start(Tag::Heading(i, ..)) if i as u32 <= max_section_depth => {
135+
Event::Start(Tag::Heading { level, id, .. }) if level as u32 <= max_section_depth => {
123136
if !heading.is_empty() {
124137
// Section finished, the next heading is following now
125138
// Write the data to the index, and clear it for the next section
126139
add_doc(
127140
index,
128141
doc_urls,
129142
&anchor_base,
143+
&heading,
144+
&mut id_counter,
130145
&section_id,
131146
&[&heading, &body, &breadcrumbs.join(" » ")],
132147
);
133-
section_id = None;
134148
heading.clear();
135149
body.clear();
136150
breadcrumbs.pop();
137151
}
138152

153+
section_id = id;
139154
in_heading = true;
140155
}
141-
Event::End(Tag::Heading(i, id, _classes)) if i as u32 <= max_section_depth => {
156+
Event::End(TagEnd::Heading(level)) if level as u32 <= max_section_depth => {
142157
in_heading = false;
143-
section_id = id
144-
.map(|id| id.to_string())
145-
.or_else(|| Some(utils::unique_id_from_content(&heading, &mut id_counter)));
146158
breadcrumbs.push(heading.clone());
147159
}
148160
Event::Start(Tag::FootnoteDefinition(name)) => {
@@ -159,9 +171,19 @@ fn render_item(
159171
html_block.push_str(html);
160172
p.next();
161173
}
162-
163174
body.push_str(&clean_html(&html_block));
164175
}
176+
Event::InlineHtml(html) => {
177+
// This is not capable of cleaning inline tags like
178+
// `foo <script>…</script>`. The `<script>` tags show up as
179+
// individual InlineHtml events, and the content inside is
180+
// just a regular Text event. There isn't a very good way to
181+
// know how to collect all the content in-between. I'm not
182+
// sure if this is easily fixable. It should be extremely
183+
// rare, since script and style tags should almost always be
184+
// blocks, and worst case you have some noise in the index.
185+
body.push_str(&clean_html(&html));
186+
}
165187
Event::Start(_) | Event::End(_) | Event::Rule | Event::SoftBreak | Event::HardBreak => {
166188
// Insert spaces where HTML output would usually separate text
167189
// to ensure words don't get merged together
@@ -188,18 +210,24 @@ fn render_item(
188210
}
189211

190212
if !body.is_empty() || !heading.is_empty() {
191-
if heading.is_empty() {
213+
let title = if heading.is_empty() {
192214
if let Some(chapter) = breadcrumbs.first() {
193-
heading = chapter.clone();
215+
chapter
216+
} else {
217+
""
194218
}
195-
}
219+
} else {
220+
&heading
221+
};
196222
// Make sure the last section is added to the index
197223
add_doc(
198224
index,
199225
doc_urls,
200226
&anchor_base,
227+
&heading,
228+
&mut id_counter,
201229
&section_id,
202-
&[&heading, &body, &breadcrumbs.join(" » ")],
230+
&[title, &body, &breadcrumbs.join(" » ")],
203231
);
204232
}
205233

src/utils/mod.rs

+26-9
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ pub(crate) mod toml_ext;
66
use crate::errors::Error;
77
use log::error;
88
use once_cell::sync::Lazy;
9-
use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag};
9+
use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag, TagEnd};
1010
use regex::Regex;
1111

1212
use std::borrow::Cow;
@@ -161,13 +161,30 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> {
161161
}
162162

163163
match event {
164-
Event::Start(Tag::Link(link_type, dest, title)) => {
165-
Event::Start(Tag::Link(link_type, fix(dest, path), title))
166-
}
167-
Event::Start(Tag::Image(link_type, dest, title)) => {
168-
Event::Start(Tag::Image(link_type, fix(dest, path), title))
169-
}
164+
Event::Start(Tag::Link {
165+
link_type,
166+
dest_url,
167+
title,
168+
id,
169+
}) => Event::Start(Tag::Link {
170+
link_type,
171+
dest_url: fix(dest_url, path),
172+
title,
173+
id,
174+
}),
175+
Event::Start(Tag::Image {
176+
link_type,
177+
dest_url,
178+
title,
179+
id,
180+
}) => Event::Start(Tag::Image {
181+
link_type,
182+
dest_url: fix(dest_url, path),
183+
title,
184+
id,
185+
}),
170186
Event::Html(html) => Event::Html(fix_html(html, path)),
187+
Event::InlineHtml(html) => Event::InlineHtml(fix_html(html, path)),
171188
_ => event,
172189
}
173190
}
@@ -177,7 +194,7 @@ pub fn render_markdown(text: &str, curly_quotes: bool) -> String {
177194
render_markdown_with_path(text, curly_quotes, None)
178195
}
179196

180-
pub fn new_cmark_parser(text: &str, curly_quotes: bool) -> Parser<'_, '_> {
197+
pub fn new_cmark_parser(text: &str, curly_quotes: bool) -> Parser<'_> {
181198
let mut opts = Options::empty();
182199
opts.insert(Options::ENABLE_TABLES);
183200
opts.insert(Options::ENABLE_FOOTNOTES);
@@ -212,7 +229,7 @@ fn wrap_tables(event: Event<'_>) -> (Option<Event<'_>>, Option<Event<'_>>) {
212229
Some(Event::Html(r#"<div class="table-wrapper">"#.into())),
213230
Some(event),
214231
),
215-
Event::End(Tag::Table(_)) => (Some(event), Some(Event::Html(r#"</div>"#.into()))),
232+
Event::End(TagEnd::Table) => (Some(event), Some(Event::Html(r#"</div>"#.into()))),
216233
_ => (Some(event), None),
217234
}
218235
}

tests/dummy_book/src/conclusion.md

+4
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,7 @@ css looks, like this {
1818
}
1919
*/
2020
</style>
21+
22+
Sneaky inline event <script>alert("inline");</script>.
23+
24+
But regular <b>inline</b> is indexed.

0 commit comments

Comments
 (0)