Closed
Description
Hello!
I came across an HTML file whose <style> element wraps its CSS in comment
markers (<!-- ... -->).
I tried to parse it as follows, using scraper 0.23.1:
use scraper::Html;
/// Minimal reproduction: parses an HTML sample whose `<style>` element wraps
/// its CSS in `<!-- ... -->` and prints the debug form of the parsed document.
fn main() {
    // The sample is kept verbatim (including its leading and trailing newline)
    // so the parser sees exactly the same input as in the report.
    const SAMPLE: &str = r#"
<style><!-- /* Font Definitions */
@font-face
{font-family:Calibri;
panose-1:2 15 5 2 2 2 4 3 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{margin:0cm;
margin-bottom:.0001pt;
font-size:11.0pt;
font-family:"Calibri",sans-serif;}
a:link, span.MsoHyperlink
{mso-style-priority:99;
color:#0563C1;
text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
{mso-style-priority:99;
color:#954F72;
text-decoration:underline;}
span.EmailStyle17
{mso-style-type:personal-compose;
font-family:"Calibri",sans-serif;
color:windowtext;}
.MsoChpDefault
{mso-style-type:export-only;
font-family:"Calibri",sans-serif;}
@page WordSection1
{size:612.0pt 792.0pt;
margin:72.0pt 72.0pt 72.0pt 72.0pt;}
div.WordSection1
{page:WordSection1;}
--></style><!--[if gte mso 9]-->
<div><!-- ignore comment --></div>
<h1>word1€
word2 word3 word4</h1>
<div> word5 <span>word6 <br> word7</span></div>
<p><span>word8 word9 word10</span></p>
<p>Some test
message<br></p>
"#;

    // Parse as a full document and dump the resulting tree for inspection.
    let parsed = Html::parse_document(SAMPLE);
    println!("{parsed:?}");
}
However, the parser classifies the content of <style>
as Text, not Comment. This does not seem like the expected behavior to me.
Node { parent: Some(NodeId(4)), prev_sibling: None, next_sibling: None, children: None, value: Text("<!-- /* Font Definitions */\n@font-face\n\t{font-family:Calibri;\n\tpanose- [AND SO ON...]-->") },
Metadata
Metadata
Assignees
Labels
No labels