Skip to content

Commit

Permalink
Fix. Timing of 'start_document'
Browse files Browse the repository at this point in the history
  • Loading branch information
masataka committed Oct 14, 2020
1 parent 11daa78 commit 8d401d2
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 19 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ When using in SAX style, create an instance of the parser and register the liste
The XML to be parsed can be supplied as a Deno.Reader, a Uint8Array, or a string.

```typescript
import { SAXParser } from 'https://denopkg.com/m-kur/xmlp@v0.10/mod.ts';
import { SAXParser } from 'https://denopkg.com/masataka/xmlp@v0.11/mod.ts';

// create a SAX parser instance
const parser = new SAXParser();
Expand Down Expand Up @@ -59,7 +59,7 @@ I think it's more interesting to write the Pull style than the SAX. This Pull pa
Currently, the Pull parser accepts a Uint8Array or a string, but not a Deno.Reader.

```typescript
import { PullParser } from 'https://denopkg.com/m-kur/xmlp@v0.10/mod.ts';
import { PullParser } from 'https://denopkg.com/masataka/xmlp@v0.11/mod.ts';

// create a pull parser instance
const parser = new PullParser();
Expand Down
15 changes: 8 additions & 7 deletions handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,14 @@ function isWhitespace(c: string): boolean {

// BEFORE_DOCUMENT; FOUND_LT, Error
export function handleBeforeDocument(cx: XMLParseContext, c: string): XMLParseEvent[] {
let events: XMLParseEvent[] = [];
if (c === '<') {
events = [['start_document']];
cx.state = 'FOUND_LT';
} else {
if (!isWhitespace(c)) {
throw new XMLParseError('Non-whitespace before document.', cx);
}
}
return events;
return [];
}

// GENERAL_STUFF; FOUND_LT
Expand Down Expand Up @@ -86,7 +84,7 @@ export function handleProcInstEnding(cx: XMLParseContext, c: string): XMLParseEv
if (c === '>') {
events = [['processing_instruction', cx.memento]];
cx.clearMemento();
cx.state = 'GENERAL_STUFF';
cx.state = cx.elementLength > 0 ? 'GENERAL_STUFF' : 'BEFORE_DOCUMENT';
} else {
cx.appendMemento(`?${c}`);
cx.state = 'PROC_INST';
Expand All @@ -113,7 +111,7 @@ export function handleSgmlDecl(cx: XMLParseContext, c: string): XMLParseEvent[]
} else if (c === '>') {
events = [['sgml_declaration', cx.memento]];
cx.clearMemento();
cx.state = 'GENERAL_STUFF';
cx.state = cx.elementLength > 0 ? 'GENERAL_STUFF' : 'BEFORE_DOCUMENT';
} else {
cx.appendMemento(c);
}
Expand Down Expand Up @@ -197,13 +195,13 @@ export function handleCommentEnding2(cx: XMLParseContext, c: string): XMLParseEv
return events;
}

// DOCTYPE; doctype & GENERAL_STUFF
// DOCTYPE; doctype & BEFORE_DOCUMENT
export function handleDoctype(cx: XMLParseContext, c: string): XMLParseEvent[] {
let events: XMLParseEvent[] = [];
if (c === '>') {
events = [['doctype', cx.memento]];
cx.clearMemento();
cx.state = 'GENERAL_STUFF';
cx.state = 'BEFORE_DOCUMENT';
} else {
cx.appendMemento(c);
}
Expand All @@ -212,6 +210,9 @@ export function handleDoctype(cx: XMLParseContext, c: string): XMLParseEvent[] {

function emitStartElement(cx: XMLParseContext): XMLParseEvent[] {
const events: XMLParseEvent[] = [];
if (cx.elementLength === 1) {
events.push(['start_document']);
}
const element = cx.peekElement()!;
for (const { ns, uri } of element.prefixMappings) {
cx.registerNamespace(ns, uri);
Expand Down
17 changes: 11 additions & 6 deletions handler_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,13 @@ Deno.test('handleProcInst', () => {

Deno.test('handleProcInstEnding', () => {
const cx = new XMLParseContext();
// processing_instruction & GENERAL_STUFF
// processing_instruction & BEFORE_DOCUMENT
cx.state = 'PROC_INST_ENDING';
cx.appendMemento('test');
const [[event, procInst]] = handler.handleProcInstEnding(cx, '>');
assertEquals(event, 'processing_instruction');
assertEquals(procInst, 'test');
assertEquals(cx.state, 'GENERAL_STUFF');
assertEquals(cx.state, 'BEFORE_DOCUMENT');
// stay
cx.state = 'PROC_INST_ENDING';
cx.appendMemento('test');
Expand All @@ -102,13 +102,13 @@ Deno.test('handleSgmlDecl', () => {
cx.appendMemento('DOCTYP');
handler.handleSgmlDecl(cx, 'E');
assertEquals(cx.state, 'DOCTYPE');
// sgml_declaration & GENERAL_STUFF
// sgml_declaration & BEFORE_DOCUMENT
cx.state = 'SGML_DECL';
cx.appendMemento('test');
const [[event, sgml]] = handler.handleSgmlDecl(cx, '>');
assertEquals(event, 'sgml_declaration');
assertEquals(sgml, 'test');
assertEquals(cx.state, 'GENERAL_STUFF');
assertEquals(cx.state, 'BEFORE_DOCUMENT');
assertEquals(cx.memento, '');
// Error
cx.state = 'SGML_DECL';
Expand Down Expand Up @@ -204,19 +204,20 @@ Deno.test('handleCommentEnding2', () => {

Deno.test('handleDoctype', () => {
const cx = new XMLParseContext();
// doctype & GENERAL_STUFF
// doctype & BEFORE_DOCUMENT
cx.state = 'DOCTYPE';
cx.appendMemento('tes');
handler.handleDoctype(cx, 't');
const [[event, doctype]] = handler.handleDoctype(cx, '>');
assertEquals(event, 'doctype');
assertEquals(doctype, 'test');
assertEquals(cx.state, 'GENERAL_STUFF');
assertEquals(cx.state, 'BEFORE_DOCUMENT');
});

Deno.test('handleStartTag', () => {
const cx = new XMLParseContext();
// start_element & GENERAL_STUFF
cx.newElement('root');
cx.state = 'START_TAG';
cx.appendMemento('a');
const [[event, element]] = handler.handleStartTag(cx, '>');
Expand All @@ -239,6 +240,7 @@ Deno.test('handleStartTag', () => {
Deno.test('handleStartTagStuff', () => {
const cx = new XMLParseContext();
// start_element & GENERAL_STUFF
cx.newElement('root');
cx.state = 'START_TAG_STUFF';
cx.newElement('a');
const [[event, element]] = handler.handleStartTagStuff(cx, '>');
Expand All @@ -262,6 +264,7 @@ Deno.test('handleStartTagStuff', () => {
Deno.test('handleEmptyElementTag', () => {
const cx = new XMLParseContext();
// start_element & end_element & GENERAL_STUFF
cx.newElement('root');
cx.state = 'EMPTY_ELEMENT_TAG';
cx.newElement('test');
const [[event0, element0], [event1, element1]] = handler.handleEmptyElementTag(cx, '>');
Expand Down Expand Up @@ -322,6 +325,7 @@ Deno.test('handleAttributeEqual', () => {
Deno.test('handleAttributeValueStart', () => {
const cx = new XMLParseContext();
// ATTRIBUTE_VALUE_END
cx.newElement('root');
cx.state = 'ATTRIBUTE_VALUE_START';
cx.newElement('a');
cx.peekElement()!.newAttribute('b');
Expand All @@ -343,6 +347,7 @@ Deno.test('handleAttributeValueEnd', () => {
handler.handleAttributeValueEnd(cx, '/');
assertEquals(cx.state, 'EMPTY_ELEMENT_TAG');
// start_element & GENERAL_STUFF
cx.newElement('root');
cx.state = 'ATTRIBUTE_VALUE_END';
cx.newElement('a');
const [[event, element]] = handler.handleAttributeValueEnd(cx, '>');
Expand Down
9 changes: 8 additions & 1 deletion parser_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,15 @@ Deno.test('PullParser', async () => {
const parser = new PullParser();
const file = await Deno.readFile('parser_test.xml');
const events = parser.parse(file);
assertEquals(events.next().value, { name: 'start_document' });
assertEquals(events.next().value, { name: 'processing_instruction', procInst: 'xml version="1.0" encoding="utf-8"' });
assertEquals(events.next().value, { name: 'start_document' });
assertEquals(events.next().value, { name: 'start_prefix_mapping', ns: 'atom', uri: 'http://www.w3.org/2005/Atom' });
assertEquals(events.next().value, { name: 'start_prefix_mapping', ns: 'm', uri: 'https://xmlp.test/m' });
assertEquals((events.next().value as PullResult).element.qName, 'rss');
assertEquals((events.next().value as PullResult).element.qName, 'channel');
assertEquals((events.next().value as PullResult).element.qName, 'title');
assertEquals((events.next().value as PullResult).text, 'XML Parser for Deno');
assertEquals((events.next().value as PullResult).name, 'end_element');
while(true) {
const { done } = events.next();
if (done) {
Expand Down
6 changes: 3 additions & 3 deletions parser_test.xml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:m="https://xmlp.test/m">
<channel>
<title>SAX Parser for Deno</title>
<description>SAX Parser Library for Deno Stream Writer</description>
<title>XML Parser for Deno</title>
<description>XML Parser Library for Deno Stream Writer</description>
<link>https://xmlp.test/m/saxp</link>
<atom:link href="https://xmlp.test/m/saxp" rel="self" type="application/rss+xml"/>
<lastBuildDate>Sat, 10 Oct 2020 9:00:00 GMT</lastBuildDate>
Expand All @@ -12,7 +12,7 @@
<link>https://xmlp.test/m/saxp/2020/10/10/</link>
<guid isPermaLink="false">urn:uuid:12345678-abcd-efgh-ijkl-90mn123opq4r</guid>
<pubDate>Sat, 10 Oct 2020 12:00:00 GMT</pubDate>
<description>SAXPの利用方法と、サンプルコード</description>
<description>SAXParser &amp; PullParser</description>
<m:comment id="0">Good</m:comment>
<m:comment id="1">Bad</m:comment>
<m:comment id="2">Foo</m:comment>
Expand Down

0 comments on commit 8d401d2

Please sign in to comment.