diff --git a/README.md b/README.md index 3b160d3..ddbc886 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ When using in SAX style, create an instance of the parser and register the liste The XML to be parsed is specified by Deno.Reader, UINT8 array, or a character string. ```typescript -import { SAXParser } from 'https://denopkg.com/m-kur/xmlp@v0.10/mod.ts'; +import { SAXParser } from 'https://denopkg.com/masataka/xmlp@v0.11/mod.ts'; // create a SAX parser instance const parser = new SAXParser(); @@ -59,7 +59,7 @@ I think it's more interesting to write the Pull style than the SAX. This Pull pa Currently the Pull parser supports Uint8 arrays and strings, not Deno.Reader. ```typeScript -import { PullParser } from 'https://denopkg.com/m-kur/xmlp@v0.10/mod.ts'; +import { PullParser } from 'https://denopkg.com/masataka/xmlp@v0.11/mod.ts'; // create a pull parser instance const parser = new PullParser(); diff --git a/handler.ts b/handler.ts index 005e3b7..d7895d0 100644 --- a/handler.ts +++ b/handler.ts @@ -9,16 +9,14 @@ function isWhitespace(c: string): boolean { // BEFORE_DOCUMENT; FOUND_LT, Error export function handleBeforeDocument(cx: XMLParseContext, c: string): XMLParseEvent[] { - let events: XMLParseEvent[] = []; if (c === '<') { - events = [['start_document']]; cx.state = 'FOUND_LT'; } else { if (!isWhitespace(c)) { throw new XMLParseError('Non-whitespace before document.', cx); } } - return events; + return []; } // GENERAL_STUFF; FOUND_LT @@ -86,7 +84,7 @@ export function handleProcInstEnding(cx: XMLParseContext, c: string): XMLParseEv if (c === '>') { events = [['processing_instruction', cx.memento]]; cx.clearMemento(); - cx.state = 'GENERAL_STUFF'; + cx.state = cx.elementLength > 0 ? 'GENERAL_STUFF' : 'BEFORE_DOCUMENT'; } else { cx.appendMemento(`?${c}`); cx.state = 'PROC_INST'; @@ -113,7 +111,7 @@ export function handleSgmlDecl(cx: XMLParseContext, c: string): XMLParseEvent[] } else if (c === '>') { events = [['sgml_declaration', cx.memento]]; cx.clearMemento(); - cx.state = 'GENERAL_STUFF'; + cx.state = cx.elementLength > 0 ? 'GENERAL_STUFF' : 'BEFORE_DOCUMENT'; } else { cx.appendMemento(c); } @@ -197,13 +195,13 @@ export function handleCommentEnding2(cx: XMLParseContext, c: string): XMLParseEv return events; } -// DOCTYPE; doctype & GENERAL_STUFF +// DOCTYPE; doctype & BEFORE_DOCUMENT export function handleDoctype(cx: XMLParseContext, c: string): XMLParseEvent[] { let events: XMLParseEvent[] = []; if (c === '>') { events = [['doctype', cx.memento]]; cx.clearMemento(); - cx.state = 'GENERAL_STUFF'; + cx.state = 'BEFORE_DOCUMENT'; } else { cx.appendMemento(c); } @@ -212,6 +210,9 @@ export function handleDoctype(cx: XMLParseContext, c: string): XMLParseEvent[] { function emitStartElement(cx: XMLParseContext): XMLParseEvent[] { const events: XMLParseEvent[] = []; + if (cx.elementLength === 1) { + events.push(['start_document']); + } const element = cx.peekElement()!; for (const { ns, uri } of element.prefixMappings) { cx.registerNamespace(ns, uri); diff --git a/handler_test.ts b/handler_test.ts index f13e591..ee53343 100644 --- a/handler_test.ts +++ b/handler_test.ts @@ -70,13 +70,13 @@ Deno.test('handleProcInst', () => { Deno.test('handleProcInstEnding', () => { const cx = new XMLParseContext(); - // processing_instruction & GENERAL_STUFF + // processing_instruction & BEFORE_DOCUMENT cx.state = 'PROC_INST_ENDING'; cx.appendMemento('test'); const [[event, procInst]] = handler.handleProcInstEnding(cx, '>'); assertEquals(event, 'processing_instruction'); assertEquals(procInst, 'test'); - assertEquals(cx.state, 'GENERAL_STUFF'); + assertEquals(cx.state, 'BEFORE_DOCUMENT'); // stay cx.state = 'PROC_INST_ENDING'; cx.appendMemento('test'); @@ -102,13 +102,13 @@ Deno.test('handleSgmlDecl', () => { cx.appendMemento('DOCTYP'); handler.handleSgmlDecl(cx, 'E'); assertEquals(cx.state, 'DOCTYPE'); - // sgml_declaration & GENERAL_STUFF + // sgml_declaration & BEFORE_DOCUMENT cx.state = 'SGML_DECL'; cx.appendMemento('test'); const [[event, sgml]] = handler.handleSgmlDecl(cx, '>'); assertEquals(event, 'sgml_declaration'); assertEquals(sgml, 'test'); - assertEquals(cx.state, 'GENERAL_STUFF'); + assertEquals(cx.state, 'BEFORE_DOCUMENT'); assertEquals(cx.memento, ''); // Error cx.state = 'SGML_DECL'; @@ -204,19 +204,20 @@ Deno.test('handleCommentEnding2', () => { Deno.test('handleDoctype', () => { const cx = new XMLParseContext(); - // doctype & GENERAL_STUFF + // doctype & BEFORE_DOCUMENT cx.state = 'DOCTYPE'; cx.appendMemento('tes'); handler.handleDoctype(cx, 't'); const [[event, doctype]] = handler.handleDoctype(cx, '>'); assertEquals(event, 'doctype'); assertEquals(doctype, 'test'); - assertEquals(cx.state, 'GENERAL_STUFF'); + assertEquals(cx.state, 'BEFORE_DOCUMENT'); }); Deno.test('handleStartTag', () => { const cx = new XMLParseContext(); // start_element & GENERAL_STUFF + cx.newElement('root'); cx.state = 'START_TAG'; cx.appendMemento('a'); const [[event, element]] = handler.handleStartTag(cx, '>'); @@ -239,6 +240,7 @@ Deno.test('handleStartTag', () => { Deno.test('handleStartTagStuff', () => { const cx = new XMLParseContext(); // start_element & GENERAL_STUFF + cx.newElement('root'); cx.state = 'START_TAG_STUFF'; cx.newElement('a'); const [[event, element]] = handler.handleStartTagStuff(cx, '>'); @@ -262,6 +264,7 @@ Deno.test('handleStartTagStuff', () => { Deno.test('handleEmptyElementTag', () => { const cx = new XMLParseContext(); // start_element & end_element & GENERAL_STUFF + cx.newElement('root'); cx.state = 'EMPTY_ELEMENT_TAG'; cx.newElement('test'); const [[event0, element0], [event1, element1]] = handler.handleEmptyElementTag(cx, '>'); @@ -322,6 +325,7 @@ Deno.test('handleAttributeEqual', () => { Deno.test('handleAttributeValueStart', () => { const cx = new XMLParseContext(); // ATTRIBUTE_VALUE_END + cx.newElement('root'); cx.state = 'ATTRIBUTE_VALUE_START'; cx.newElement('a'); cx.peekElement()!.newAttribute('b'); @@ -343,6 +347,7 @@ Deno.test('handleAttributeValueEnd', () => { handler.handleAttributeValueEnd(cx, '/'); assertEquals(cx.state, 'EMPTY_ELEMENT_TAG'); // start_element & GENERAL_STUFF + cx.newElement('root'); cx.state = 'ATTRIBUTE_VALUE_END'; cx.newElement('a'); const [[event, element]] = handler.handleAttributeValueEnd(cx, '>'); diff --git a/parser_test.ts b/parser_test.ts index 0113ce0..5599a36 100644 --- a/parser_test.ts +++ b/parser_test.ts @@ -106,8 +106,15 @@ Deno.test('PullParser', async () => { const parser = new PullParser(); const file = await Deno.readFile('parser_test.xml'); const events = parser.parse(file); - assertEquals(events.next().value, { name: 'start_document' }); assertEquals(events.next().value, { name: 'processing_instruction', procInst: 'xml version="1.0" encoding="utf-8"' }); + assertEquals(events.next().value, { name: 'start_document' }); + assertEquals(events.next().value, { name: 'start_prefix_mapping', ns: 'atom', uri: 'http://www.w3.org/2005/Atom' }); + assertEquals(events.next().value, { name: 'start_prefix_mapping', ns: 'm', uri: 'https://xmlp.test/m' }); + assertEquals((events.next().value as PullResult).element.qName, 'rss'); + assertEquals((events.next().value as PullResult).element.qName, 'channel'); + assertEquals((events.next().value as PullResult).element.qName, 'title'); + assertEquals((events.next().value as PullResult).text, 'XML Parser for Deno'); + assertEquals((events.next().value as PullResult).name, 'end_element'); while(true) { const { done } = events.next(); if (done) { diff --git a/parser_test.xml b/parser_test.xml index d5041cb..e8736f0 100644 --- a/parser_test.xml +++ b/parser_test.xml @@ -1,8 +1,8 @@ - SAX Parser for Deno - SAX Parser Library for Deno Stream Writer + XML Parser for Deno + XML Parser Library for Deno Stream Writer https://xmlp.test/m/saxp Sat, 10 Oct 2020 9:00:00 GMT @@ -12,7 +12,7 @@ https://xmlp.test/m/saxp/2020/10/10/ urn:uuid:12345678-abcd-efgh-ijkl-90mn123opq4r Sat, 10 Oct 2020 12:00:00 GMT - SAXPの利用方法と、サンプルコード + SAXParser &anp; PullParser Good Bad Foo