diff --git a/Test/expected-results/hyperlinktest.xml b/Test/expected-results/hyperlinktest.xml index ce2ef962d..41c15525a 100644 --- a/Test/expected-results/hyperlinktest.xml +++ b/Test/expected-results/hyperlinktest.xml @@ -20,7 +20,7 @@ - + diff --git a/Test/expected-results/test-indexes.xml b/Test/expected-results/test-indexes.xml index 6c26e0861..b40780ce8 100644 --- a/Test/expected-results/test-indexes.xml +++ b/Test/expected-results/test-indexes.xml @@ -20,7 +20,7 @@ - + diff --git a/Test/expected-results/test-rtf2tei.xml b/Test/expected-results/test-rtf2tei.xml index beaba9fa1..633694d3d 100644 --- a/Test/expected-results/test-rtf2tei.xml +++ b/Test/expected-results/test-rtf2tei.xml @@ -20,7 +20,7 @@ - + diff --git a/Test/expected-results/test11.xml b/Test/expected-results/test11.xml index 8d5919a53..10b51baa2 100644 --- a/Test/expected-results/test11.xml +++ b/Test/expected-results/test11.xml @@ -20,7 +20,7 @@ - + diff --git a/Test/expected-results/test11a.xml b/Test/expected-results/test11a.xml index 8218d055f..bb2053bcd 100644 --- a/Test/expected-results/test11a.xml +++ b/Test/expected-results/test11a.xml @@ -20,7 +20,7 @@ - + diff --git a/Test/expected-results/test18.xml b/Test/expected-results/test18.xml index 149409ea5..da5808b81 100644 --- a/Test/expected-results/test18.xml +++ b/Test/expected-results/test18.xml @@ -20,7 +20,7 @@ - + diff --git a/Test/expected-results/test19.xml b/Test/expected-results/test19.xml index 275af0f5d..bf506cea7 100644 --- a/Test/expected-results/test19.xml +++ b/Test/expected-results/test19.xml @@ -20,7 +20,7 @@ - + diff --git a/Test/expected-results/test29.xml b/Test/expected-results/test29.xml index b10ebaa8f..a56aa8560 100644 --- a/Test/expected-results/test29.xml +++ b/Test/expected-results/test29.xml @@ -20,7 +20,7 @@ - + @@ -175,7 +175,7 @@

- + unable to handle picture here, no embed or link

diff --git a/Test/expected-results/test37.xml b/Test/expected-results/test37.xml index d4ba53ae2..bc070773e 100644 --- a/Test/expected-results/test37.xml +++ b/Test/expected-results/test37.xml @@ -20,7 +20,7 @@ - + diff --git a/Test/expected-results/test39.xml b/Test/expected-results/test39.xml index 89de5b965..2def36d88 100644 --- a/Test/expected-results/test39.xml +++ b/Test/expected-results/test39.xml @@ -20,7 +20,7 @@ - + @@ -202,7 +202,7 @@

- + unable to handle picture here, no embed or link

diff --git a/Test/expected-results/test40.xml b/Test/expected-results/test40.xml index 67b13fb3a..56b6162ab 100644 --- a/Test/expected-results/test40.xml +++ b/Test/expected-results/test40.xml @@ -20,7 +20,7 @@ - + diff --git a/Test2/expected-results/testDocxIndexes1.xml b/Test2/expected-results/testDocxIndexes1.xml index ba48c9ed4..4feeeaed3 100644 --- a/Test2/expected-results/testDocxIndexes1.xml +++ b/Test2/expected-results/testDocxIndexes1.xml @@ -19,7 +19,7 @@ - + diff --git a/Test2/expected-results/testDocxListsTables1.xml b/Test2/expected-results/testDocxListsTables1.xml index 4787a37dd..bc9617799 100644 --- a/Test2/expected-results/testDocxListsTables1.xml +++ b/Test2/expected-results/testDocxListsTables1.xml @@ -19,7 +19,7 @@ - + diff --git a/Test2/expected-results/testNotes2.xml b/Test2/expected-results/testNotes2.xml index c34c68fc5..0a2ed267d 100644 --- a/Test2/expected-results/testNotes2.xml +++ b/Test2/expected-results/testNotes2.xml @@ -19,7 +19,7 @@ - + diff --git a/Test2/expected-results/testShelfmarkMs.xml b/Test2/expected-results/testShelfmarkMs.xml index b5f1b6315..1aac080b5 100644 --- a/Test2/expected-results/testShelfmarkMs.xml +++ b/Test2/expected-results/testShelfmarkMs.xml @@ -19,7 +19,7 @@ - + diff --git a/docx/from/docxtotei.xsl b/docx/from/docxtotei.xsl index b1e1b924c..959145f26 100644 --- a/docx/from/docxtotei.xsl +++ b/docx/from/docxtotei.xsl @@ -17,7 +17,7 @@ xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:rel="http://schemas.openxmlformats.org/package/2006/relationships" xmlns:tbx="http://www.lisa.org/TBX-Specification.33.0.html" - xmlns:html="http://www.w3.org/1999/xhtml" + xmlns:html="http://www.w3.org/1999/xhtml" xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns:teidocx="http://www.tei-c.org/ns/teidocx/1.0" xmlns:v="urn:schemas-microsoft-com:vml" @@ -31,52 +31,52 @@ version="2.0" exclude-result-prefixes="#all"> - - - - - - - - - true - mml - tei - tei - false - false - false - true - true - false - false - false - false - 890 - 576 - - - - - - - - - - - - - - - -

TEI stylesheet for converting Word docx files to TEI

-

This software is dual-licensed: - + + + + + + + + + true + mml + tei + tei + false + false + false + true + true + false + false + false + false + 890 + 576 + + + + + + + + + + + + + + + +

TEI stylesheet for converting Word docx files to TEI

+

This software is dual-licensed: + 1. Distributed under a Creative Commons Attribution-ShareAlike 3.0 Unported License http://creativecommons.org/licenses/by-sa/3.0/ 2. http://www.opensource.org/licenses/BSD-2-Clause - + Redistribution and use in source and binary forms, with or without @@ -101,87 +101,73 @@ data, or profits; or business interruption) however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence or otherwise) arising in any way out of the use of this software, even if advised of the possibility of such damage. -

-

Author: See AUTHORS

- -

Copyright: 2013, TEI Consortium

-
-
- - - - - 1234567890 - ~!@#$%^&*()<>{}[]|:;,.?`'"=+-_ - - - - - - - - - - - - - - - - - -

The main template that starts the conversion from docx to TEI

-

IMPORTING STYLESHEETS AND OVERRIDING MATCHED TEMPLATES:

- -

When importing a stylesheet (xsl:import) all the templates - in the imported stylesheet get a lower import-precedence than - the ones in the importing stylesheet. If the importing - stylesheet wants to override, let's say a general template to - match all <w:p> elements where no more specialized rule - applies it can't, since it will automatically override all - w:p[somepredicate] template in the imported stylesheet as - well. In this case we have outsourced the processing of the - general template into a named template and all the imported - stylesheet does is to call the named template. Now, the - importing stylesheet can simply override the named template, - and everything works out fine.

- -

See templates: - w:p (mode: paragraph)

- -

Modes:

-
    -
  • pass0: a normalization process for styles. Can also - detect illegal styles.
  • - -
  • pass2: templates that apply in the second stage - of the conversion, cleaning TEI elements created in the - first ise."
  • - -
  • inSectionGroup: defines a template that works on a - group of consecutive elements (w:p or w:tbl elements) that - form a section (a normal section, not to be confused with - w:sectPr).
  • - -
  • paragraph: defines that the template - works on an individual element (usually - starting with a w:p element).
  • - -
  • iden: simply copies the content
  • -
- +

+

Author: See AUTHORS

+

Copyright: 2013, TEI Consortium

+
+
+ + + + + 1234567890 + ~!@#$%^&*()<>{}[]|:;,.?`'"=+-_ + + + + + + + + + + + + + + +

The main template that starts the conversion from docx to TEI

+

IMPORTING STYLESHEETS AND OVERRIDING MATCHED TEMPLATES:

+ +

When importing a stylesheet (xsl:import) all the templates + in the imported stylesheet get a lower import-precedence than + the ones in the importing stylesheet. If the importing + stylesheet wants to override, let's say a general template to + match all <w:p> elements where no more specialized rule + applies it can't, since it will automatically override all + w:p[somepredicate] template in the imported stylesheet as + well. In this case we have outsourced the processing of the + general template into a named template and all the imported + stylesheet does is to call the named template. Now, the + importing stylesheet can simply override the named template, + and everything works out fine.

+

See templates: - w:p (mode: paragraph)

+

Modes:

+
    +
  • pass0: a normalization process for styles. Can also + detect illegal styles.
  • +
  • pass2: templates that apply in the second stage + of the conversion, cleaning TEI elements created in the + first ise."
  • +
  • inSectionGroup: defines a template that works on a + group of consecutive elements (w:p or w:tbl elements) that + form a section (a normal section, not to be confused with + w:sectPr).
  • +
  • paragraph: defines that the template + works on an individual element (usually + starting with a w:p element).
  • +
  • iden: simply copies the content
  • +
- - The file cannot be read - - - The file cannot be read - + + The file cannot be read + + + The file cannot be read + @@ -189,14 +175,13 @@ of this software, even if advised of the possibility of such damage. - + - +
- @@ -204,9 +189,7 @@ of this software, even if advised of the possibility of such damage. - - @@ -215,7 +198,7 @@ of this software, even if advised of the possibility of such damage. - + @@ -223,239 +206,229 @@ of this software, even if advised of the possibility of such damage. Main document template - - - - - - - - - - - - - Create the basic text; worry later about dividing it up - - - - - - - - - - - - - - - - - Process the text by high-level divisions - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Bookmarks in section mode

-

- There are certain elements that we don't really care about, but that - force us to regroup everything from the next sibling on. - - @see grouping in construction of headline outline. -

-
-
- - - - - - - - - -

Bookmarks in normal mode

-

Copy bookmarks for processing in pass 2

-
-
- - - - - - - - - - - - - - - - - -

Grouping consecutive elements that belong together

-

- We are now working on a group of all elements inside some group bounded by - headings. These need to be further split up into smaller groups for figures, - list etc. and into individual groups for simple paragraphs... -

-
-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Creating a group of a figure + + + + + + + + + + + + Create the basic text; worry later about dividing it up - -
- - - -
-
+ + + + + + + + + + + + + + Process the text by high-level divisions + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Bookmarks in section mode

+

+ There are certain elements that we don't really care about, but that + force us to regroup everything from the next sibling on. + + @see grouping in construction of headline outline. +

+
+
+ + + + + - - Creating a group of a caption (figure or table) + + +

Bookmarks in normal mode

+

Copy bookmarks for processing in pass 2

+
- - - - - - - + + + + + + + + + + + + + + + + + +

Grouping consecutive elements that belong together

+

+ We are now working on a group of all elements inside some group bounded by + headings. These need to be further split up into smaller groups for figures, + list etc. and into individual groups for simple paragraphs... +

+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - Creating a gloss list + + Creating a group of a figure + +
+ + + +
+
- - - - - - - + + Creating a group of a caption (figure or table) + + + + + + + + - - Creating a group of a figure + + Creating a gloss list - - - - - - - + + + + + + + - - Creating a group of a front/title page - - - - - - - - - - + + Creating a group of a figure + + + + + + + + - - Groups the document by headings and thereby creating the document structure. - + + Creating a group of a front/title page + + + + + + + + + + + Groups the document by headings and thereby creating the document structure. + @@ -463,225 +436,227 @@ of this software, even if advised of the possibility of such damage.
- + - - - - - - - - + group-starting-with="w:p[w:pPr/w:pStyle/@w:val=$NextHeader]"> + + + + + + + +
-

Looks through the document to find forme work related sections.

- Creates a <fw> element for each forme work related section. These include - running headers and footers. The corresponding elements in OOXML are w:headerReference - and w:footerReference. These elements only define a reference that to a header or - footer definition file. The reference itself is resolved in the file word/_rels/document.xml.rels. + Creates a <fw> element for each forme work related section. These include + running headers and footers. The corresponding elements in OOXML are w:headerReference + and w:footerReference. These elements only define a reference that to a header or + footer definition file. The reference itself is resolved in the file word/_rels/document.xml.rels.

- - - - - - - - - - header - footer - - - - - - - - - - - - - - - - - - - + + + + + + + + + + header + footer + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + - - simple teiHeader. For a more sophisticated header, think about overriding - this template - - - - - - - <xsl:call-template name="getDocTitle"/> - - - - - - - - - - - - - -

unknown

-
- -

Converted from a Word document

-
-
- - - - - - - - - - - - - - - -
-
- - - generates a section heading. If you need something specific, feel free - to overwrite this template - - - - - - - - + + simple teiHeader. For a more sophisticated header, think about overriding + this template + + + + + + + <xsl:call-template name="getDocTitle"/> + + + + + + + + + + + + + +

unknown

+
+ +

Converted from a Word document

+
+
+ + + + + + + + + + + + + + + +
+
+ + + generates a section heading. If you need something specific, feel free + to overwrite this template + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - unknown title - - - + + + + + + + unknown title + + + - - - - - - - unknown author - - - + + + + + + + unknown author + + + - - - - - - - unknown date - - - + + + + + + + unknown date + + + - - - - # - - - - - + + + + # + + + + + diff --git a/docx/from/functions.xsl b/docx/from/functions.xsl index 2a8b0fa13..71dfc6b6d 100644 --- a/docx/from/functions.xsl +++ b/docx/from/functions.xsl @@ -21,16 +21,16 @@ version="2.0" exclude-result-prefixes="cals ve o r m v wp w10 w wne mml tbx iso tei a xs pic fn"> - - -

TEI Utility stylesheet for making TEI XML from Word docx files

-

This software is dual-licensed: - + + +

TEI Utility stylesheet for making TEI XML from Word docx files

+

This software is dual-licensed: + 1. Distributed under a Creative Commons Attribution-ShareAlike 3.0 Unported License http://creativecommons.org/licenses/by-sa/3.0/ 2. http://www.opensource.org/licenses/BSD-2-Clause - + Redistribution and use in source and binary forms, with or without @@ -55,171 +55,177 @@ data, or profits; or business interruption) however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence or otherwise) arising in any way out of the use of this software, even if advised of the possibility of such damage. -

-

Author: See AUTHORS

-

Id: $Id: functions.xsl 11232 2012-12-18 18:06:19Z rahtz $

-

Copyright: 2013, TEI Consortium

-
- - - - Defines whether or not a word paragraph is a first level heading. - - - - - true - false - - - - - Defines whether or not a word paragraph is a heading. - - - - - true - true - true - false - - - - - Defines whether or not a word paragraph is a front page element. - - - - - true - true - true - true - false - - - - - Defines whether or not a word paragraph is a list element. - - - - - - true - true - true - true - false - - +

+

Author: See AUTHORS

+

Id: $Id: functions.xsl 11232 2012-12-18 18:06:19Z rahtz $

+

Copyright: 2013, TEI Consortium

+ +
- - Defines whether or not a word paragraph is a table of contents. - - - - true - true - false - - - - - Defines whether or not a word paragraph is a figure element. - - - - true - true - false - - - - - Defines whether or not a word paragraph is a caption. - - - - true - true - false - - + + Defines whether or not a word paragraph is a first level heading. + + + + + true + false + + + + Defines whether or not a word paragraph is a heading. + + + + + + true + true + true + false + + - - Defines whether or not a word paragraph is a line of poetry. - - - - true - false - - - - - Defines whether or not a word paragraph is gloss list. - - - - true - false - - - - - Is given a header style and returns the style for the next level header. - - - - + + Defines whether or not a word paragraph is a front page element. + + + + + + true + true + true + true + false + + + + + Defines whether or not a word paragraph is a list element. + + + + + + + true + true + true + true + false + + + + Defines whether or not a word paragraph is a table of contents. + + + + + true + true + false + + + + + Defines whether or not a word paragraph is a figure element. + + + + + true + true + false + + + + + Defines whether or not a word paragraph is a caption. + + + + + true + true + false + + - - Returns a listtype for a given stylename (return empty string to figure it out dynamically). - - - - - - gloss - - - bulleted - - - bulleted - - - numbered - - - numbered - - - - - - - - - insert a note that a docx conversion cannot proceed - - - - - - docx conversion issue: - + + Defines whether or not a word paragraph is a line of poetry. + + + + + true + false + + + + + Defines whether or not a word paragraph is gloss list. + + + + + true + false + + + + + Is given a header style and returns the style for the next level header. + + + + + + + + Returns a listtype for a given stylename (return empty string to figure it out dynamically). + + + + + + gloss + + + bulleted + + + bulleted + + + numbered + + + numbered + + + + + + + + insert a note that a docx conversion cannot proceed + + + + + + docx conversion issue: + - - process a Word w:instrText + + process a Word w:instrText + @@ -228,8 +234,7 @@ of this software, even if advised of the possibility of such damage. - + @@ -260,11 +265,10 @@ of this software, even if advised of the possibility of such damage. - - - Whether a w:instrText can be discarded on not. ignore all - the bibliographic addins + + Whether a w:instrText can be discarded on not. ignore all the bibliographic addins + @@ -275,10 +279,11 @@ of this software, even if advised of the possibility of such damage. true false - - - - Whether a w:instrText is a bibliographic addin + + + + Whether a w:instrText is a bibliographic addin + @@ -287,7 +292,7 @@ of this software, even if advised of the possibility of such damage. true false - + Returns true or false for value types that may be "on" or "off".