Skip to content

Commit

Permalink
XInclude by xml:id (#198)
Browse files Browse the repository at this point in the history
  • Loading branch information
alfsb authored Dec 13, 2024
1 parent 7edcafd commit 4776d12
Show file tree
Hide file tree
Showing 2 changed files with 165 additions and 34 deletions.
182 changes: 148 additions & 34 deletions configure.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
| Authors: Dave Barr <[email protected]> |
| Hannes Magnusson <[email protected]> |
| Gwynne Raskind <[email protected]> |
| André L F S Bacci <[email protected]> |
+----------------------------------------------------------------------+
*/

Expand Down Expand Up @@ -782,13 +783,15 @@ function dom_load( DOMDocument $dom , string $filename ) : bool
return $dom->load( $filename , $options );
}

function dom_saveload( DOMDocument $dom , string $filename = "" )
function dom_saveload( DOMDocument $dom , string $filename = "" ) : string
{
if ( $filename == "" )
$filename = __DIR__ . "/temp/manual.xml";

$dom->save( $filename );
dom_load( $dom , $filename );

return $filename;
}

echo "Loading and parsing {$ac["INPUT_FILENAME"]}... ";
Expand All @@ -807,46 +810,140 @@ function dom_saveload( DOMDocument $dom , string $filename = "" )


echo "Running XInclude/XPointer... ";
$total = 0;
$maxrun = 10; //LIBXML_VERSION >= 21100 ? 1 : 10;
for( $run = 0 ; $run < $maxrun ; $run++ )

$total = xinclude_run_byid( $dom );
$total += xinclude_run_xpointer( $dom );

if ( $total == 0 )
echo "failed.\n";
else
echo "done. Performed $total XIncludes.\n";

xinclude_report();
xinclude_residual( $dom );

function xinclude_run_byid( DOMDocument $dom )
{
if ( $run > 0 )
echo "$run ";
libxml_clear_errors();
$status = (int) $dom->xinclude();
if ( $status <= 0 )
break;
$total += $status;
if ( $maxrun > 1 && $run + 1 >= $maxrun )
$total = 0;
$maxrun = 10; //LIBXML_VERSION >= 21100 ? 1 : 10;
for( $run = 0 ; $run < $maxrun ; $run++ )
{
echo "Recursive XInclude is too deep.\n";
errors_are_bad(-1);
echo "$run ";
$xpath = new DOMXPath( $dom );
$xpath->registerNamespace( "xi" , "http://www.w3.org/2001/XInclude" );
$xincludes = $xpath->query( "//xi:include" );

$changed = false;
foreach( $xincludes as $xinclude )
{
$xpointer = $xinclude->getAttribute( "xpointer" );
$target = $xinclude->ownerDocument->getElementById( $xpointer );

if ( $target == null )
continue;

$other = new DOMDocument( '1.0' , 'utf8' );
$frags = $other->createDocumentFragment();
$other->append( $frags );
$frags->append( $other->importNode( $target , true ) ); // dup add

// "attributes in xml: namespace are not copied"

$oxpth = new DOMXPath( $other );
$attribs = $oxpth->query( "//@*" );

foreach( $attribs as $attrib )
if ( $attrib->prefix == "xml" )
$attrib->parentNode->removeAttribute( $attrib->nodeName );

$insert = $dom->importNode( $frags , true ); // dup
$xinclude->parentNode->insertBefore( $insert , $xinclude ); // add
$xinclude->parentNode->removeChild( $xinclude ); // del

$total++;
$changed = true;
libxml_clear_errors();
}

if ( ! $changed )
return $total;
}
echo "XInclude nested too deeply (xml:id).\n";
errors_are_bad( 1 );
}

if ($total == 0) {
echo "failed.\n";
} else {
echo "done. Performed $total XIncludes.\n";
/**
 * Runs libxml's native XInclude processing over the document in
 * repeated passes, so that includes nested inside included content
 * also get resolved.
 *
 * Each pass prints its zero-based pass number followed by a space and
 * then calls DOMDocument::xinclude(). The result is cast to int, so a
 * false or negative result counts as "nothing left to do" and ends the
 * loop. libxml errors are cleared only after a pass that performed
 * substitutions, which leaves the errors of the final pass in place.
 *
 * @param DOMDocument $dom Document to process in place.
 * @return int Sum of the positive results of all passes.
 *
 * If every one of the allowed passes still performed substitutions,
 * a "nested too deeply" message is printed and errors_are_bad( 1 ) is
 * called. NOTE(review): errors_are_bad() is defined elsewhere in this
 * file; presumably it terminates the script, since nothing is returned
 * after it - confirm.
 */
function xinclude_run_xpointer( DOMDocument $dom ) : int
{
    // Pass limit; a libxml-version-dependent limit was considered:
    // LIBXML_VERSION >= 21100 ? 1 : 10
    $maxrun    = 10;
    $performed = 0;
    $pass      = 0;

    while ( $pass < $maxrun )
    {
        echo "$pass ";
        $status = (int) $dom->xinclude();

        // Nothing substituted (or processing failed): we are done.
        if ( $status <= 0 )
            return $performed;

        $performed += $status;
        libxml_clear_errors();
        $pass++;
    }

    echo "XInclude nested too deeply (xpointer).\n";
    errors_are_bad( 1 );
}
flush();

if ( $ac['XPOINTER_REPORTING'] == 'yes' || $ac['LANG'] == 'en' )
function xinclude_report()
{
global $ac;

$report = $ac['XPOINTER_REPORTING'] == 'yes' || $ac['LANG'] == 'en';
$output = $ac['STDERR_TO_STDOUT'] == 'yes' ? STDOUT : STDERR;
$fatal = $ac['LANG'] == 'en';

$errors = libxml_get_errors();
$output = ( $ac['STDERR_TO_STDOUT'] == 'yes' ) ? STDOUT : STDERR;
if ( count( $errors ) > 0 )
libxml_clear_errors();

if ( ! $report )
return;

$count = 0;
$prefix = realpath( __DIR__ );

$prevLine = -1;
$prevClmn = -1;

foreach( $errors as $error )
{
fprintf( $output , "\n");
foreach( $errors as $error )
fprintf( $output , "{$error->message}\n");
if ( $ac['LANG'] == 'en' )
errors_are_bad(1);
$msg = $error->message;
$file = $error->file;
$line = $error->line;
$clmn = $error->column;

if ( $prevLine == $line && $prevClmn == $clmn )
continue; // XPointer failures double reports sometimes
$prevLine = $line;
$prevClmn = $clmn;

$msg = rtrim( $msg );
if ( str_starts_with( $file , $prefix ) )
$file = substr( $file , strlen( $prefix ) + 1 );

if ( $count == 0 )
fprintf( $output , "\n" );

fprintf( $output , "[{$file} {$line}:{$clmn}] $msg\n" );
$count++;
}

if ( $count > 0 )
{
fprintf( $output , "\n" );
if ( $fatal )
errors_are_bad( 1 );
}
}

if ( $ac['LANG'] != 'en' )
function xinclude_residual( DOMDocument $dom )
{
// XInclude failures are soft errors on translations, so remove
// residual XInclude tags on translations to keep them building.
Expand All @@ -872,11 +969,11 @@ function dom_saveload( DOMDocument $dom , string $filename = "" )
case "tbody":
$fixup = "<row><entry></entry></row>";
break;
// case "variablelist":
// $fixup = "<varlistentry><term>></term><listitem><simpara></simpara></listitem></varlistentry>";
// break;
case "variablelist":
$fixup = "<varlistentry><term></term><listitem><simpara></simpara></listitem></varlistentry>";
break;
default:
echo "Unknown parent element of failed XInclude: $tagName\n";
echo "Unknown parent of failed XInclude: $tagName\n";
$explain = true;
continue 2;
}
Expand All @@ -899,7 +996,25 @@ function dom_saveload( DOMDocument $dom , string $filename = "" )
state. Please report any "Unknown parent" messages on the doc-base
repository, and focus on fixing XInclude/XPointers failures above.\n\n
MSG;
exit(-1); // stop here, do not let more messages further confuse the matter
exit(1); // stop here, do not let more messages further confuse the matter
}

// XInclude by xml:id never duplicates xml:id. However, also using
// XInclude by XPath/XPointer may start causing duplications
// (see docs/structure.md). Crude and ugly fixup ahead, beware!

$list = array();
$nodes = $xpath->query( "//*[@xml:id]" );
foreach( $nodes as $node )
{
$id = $node->getAttribute( "xml:id" );
if ( isset( $list[ $id ] ) )
{
if ( ! str_contains( $id , '..' ) )
echo " Random removing duplicated xml:id: $id\n";
$node->removeAttribute( "xml:id" );
}
$list[ $id ] = $id;
}
}

Expand Down Expand Up @@ -1001,4 +1116,3 @@ function dom_saveload( DOMDocument $dom , string $filename = "" )

errors_are_bad(1); // Tell the shell that this script finished with an error.
}
?>
17 changes: 17 additions & 0 deletions docs/structure.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ The PHP Manual sources are stored in Git repositories.
To checkout the PHP Manual sources, follow the steps in [Setting up a documentation environment](local-setup.md)

## File structure

**Note for translators:** if any of the source files don't exist in your translation, the English content will be used
during the building process. This means that you *must not* place untranslated files in your translation tree. Otherwise,
it will lead to a mess, confusion and may break some tools.
Expand Down Expand Up @@ -42,3 +43,19 @@ There are some other important files:
Including common warnings, notes, etc.
- *translation.xml* - this file is used to store all central translation info, like a small
intro text for translators and the persons list. This file is not present in the English tree.

## `xml:id` structure

The PHP Manual is complex and uses `xml:id` extensively, for chunking,
linking and XInclude purposes, so some care is necessary to avoid
collisions. There are two pseudo-types of IDs used in the manual.

* **Structural IDs:** IDs that are present on structural elements of
DocBook XML (like `<chapter>`, `<section>` and so on);

* **XInclude IDs:** IDs that are used as target of XIncludes.

Structural IDs are in the pattern `id.id`, while XInclude IDs use the
pattern `structural.id..local.name`. That is, in Structural IDs the
name parts are separated by a single dot, while XInclude IDs start
with a Structural ID, followed by a `..` separator and a local path suffix.

0 comments on commit 4776d12

Please sign in to comment.