Skip to content

Commit e774978

Browse files
committed
Added support for XML
1 parent 320b7f2 commit e774978

File tree

4 files changed

+367
-32
lines changed

4 files changed

+367
-32
lines changed

src/DiDom/Document.php

+113-30
Original file line numberDiff line numberDiff line change
@@ -15,29 +15,35 @@ class Document
1515
*/
1616
protected $document;
1717

18+
/**
19+
* @var string
20+
*/
21+
protected $type;
22+
1823
/**
1924
* Constructor.
2025
*
21-
* @param string $html HTML code or file path
26+
* @param string $string HTML or XML string or file path
2227
* @param bool $isFile indicates that the first parameter is a file path
2328
* @param string $encoding The document encoding
29+
* @param string $type The document type ("html" or "xml")
2430
*/
25-
public function __construct($html = null, $isFile = false, $encoding = 'UTF-8')
31+
public function __construct($string = null, $isFile = false, $encoding = 'UTF-8', $type = 'html')
2632
{
27-
if ($html instanceof DOMDocument) {
28-
$this->document = $html;
33+
if ($string instanceof DOMDocument) {
34+
$this->document = $string;
2935

3036
return;
3137
}
3238

39+
if (!is_string($encoding)) {
40+
throw new InvalidArgumentException(sprintf('%s expects parameter 3 to be string, %s given', __METHOD__, gettype($encoding)));
41+
}
42+
3343
$this->document = new DOMDocument('1.0', $encoding);
3444

35-
if ($html !== null) {
36-
if ($isFile) {
37-
$this->loadHtmlFile($html);
38-
} else {
39-
$this->loadHtml($html);
40-
}
45+
if ($string !== null) {
46+
$this->load($string, $isFile, $type);
4147
}
4248
}
4349

@@ -85,34 +91,58 @@ public function appendChild($node)
8591
}
8692

8793
/**
88-
* Load HTML from a string.
94+
* Load HTML or XML.
8995
*
90-
* @param string $html The HTML string
91-
*
92-
* @return \DiDom\Document
93-
*
94-
* @throws \InvalidArgumentException if the provided argument is not a string
96+
* @param string $string HTML or XML string or file path
97+
* @param bool $isFile indicates that the first parameter is a file path
98+
* @param string $type The document type ("html" or "xml")
9599
*/
96-
public function loadHtml($html)
100+
public function load($string, $isFile = false, $type = 'html')
97101
{
98-
if (!is_string($html)) {
99-
throw new InvalidArgumentException(sprintf('%s expects parameter 1 to be string, %s given', __METHOD__, (is_object($html) ? get_class($html) : gettype($html))));
102+
if (!is_string($string)) {
103+
throw new InvalidArgumentException(sprintf('%s expects parameter 1 to be string, %s given', __METHOD__, (is_object($string) ? get_class($string) : gettype($string))));
104+
}
105+
106+
if ($isFile) {
107+
$string = $this->loadFile($string);
108+
}
109+
110+
if (!is_string($string) or !in_array(strtolower($type), ['xml', 'html'])) {
111+
throw new InvalidArgumentException(sprintf('Document type must be "xml" or "html", %s given', __METHOD__, (is_object($type) ? get_class($type) : gettype($type))));
112+
}
113+
114+
if (substr($string, 0, 5) !== '<?xml') {
115+
$prolog = sprintf('<?xml encoding="%s">', $this->document->encoding);
116+
$string = $prolog.$string;
100117
}
101118

102-
$prolog = sprintf('<?xml encoding="%s">', $this->document->encoding);
103-
$html = $prolog.$html;
119+
$this->type = strtolower($type);
104120

105121
libxml_use_internal_errors(true);
106122
libxml_disable_entity_loader(true);
107123

108-
$this->document->loadHtml($html);
124+
$this->type === 'xml' ? $this->document->loadXml($string) : $this->document->loadHtml($string);
109125

110126
libxml_clear_errors();
111127

112128
libxml_disable_entity_loader(false);
113-
libxml_use_internal_errors(false);
129+
libxml_use_internal_errors(false);
114130

115-
return $this;
131+
return $this;
132+
}
133+
134+
/**
135+
* Load HTML from a string.
136+
*
137+
* @param string $html The HTML string
138+
*
139+
* @return \DiDom\Document
140+
*
141+
* @throws \InvalidArgumentException if the provided argument is not a string
142+
*/
143+
public function loadHtml($html)
144+
{
145+
return $this->load($html, false, 'html');
116146
}
117147

118148
/**
@@ -127,6 +157,41 @@ public function loadHtml($html)
127157
* @throws \RuntimeException if you are unable to load the file
128158
*/
129159
public function loadHtmlFile($filepath)
160+
{
161+
return $this->load($filepath, true, 'html');
162+
}
163+
164+
/**
165+
* Load XML from a string.
166+
*
167+
* @param string $xml The XML string
168+
*
169+
* @return \DiDom\Document
170+
*
171+
* @throws \InvalidArgumentException if the provided argument is not a string
172+
*/
173+
public function loadXml($xml)
174+
{
175+
return $this->load($xml, false, 'xml');
176+
}
177+
178+
/**
179+
* Load XML from a file.
180+
*
181+
* @param string $filepath The path to the XML file
182+
*
183+
* @return \DiDom\Document
184+
*
185+
* @throws \InvalidArgumentException if the file path is not a string
186+
* @throws \RuntimeException if the file does not exist
187+
* @throws \RuntimeException if you are unable to load the file
188+
*/
189+
public function loadXmlFile($filepath)
190+
{
191+
return $this->load($filepath, true, 'xml');
192+
}
193+
194+
protected function loadFile($filepath)
130195
{
131196
if (!is_string($filepath)) {
132197
throw new InvalidArgumentException(sprintf('%s expects parameter 1 to be string, %s given', __METHOD__, gettype($filepath)));
@@ -138,15 +203,13 @@ public function loadHtmlFile($filepath)
138203
}
139204
}
140205

141-
$html = file_get_contents($filepath);
206+
$content = file_get_contents($filepath);
142207

143-
if ($html === false) {
208+
if ($content === false) {
144209
throw new RuntimeException(sprintf('Could not load file %s', $filepath));
145210
}
146211

147-
$this->loadHtml($html);
148-
149-
return $this;
212+
return $content;
150213
}
151214

152215
/**
@@ -215,6 +278,16 @@ public function html()
215278
return trim($this->document->saveXML($this->getElement()));
216279
}
217280

281+
/**
282+
* Dumps the internal document into a string using XML formatting.
283+
*
284+
* @return string The document XML
285+
*/
286+
public function xml()
287+
{
288+
return trim($this->document->saveXML());
289+
}
290+
218291
/**
219292
* Nicely formats output with indentation and extra space.
220293
*
@@ -263,6 +336,16 @@ public function is($document)
263336
return $this->getElement()->isSameNode($element);
264337
}
265338

339+
/**
340+
* Returns the type of document (XML or HTML).
341+
*
342+
* @return string
343+
*/
344+
public function getType()
345+
{
346+
return $this->type;
347+
}
348+
266349
/**
267350
* @return \DOMDocument
268351
*/
@@ -294,7 +377,7 @@ public function toElement()
294377
*/
295378
public function __toString()
296379
{
297-
return $this->html();
380+
return $this->type === 'xml' ? $this->xml() : $this->html();
298381
}
299382

300383
/**

src/DiDom/Element.php

+10
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,16 @@ public function html()
164164
return $this->toDocument()->html();
165165
}
166166

167+
/**
168+
* Dumps the internal document into a string using XML formatting.
169+
*
170+
* @return string The node xml
171+
*/
172+
public function xml()
173+
{
174+
return $this->toDocument()->xml();
175+
}
176+
167177
/**
168178
* Get the text content of this node and its descendants.
169179
*

0 commit comments

Comments
 (0)