@@ -15,29 +15,35 @@ class Document
15
15
*/
16
16
protected $ document ;
17
17
18
+ /**
19
+ * @var string
20
+ */
21
+ protected $ type ;
22
+
18
23
/**
 * Constructor.
 *
 * When a DOMDocument instance is passed as the first argument it is adopted
 * as the internal document and every other argument is ignored.
 *
 * @param string|\DOMDocument|null $string   HTML or XML string, a file path, or an existing DOMDocument
 * @param bool                     $isFile   Whether the first argument is a file path
 * @param string                   $encoding The document encoding
 * @param string                   $type     The document type
 *
 * @throws \InvalidArgumentException if the encoding is not a string
 */
public function __construct($string = null, $isFile = false, $encoding = 'UTF-8', $type = 'html')
{
    // Wrap an already-built DOM tree without creating or loading anything.
    if ($string instanceof DOMDocument) {
        $this->document = $string;

        return;
    }

    if (!is_string($encoding)) {
        $message = sprintf('%s expects parameter 3 to be string, %s given', __METHOD__, gettype($encoding));

        throw new InvalidArgumentException($message);
    }

    $this->document = new DOMDocument('1.0', $encoding);

    // Nothing to parse: leave the freshly created document empty.
    if ($string === null) {
        return;
    }

    $this->load($string, $isFile, $type);
}
43
49
@@ -85,34 +91,58 @@ public function appendChild($node)
85
91
}
86
92
87
93
/**
 * Load HTML or XML.
 *
 * @param string $string HTML or XML string or file path
 * @param bool   $isFile Whether the first argument is a file path
 * @param string $type   Type of the document: "html" or "xml" (case-insensitive)
 *
 * @return \DiDom\Document
 *
 * @throws \InvalidArgumentException if the first argument is not a string
 * @throws \InvalidArgumentException if the document type is not "xml" or "html"
 */
public function load($string, $isFile = false, $type = 'html')
{
    if (!is_string($string)) {
        throw new InvalidArgumentException(sprintf('%s expects parameter 1 to be string, %s given', __METHOD__, (is_object($string) ? get_class($string) : gettype($string))));
    }

    // Validate the type before any file I/O, and before calling strtolower()
    // on it, so a bad argument is reported as such and nothing is read.
    $normalizedType = is_string($type) ? strtolower($type) : null;

    if (!in_array($normalizedType, ['xml', 'html'], true)) {
        if (is_string($type)) {
            $given = $type;
        } else {
            $given = is_object($type) ? get_class($type) : gettype($type);
        }

        throw new InvalidArgumentException(sprintf('Document type must be "xml" or "html", %s given', $given));
    }

    if ($isFile) {
        $string = $this->loadFile($string);
    }

    // The unterminated pseudo-prolog is an encoding hint for the HTML parser
    // only; prepending it to XML would make the input malformed.
    if ($normalizedType === 'html' && substr($string, 0, 5) !== '<?xml') {
        $prolog = sprintf('<?xml encoding="%s">', $this->document->encoding);
        $string = $prolog.$string;
    }

    $this->type = $normalizedType;

    // Both libxml switches are process-global: remember the caller's settings
    // and restore them afterwards instead of forcing them to false.
    $previousUseErrors = libxml_use_internal_errors(true);

    // libxml_disable_entity_loader() is deprecated as of PHP 8.0, where
    // external entity loading is already disabled by default.
    $toggleEntityLoader = PHP_VERSION_ID < 80000;

    if ($toggleEntityLoader) {
        $previousEntityLoader = libxml_disable_entity_loader(true);
    }

    if ($normalizedType === 'xml') {
        // DOMDocument::loadXML() rejects an empty source (warning or error,
        // depending on the PHP version), so an empty string is not parsed.
        if ($string !== '') {
            $this->document->loadXML($string);
        }
    } else {
        $this->document->loadHTML($string);
    }

    libxml_clear_errors();

    if ($toggleEntityLoader) {
        libxml_disable_entity_loader($previousEntityLoader);
    }

    libxml_use_internal_errors($previousUseErrors);

    return $this;
}
133
+
134
/**
 * Parse an HTML string into this document.
 *
 * Shortcut for load() with the type fixed to "html" and the input treated
 * as markup rather than a file path.
 *
 * @param string $html The HTML string
 *
 * @return \DiDom\Document
 *
 * @throws \InvalidArgumentException if the provided argument is not a string
 */
public function loadHtml($html)
{
    $isFile = false;

    return $this->load($html, $isFile, 'html');
}
117
147
118
148
/**
@@ -127,6 +157,41 @@ public function loadHtml($html)
127
157
* @throws \RuntimeException if you are unable to load the file
128
158
*/
129
159
public function loadHtmlFile($filepath)
{
    // Delegate to load(), flagging the argument as a path to an HTML file.
    $isFile = true;

    return $this->load($filepath, $isFile, 'html');
}
163
+
164
/**
 * Parse an XML string into this document.
 *
 * Shortcut for load() with the type fixed to "xml" and the input treated
 * as markup rather than a file path.
 *
 * @param string $xml The XML string
 *
 * @return \DiDom\Document
 *
 * @throws \InvalidArgumentException if the provided argument is not a string
 */
public function loadXml($xml)
{
    $isFile = false;

    return $this->load($xml, $isFile, 'xml');
}
177
+
178
/**
 * Read an XML file and parse its contents into this document.
 *
 * Shortcut for load() with the type fixed to "xml" and the first argument
 * treated as a file path.
 *
 * @param string $filepath The path to the XML file
 *
 * @return \DiDom\Document
 *
 * @throws \InvalidArgumentException if the file path is not a string
 * @throws \RuntimeException if the file does not exist
 * @throws \RuntimeException if you are unable to load the file
 */
public function loadXmlFile($filepath)
{
    $isFile = true;

    return $this->load($filepath, $isFile, 'xml');
}
193
+
194
+ protected function loadFile ($ filepath )
130
195
{
131
196
if (!is_string ($ filepath )) {
132
197
throw new InvalidArgumentException (sprintf ('%s expects parameter 1 to be string, %s given ' , __METHOD__ , gettype ($ filepath )));
@@ -138,15 +203,13 @@ public function loadHtmlFile($filepath)
138
203
}
139
204
}
140
205
141
- $ html = file_get_contents ($ filepath );
206
+ $ content = file_get_contents ($ filepath );
142
207
143
- if ($ html === false ) {
208
+ if ($ content === false ) {
144
209
throw new RuntimeException (sprintf ('Could not load file %s ' , $ filepath ));
145
210
}
146
211
147
- $ this ->loadHtml ($ html );
148
-
149
- return $ this ;
212
+ return $ content ;
150
213
}
151
214
152
215
/**
@@ -215,6 +278,16 @@ public function html()
215
278
return trim ($ this ->document ->saveXML ($ this ->getElement ()));
216
279
}
217
280
281
/**
 * Serializes the whole internal document as XML.
 *
 * @return string The document XML, with surrounding whitespace trimmed
 */
public function xml()
{
    $xml = $this->document->saveXML();

    return trim($xml);
}
290
+
218
291
/**
219
292
* Nicely formats output with indentation and extra space.
220
293
*
@@ -263,6 +336,16 @@ public function is($document)
263
336
return $ this ->getElement ()->isSameNode ($ element );
264
337
}
265
338
339
/**
 * Returns the type of the document as recorded by load().
 *
 * NOTE(review): the property has no visible default, so this presumably
 * returns null until load() has run (for example when the document wraps
 * an existing DOMDocument) - confirm against the rest of the class.
 *
 * @return string|null "xml" or "html"
 */
public function getType()
{
    $type = $this->type;

    return $type;
}
348
+
266
349
/**
267
350
* @return \DOMDocument
268
351
*/
@@ -294,7 +377,7 @@ public function toElement()
294
377
*/
295
378
public function __toString()
{
    // XML documents are dumped with xml(); everything else, including a
    // document whose type was never set, is rendered with html().
    if ($this->type === 'xml') {
        return $this->xml();
    }

    return $this->html();
}
299
382
300
383
/**
0 commit comments