From 7a2a66b486f501176c7f712237bf342fd3ea7ea6 Mon Sep 17 00:00:00 2001
From: bytestream
Date: Wed, 27 Sep 2023 17:19:39 +0100
Subject: [PATCH] Add support for <pre>

---
 src/Text/Elements/ContentModel.php |  1 +
 src/Text/Elements/Element.php      |  7 ++++++-
 src/Text/Elements/Pre.php          | 28 ++++++++++++++++++++++++++++
 src/Text/Elements/Text.php         |  7 +++++--
 test/Text/TextParserTest.php       |  1 +
 test/Text/fixtures/fixture13.html  | 10 ++++++++++
 test/Text/fixtures/fixture13.txt   |  8 ++++++++
 7 files changed, 59 insertions(+), 3 deletions(-)
 create mode 100644 src/Text/Elements/Pre.php
 create mode 100644 test/Text/fixtures/fixture13.html
 create mode 100644 test/Text/fixtures/fixture13.txt

diff --git a/src/Text/Elements/ContentModel.php b/src/Text/Elements/ContentModel.php
index db9934c..030996d 100644
--- a/src/Text/Elements/ContentModel.php
+++ b/src/Text/Elements/ContentModel.php
@@ -139,6 +139,7 @@ class ContentModel
         'th'  => TableCell::class,
         'img' => Image::class,
         'a'   => Anchor::class,
+        'pre' => Pre::class,
     ];
 
     private DOMNode $node;
diff --git a/src/Text/Elements/Element.php b/src/Text/Elements/Element.php
index dc2a062..944c0c8 100644
--- a/src/Text/Elements/Element.php
+++ b/src/Text/Elements/Element.php
@@ -55,10 +55,15 @@ protected function isWhitespace(string $text): bool
         return strlen(trim($this->processWhitespace($text), "\n\r\t ")) === 0;
     }
 
+    protected function removeZwnjCodes(string $text): string
+    {
+        return str_replace($this->zwnjCodes(), '', $text);
+    }
+
     protected function processWhitespace(string $text): string
     {
         $text = rtrim($text);
-        $text = str_replace($this->zwnjCodes(), '', $text);
+        $text = $this->removeZwnjCodes($text);
         $text = (string) preg_replace("/[\\t\\n\\f\\r ]+/im", ' ', $text);
 
         return trim($text);
diff --git a/src/Text/Elements/Pre.php b/src/Text/Elements/Pre.php
new file mode 100644
index 0000000..5b4b2f5
--- /dev/null
+++ b/src/Text/Elements/Pre.php
@@ -0,0 +1,28 @@
+previous instanceof Margin || $this->previous instanceof Breakline) {
+            return null;
+        }
+
+        return new Margin(1);
+    }
+
+    public function endNode(): ?Content
+    {
+        if ($this->previous instanceof Margin || $this->previous instanceof Breakline) {
+            return null;
+        }
+
+        return new Margin(1);
+    }
+}
diff --git a/src/Text/Elements/Text.php b/src/Text/Elements/Text.php
index a3ac9c8..e2149c0 100644
--- a/src/Text/Elements/Text.php
+++ b/src/Text/Elements/Text.php
@@ -24,8 +24,11 @@ public function endNode(): ?Content
             return null;
         }
 
-        $text = $this->processWhitespace($text);
-        $text = str_replace($this->nbspCodes(), ' ', $text);
+        if (! in_array($this->node->parentNode?->nodeName, ['pre'])) {
+            $text = $this->processWhitespace($text);
+        }
+
+        $text = trim(str_replace([$this->nbspCodes(), $this->zwnjCodes()], ' ', $text));
 
         if (empty($text)) {
             return null;
diff --git a/test/Text/TextParserTest.php b/test/Text/TextParserTest.php
index 6691633..546cb5a 100644
--- a/test/Text/TextParserTest.php
+++ b/test/Text/TextParserTest.php
@@ -38,6 +38,7 @@ public static function toTextProvider(): iterable
         yield [10];
         yield [11];
         yield [12];
+        yield [13];
     }
 
     #[DataProvider('marginProvider')]
diff --git a/test/Text/fixtures/fixture13.html b/test/Text/fixtures/fixture13.html
new file mode 100644
index 0000000..32617c0
--- /dev/null
+++ b/test/Text/fixtures/fixture13.html
@@ -0,0 +1,10 @@
+

Here is the code

+
+#include <stdlib.h>
+#include <stdio.h>
+
+int main(){
+	return 0;
+};
+
+
\ No newline at end of file
diff --git a/test/Text/fixtures/fixture13.txt b/test/Text/fixtures/fixture13.txt
new file mode 100644
index 0000000..0285442
--- /dev/null
+++ b/test/Text/fixtures/fixture13.txt
@@ -0,0 +1,8 @@
+Here is the code
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int main(){
+	return 0;
+};
\ No newline at end of file