From 063c57a6e8e455f37c7a638591553bf17984d5b7 Mon Sep 17 00:00:00 2001
From: cvanelteren <caspervanelteren@gmail.com>
Date: Thu, 24 Oct 2024 15:24:03 +0200
Subject: [PATCH 1/7] added epub support

---
 paper2remarkable/providers/__init__.py |  3 +++
 paper2remarkable/ui.py                 | 24 +++++++++++++++++-------
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/paper2remarkable/providers/__init__.py b/paper2remarkable/providers/__init__.py
index 7adc63a..04e91f2 100644
--- a/paper2remarkable/providers/__init__.py
+++ b/paper2remarkable/providers/__init__.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 
+from paper2remarkable.providers.epub import EPUBProvider
 from .acl import ACL
 from .acm import ACM
 from .arxiv import Arxiv
@@ -19,6 +20,7 @@
 from .pubmed import PubMed
 from .semantic_scholar import SemanticScholar
 from .springer import Springer
+from .epub import EPUBProvider
 
 # # The following providers are no longer functional due to Cloudflare blocking
 # # automated access, and have therefore been removed from the list of providers
@@ -49,4 +51,5 @@
     LocalFile,
     PdfUrl,
     HTML,
+    EPUBProvider
 ]
diff --git a/paper2remarkable/ui.py b/paper2remarkable/ui.py
index 065ac9f..34e52a3 100644
--- a/paper2remarkable/ui.py
+++ b/paper2remarkable/ui.py
@@ -19,7 +19,7 @@
 from . import __version__
 from .exceptions import InvalidURLError
 from .exceptions import UnidentifiedSourceError
-from .providers import LocalFile
+from .providers import LocalFile, EPUBProvider
 from .providers import providers
 from .utils import follow_redirects
 from .utils import is_url
@@ -27,8 +27,13 @@
 
 def build_argument_parser():
     parser = argparse.ArgumentParser(
-        description="Paper2reMarkable version %s" % __version__
+        description="Paper2reMarkable version %s - Upload PDFs and EPUBs to reMarkable" % __version__
     )
+    parser.add_argument(
+         "input",
+         help="One or more URLs to a paper or paths to local PDF/EPUB files",
+         nargs="?",
+     )
     parser.add_argument(
         "-b",
         "--blank",
@@ -184,17 +189,22 @@ def choose_provider(cli_input):
         Raised when the input *is* a valid url, but no provider can handle it.
 
     """
+
     provider = cookiejar = None
-    if LocalFile.validate(cli_input):
-        # input is a local file
+
+    # Check if it's a local file first
+    if os.path.exists(cli_input):
         new_input = cli_input
-        provider = LocalFile
+        # If it's an epub, use EPUBProvider
+        if cli_input.lower().endswith('.epub'):
+            provider = EPUBProvider
+        # Otherwise use LocalFile for PDFs
+        else:
+            provider = LocalFile
     elif is_url(cli_input):
-        # input is a url
         new_input, cookiejar = follow_redirects(cli_input)
         provider = next((p for p in providers if p.validate(new_input)), None)
     else:
-        # not a proper URL or non-existent file
         raise UnidentifiedSourceError
 
     if provider is None:

From ca21571e4cfc0e2f7f09ff107bb8bae3b4e5dcec Mon Sep 17 00:00:00 2001
From: cvanelteren <caspervanelteren@gmail.com>
Date: Thu, 24 Oct 2024 15:26:26 +0200
Subject: [PATCH 2/7] forgot to add epub file

---
 paper2remarkable/providers/epub.py | 65 ++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)
 create mode 100644 paper2remarkable/providers/epub.py

diff --git a/paper2remarkable/providers/epub.py b/paper2remarkable/providers/epub.py
new file mode 100644
index 0000000..86294db
--- /dev/null
+++ b/paper2remarkable/providers/epub.py
@@ -0,0 +1,65 @@
+from ._base import Provider
+from ..utils import chdir, upload_to_remarkable
+import os, tempfile, shutil
+
+class EPUBProvider(Provider):
+    """Provider for direct EPUB uploads"""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Override operations since we don't need PDF processing
+        self.operations = []  # No operations needed for direct EPUB upload
+
+    @staticmethod
+    def validate(src):
+        """Validate if source is an EPUB file"""
+        # Convert to absolute path before validation
+        abs_path = os.path.abspath(os.path.expanduser(src))
+        return abs_path.lower().endswith('.epub') and os.path.exists(abs_path)
+
+
+    def get_abs_pdf_urls(self, src):
+        """For EPUB files, just return the local path as absolute path"""
+        abs_path = os.path.abspath(os.path.expanduser(src))
+        return abs_path, abs_path
+
+
+    def run(self, src, filename=None):
+        """Override run method to handle EPUB files directly"""
+        # Convert to absolute path
+        src = os.path.abspath(os.path.expanduser(src))
+
+        if not self.validate(src):
+            raise ValueError("Source must be a valid EPUB file")
+
+        # Generate filename if not provided
+        clean_filename = filename or os.path.basename(src)
+        if not clean_filename.endswith('.epub'):
+            clean_filename += '.epub'
+
+        self.initial_dir = os.getcwd()
+        with tempfile.TemporaryDirectory(prefix="p2r_") as working_dir:
+            with chdir(working_dir):
+                # Simply copy the EPUB file
+                shutil.copy(src, clean_filename)
+
+                if self.debug:
+                    print("Paused in debug mode in dir: %s" % working_dir)
+                    print("Press enter to exit.")
+                    return input()
+
+                if self.upload:
+                    return upload_to_remarkable(
+                        clean_filename,
+                        remarkable_dir=self.remarkable_dir,
+                        rmapi_path=self.rmapi_path,
+                    )
+
+                # If not uploading, copy to target directory
+                target_path = os.path.join(self.initial_dir, clean_filename)
+                while os.path.exists(target_path):
+                    base = os.path.splitext(target_path)[0]
+                    target_path = base + "_.epub"
+                shutil.move(clean_filename, target_path)
+
+        return target_path

From d5c2e173d919aec6c4a23de36e62033a161d1f62 Mon Sep 17 00:00:00 2001
From: cvanelteren <caspervanelteren@gmail.com>
Date: Thu, 14 Nov 2024 23:32:06 +0100
Subject: [PATCH 3/7] move epub handling into the base Provider

---
 paper2remarkable/providers/__init__.py |  3 --
 paper2remarkable/providers/_base.py    | 56 ++++++++++++++++------
 paper2remarkable/providers/epub.py     | 65 --------------------------
 paper2remarkable/ui.py                 | 24 +++-------
 4 files changed, 49 insertions(+), 99 deletions(-)
 delete mode 100644 paper2remarkable/providers/epub.py

diff --git a/paper2remarkable/providers/__init__.py b/paper2remarkable/providers/__init__.py
index 04e91f2..7adc63a 100644
--- a/paper2remarkable/providers/__init__.py
+++ b/paper2remarkable/providers/__init__.py
@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
 
-from paper2remarkable.providers.epub import EPUBProvider
 from .acl import ACL
 from .acm import ACM
 from .arxiv import Arxiv
@@ -20,7 +19,6 @@
 from .pubmed import PubMed
 from .semantic_scholar import SemanticScholar
 from .springer import Springer
-from .epub import EPUBProvider
 
 # # The following providers are no longer functional due to Cloudflare blocking
 # # automated access, and have therefore been removed from the list of providers
@@ -51,5 +49,4 @@
     LocalFile,
     PdfUrl,
     HTML,
-    EPUBProvider
 ]
diff --git a/paper2remarkable/providers/_base.py b/paper2remarkable/providers/_base.py
index 04a4925..df4cad5 100644
--- a/paper2remarkable/providers/_base.py
+++ b/paper2remarkable/providers/_base.py
@@ -34,6 +34,7 @@
 class Provider(metaclass=abc.ABCMeta):
     """ABC for providers of pdf sources"""
 
+    SUPPORTED_FORMATS = ["pdf", "ps", "epub"]
     def __init__(
         self,
         verbose=False,
@@ -77,20 +78,41 @@ def __init__(
             logger.disable()
 
         # Define the operations to run on the pdf. Providers can add others.
-        self.operations = [("rewrite", self.rewrite_pdf)]
-        if crop == "center":
-            self.operations.append(("center", self.center_pdf))
-        elif crop == "right":
-            self.operations.append(("right", self.right_pdf))
-        elif crop == "left":
-            self.operations.append(("crop", self.crop_pdf))
+        self.operations = {
+            format: [] for format in self.SUPPORTED_FORMATS
+        }
+        self._configure_operations(crop, blank)
+        logger.info("Starting %s provider" % type(self).__name__)
 
-        if blank:
-            self.operations.append(("blank", blank_pdf))
 
-        self.operations.append(("shrink", self.shrink_pdf))
+    def _configure_operations(self, crop, blank):
+        """Configure operations for PDF and PS formats"""
+        # Formats that need PDF processing
+        pdf_formats = ['pdf', 'ps']
+        def add_operation(formats, operation_name, operation_func):
+            for fmt in formats:
+                self.operations[fmt].append((operation_name, operation_func))
 
-        logger.info("Starting %s provider" % type(self).__name__)
+        # Base operations
+        add_operation(pdf_formats, "rewrite", self.rewrite_pdf)
+
+        # Crop operations mapping
+        crop_operations = {
+            'center': ('center', self.center_pdf),
+            'right': ('right', self.right_pdf),
+            'left': ('crop', self.crop_pdf)
+        }
+
+        # Add crop operation if specified
+        if crop in crop_operations:
+            add_operation(pdf_formats, *crop_operations[crop])
+
+        # Add blank operation if specified
+        if blank:
+            add_operation(pdf_formats, "blank", blank_pdf)
+
+        # PDF-specific shrink operation
+        add_operation(['pdf'], "shrink", self.shrink_pdf)
 
     @staticmethod
     @abc.abstractmethod
@@ -210,17 +232,23 @@ def run(self, src, filename=None):
 
         # generate nice filename if needed
         clean_filename = filename or self.informer.get_filename(abs_url)
-        tmp_filename = "paper.pdf"
+        extension = clean_filename.rsplit(".", 1)[-1].lower()  # case-insensitive
+        tmp_filename = f"paper.{extension}"
+
+        if extension not in self.SUPPORTED_FORMATS:
+            raise ValueError(f"Unsupported file format {extension}. Must be one of {self.SUPPORTED_FORMATS}")
+
 
         self.initial_dir = os.getcwd()
         with tempfile.TemporaryDirectory(prefix="p2r_") as working_dir:
             with chdir(working_dir):
                 self.retrieve_pdf(pdf_url, tmp_filename)
 
-                assert_file_is_pdf(tmp_filename)
+                if extension in ("pdf", "ps"):
+                    assert_file_is_pdf(tmp_filename)
 
                 intermediate_fname = tmp_filename
-                for opname, op in self.operations:
+                for opname, op in self.operations[extension]:
                     intermediate_fname = op(intermediate_fname)
 
                 shutil.copy(intermediate_fname, clean_filename)
diff --git a/paper2remarkable/providers/epub.py b/paper2remarkable/providers/epub.py
deleted file mode 100644
index 86294db..0000000
--- a/paper2remarkable/providers/epub.py
+++ /dev/null
@@ -1,65 +0,0 @@
-from ._base import Provider
-from ..utils import chdir, upload_to_remarkable
-import os, tempfile, shutil
-
-class EPUBProvider(Provider):
-    """Provider for direct EPUB uploads"""
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        # Override operations since we don't need PDF processing
-        self.operations = []  # No operations needed for direct EPUB upload
-
-    @staticmethod
-    def validate(src):
-        """Validate if source is an EPUB file"""
-        # Convert to absolute path before validation
-        abs_path = os.path.abspath(os.path.expanduser(src))
-        return abs_path.lower().endswith('.epub') and os.path.exists(abs_path)
-
-
-    def get_abs_pdf_urls(self, src):
-        """For EPUB files, just return the local path as absolute path"""
-        abs_path = os.path.abspath(os.path.expanduser(src))
-        return abs_path, abs_path
-
-
-    def run(self, src, filename=None):
-        """Override run method to handle EPUB files directly"""
-        # Convert to absolute path
-        src = os.path.abspath(os.path.expanduser(src))
-
-        if not self.validate(src):
-            raise ValueError("Source must be a valid EPUB file")
-
-        # Generate filename if not provided
-        clean_filename = filename or os.path.basename(src)
-        if not clean_filename.endswith('.epub'):
-            clean_filename += '.epub'
-
-        self.initial_dir = os.getcwd()
-        with tempfile.TemporaryDirectory(prefix="p2r_") as working_dir:
-            with chdir(working_dir):
-                # Simply copy the EPUB file
-                shutil.copy(src, clean_filename)
-
-                if self.debug:
-                    print("Paused in debug mode in dir: %s" % working_dir)
-                    print("Press enter to exit.")
-                    return input()
-
-                if self.upload:
-                    return upload_to_remarkable(
-                        clean_filename,
-                        remarkable_dir=self.remarkable_dir,
-                        rmapi_path=self.rmapi_path,
-                    )
-
-                # If not uploading, copy to target directory
-                target_path = os.path.join(self.initial_dir, clean_filename)
-                while os.path.exists(target_path):
-                    base = os.path.splitext(target_path)[0]
-                    target_path = base + "_.epub"
-                shutil.move(clean_filename, target_path)
-
-        return target_path
diff --git a/paper2remarkable/ui.py b/paper2remarkable/ui.py
index 34e52a3..065ac9f 100644
--- a/paper2remarkable/ui.py
+++ b/paper2remarkable/ui.py
@@ -19,7 +19,7 @@
 from . import __version__
 from .exceptions import InvalidURLError
 from .exceptions import UnidentifiedSourceError
-from .providers import LocalFile, EPUBProvider
+from .providers import LocalFile
 from .providers import providers
 from .utils import follow_redirects
 from .utils import is_url
@@ -27,13 +27,8 @@
 
 def build_argument_parser():
     parser = argparse.ArgumentParser(
-        description="Paper2reMarkable version %s - Upload PDFs and EPUBs to reMarkable" % __version__
+        description="Paper2reMarkable version %s" % __version__
     )
-    parser.add_argument(
-         "input",
-         help="One or more URLs to a paper or paths to local PDF/EPUB files",
-         nargs="?",
-     )
     parser.add_argument(
         "-b",
         "--blank",
@@ -189,22 +184,17 @@ def choose_provider(cli_input):
         Raised when the input *is* a valid url, but no provider can handle it.
 
     """
-
     provider = cookiejar = None
-
-    # Check if it's a local file first
-    if os.path.exists(cli_input):
+    if LocalFile.validate(cli_input):
+        # input is a local file
         new_input = cli_input
-        # If it's an epub, use EPUBProvider
-        if cli_input.lower().endswith('.epub'):
-            provider = EPUBProvider
-        # Otherwise use LocalFile for PDFs
-        else:
-            provider = LocalFile
+        provider = LocalFile
     elif is_url(cli_input):
+        # input is a url
         new_input, cookiejar = follow_redirects(cli_input)
         provider = next((p for p in providers if p.validate(new_input)), None)
     else:
+        # not a proper URL or non-existent file
         raise UnidentifiedSourceError
 
     if provider is None:

From 98c7c8a15838b3729b489b73a2c89d9bafbf5362 Mon Sep 17 00:00:00 2001
From: cvanelteren <caspervanelteren@gmail.com>
Date: Thu, 14 Nov 2024 23:34:49 +0100
Subject: [PATCH 4/7] document that epubs need no processing in Provider

---
 paper2remarkable/providers/_base.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/paper2remarkable/providers/_base.py b/paper2remarkable/providers/_base.py
index df4cad5..e3d8c01 100644
--- a/paper2remarkable/providers/_base.py
+++ b/paper2remarkable/providers/_base.py
@@ -88,6 +88,7 @@ def __init__(
     def _configure_operations(self, crop, blank):
         """Configure operations for PDF and PS formats"""
         # Formats that need PDF processing
+        # No processing for epubs is assumed
         pdf_formats = ['pdf', 'ps']
         def add_operation(formats, operation_name, operation_func):
             for fmt in formats:

From 3332f86c137a3f7461d79da6131fc7050f90a567 Mon Sep 17 00:00:00 2001
From: cvanelteren <caspervanelteren@gmail.com>
Date: Thu, 14 Nov 2024 23:57:50 +0100
Subject: [PATCH 5/7] update Arxiv provider to work with per-format operations

---
 paper2remarkable/providers/arxiv.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/paper2remarkable/providers/arxiv.py b/paper2remarkable/providers/arxiv.py
index eabcbba..fb42b39 100644
--- a/paper2remarkable/providers/arxiv.py
+++ b/paper2remarkable/providers/arxiv.py
@@ -40,7 +40,9 @@ def __init__(self, *args, **kwargs):
         self.informer = ArxivInformer()
 
         # register the dearxiv operation
-        self.operations.insert(0, ("dearxiv", self.dearxiv))
+        for fmt in ("pdf", "ps"):  # only the PDF-processed formats get dearxiv
+            if fmt in self.operations:
+                self.operations[fmt].insert(0, ("dearxiv", self.dearxiv))
 
     def get_abs_pdf_urls(self, url):
         """Get the pdf and abs url from any given arXiv url"""

From a952e46fb2c9662332528baf89d3284119fd9fb4 Mon Sep 17 00:00:00 2001
From: cvanelteren <caspervanelteren@gmail.com>
Date: Sun, 5 Jan 2025 13:21:34 +0100
Subject: [PATCH 6/7] added unittest and black formatting

---
 paper2remarkable/providers/_base.py   | 22 +++++++++++-----------
 paper2remarkable/providers/pdf_url.py |  5 +++--
 tests/test_providers.py               |  7 +++++++
 3 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/paper2remarkable/providers/_base.py b/paper2remarkable/providers/_base.py
index e3d8c01..64fd514 100644
--- a/paper2remarkable/providers/_base.py
+++ b/paper2remarkable/providers/_base.py
@@ -35,6 +35,7 @@ class Provider(metaclass=abc.ABCMeta):
     """ABC for providers of pdf sources"""
 
     SUPPORTED_FORMATS = ["pdf", "ps", "epub"]
+
     def __init__(
         self,
         verbose=False,
@@ -78,18 +79,16 @@ def __init__(
             logger.disable()
 
         # Define the operations to run on the pdf. Providers can add others.
-        self.operations = {
-            format: [] for format in self.SUPPORTED_FORMATS
-        }
+        self.operations = {format: [] for format in self.SUPPORTED_FORMATS}
         self._configure_operations(crop, blank)
         logger.info("Starting %s provider" % type(self).__name__)
 
-
     def _configure_operations(self, crop, blank):
         """Configure operations for PDF and PS formats"""
         # Formats that need PDF processing
         # No processing for epubs is assumed
-        pdf_formats = ['pdf', 'ps']
+        pdf_formats = ["pdf", "ps"]
+
         def add_operation(formats, operation_name, operation_func):
             for fmt in formats:
                 self.operations[fmt].append((operation_name, operation_func))
@@ -99,9 +98,9 @@ def add_operation(formats, operation_name, operation_func):
 
         # Crop operations mapping
         crop_operations = {
-            'center': ('center', self.center_pdf),
-            'right': ('right', self.right_pdf),
-            'left': ('crop', self.crop_pdf)
+            "center": ("center", self.center_pdf),
+            "right": ("right", self.right_pdf),
+            "left": ("crop", self.crop_pdf),
         }
 
         # Add crop operation if specified
@@ -113,7 +112,7 @@ def add_operation(formats, operation_name, operation_func):
             add_operation(pdf_formats, "blank", blank_pdf)
 
         # PDF-specific shrink operation
-        add_operation(['pdf'], "shrink", self.shrink_pdf)
+        add_operation(["pdf"], "shrink", self.shrink_pdf)
 
     @staticmethod
     @abc.abstractmethod
@@ -237,8 +236,9 @@ def run(self, src, filename=None):
         tmp_filename = f"paper.{extension}"
 
         if extension not in self.SUPPORTED_FORMATS:
-            raise ValueError(f"Unsupported file format {extension}. Must be one of {self.SUPPORTED_FORMATS}")
-
+            raise ValueError(
+                f"Unsupported file format {extension}. Must be one of {self.SUPPORTED_FORMATS}"
+            )
 
         self.initial_dir = os.getcwd()
         with tempfile.TemporaryDirectory(prefix="p2r_") as working_dir:
diff --git a/paper2remarkable/providers/pdf_url.py b/paper2remarkable/providers/pdf_url.py
index 4a58f10..f319668 100644
--- a/paper2remarkable/providers/pdf_url.py
+++ b/paper2remarkable/providers/pdf_url.py
@@ -33,11 +33,12 @@ def get_filename(self, abs_url):
             )
 
         filename = path_parts[-1]
-        if not filename.endswith(".pdf"):
+        ext = filename.split(".")[-1]
+        if ext not in [".pdf", "epub"]:
             raise FilenameMissingError(
                 provider="PdfUrl",
                 url=abs_url,
-                reason="URL path didn't end in .pdf",
+                reason="URL path didn't end in .pdf or .epub",
             )
         logger.warning(
             "Using filename {filename} extracted from url. "
diff --git a/tests/test_providers.py b/tests/test_providers.py
index 6cb1996..0ae0838 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -193,6 +193,13 @@ def test_pdfurl_2(self):
         filename = prov.run(url)
         self.assertEqual("NoREC.pdf", os.path.basename(filename))
 
+    def test_epub(self):
+        # Exercises PdfUrl on an EPUB link; needs network access to gutenberg.org.
+        provider = PdfUrl(upload=False, verbose=VERBOSE)
+        result = provider.run("https://www.gutenberg.org/ebooks/2701.epub.images")
+        expected = "pg2701-images.epub"
+        self.assertEqual(expected, os.path.basename(result))
+
     def test_jmlr_1(self):
         prov = JMLR(upload=False, verbose=VERBOSE)
         url = "http://www.jmlr.org/papers/volume17/14-526/14-526.pdf"

From 2f83eb24b796a934563bf0f7d09c991405d2257c Mon Sep 17 00:00:00 2001
From: cvanelteren <caspervanelteren@gmail.com>
Date: Mon, 6 Jan 2025 08:37:30 +0100
Subject: [PATCH 7/7] fix extension check typo (".pdf" -> "pdf") in pdf_url

---
 paper2remarkable/providers/pdf_url.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paper2remarkable/providers/pdf_url.py b/paper2remarkable/providers/pdf_url.py
index f319668..11d8e0d 100644
--- a/paper2remarkable/providers/pdf_url.py
+++ b/paper2remarkable/providers/pdf_url.py
@@ -34,7 +34,7 @@ def get_filename(self, abs_url):
 
         filename = path_parts[-1]
         ext = filename.split(".")[-1]
-        if ext not in [".pdf", "epub"]:
+        if "." not in filename or ext not in ["pdf", "epub"]:  # need a real suffix
             raise FilenameMissingError(
                 provider="PdfUrl",
                 url=abs_url,