From 6da96814759cb0d822c53b5cfe073b236a9f1280 Mon Sep 17 00:00:00 2001
From: Dan Davis <daniel.davis@nih.gov>
Date: Thu, 4 Mar 2021 14:32:28 -0500
Subject: [PATCH] Add a MarcSearch class to memoize patterns.

---
 README.md              | 22 ++++++++++++++++++++++
 VERSION.txt            |  2 +-
 pymarcspec/__init__.py |  2 +-
 pymarcspec/search.py   | 34 +++++++++++++++++++++++++++++-----
 4 files changed, 53 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 0195297..dce459c 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,28 @@ with open(sys.argv[1], 'rb') as f:
         print(subjects)
 ```
 
+There is also a `MarcSearch` object that memoizes each parsed search expression,
+so that you can conveniently run several different searches over many records
+without re-parsing each spec every time. For example:
+
+```python
+import csv
+import sys
+from pymarcspec import MarcSearch
+from pymarc import MARCReader
+
+writer = csv.writer(sys.stdout, dialect='unix', quoting=csv.QUOTE_MINIMAL)
+writer.writerow(['id', 'title', 'subjects'])
+
+marcsearch = MarcSearch()
+with open(sys.argv[1], 'rb') as f:
+    for record in MARCReader(f):
+        control_id = marcsearch.search('001', record)
+        title = marcsearch.search('245[0]$a-c', record)
+        subjects = marcsearch.search('650$a', record, field_delimiter=', ')
+        writer.writerow([control_id, title, subjects])
+```
+
 ## Development
 
 ### Building the Parser
diff --git a/VERSION.txt b/VERSION.txt
index 8acdd82..4e379d2 100644
--- a/VERSION.txt
+++ b/VERSION.txt
@@ -1 +1 @@
-0.0.1
+0.0.2
diff --git a/pymarcspec/__init__.py b/pymarcspec/__init__.py
index cf6a287..01ceecb 100644
--- a/pymarcspec/__init__.py
+++ b/pymarcspec/__init__.py
@@ -1,4 +1,4 @@
 from .parser import MarcSpecParser  # noqa:
 from .semantics import MarcSearchSemantics  # noqa:
-from .search import MarcSearchParser  # noqa:
+from .search import MarcSearchParser, MarcSearch  # noqa:
 from .model import MarcSpec  # noqa:
\ No newline at end of file
diff --git a/pymarcspec/search.py b/pymarcspec/search.py
index 592e326..ba21a78 100644
--- a/pymarcspec/search.py
+++ b/pymarcspec/search.py
@@ -9,7 +9,6 @@
 from .semantics import MarcSearchSemantics
 
 
-# memoize compiling of strings into AST using some searcher
 class MarcSearchParser(MarcSpecParser):
     def __init__(self, *args, **kwargs):
         kwargs.update({
@@ -17,19 +16,44 @@ def __init__(self, *args, **kwargs):
             'semantics': MarcSearchSemantics()
         })
         super().__init__(*args, **kwargs)
+        self.memoized = dict()
+
+
+# memoize compiling of strings into specs
+class MarcSearch:
+    """
+    Memoizes compiled specifications to avoid the
+    cost of compiling each one again and again.
+
+    Can be used over multiple records and
+    multiple specs.
+    """
+    def __init__(self):
+        self.parser = MarcSearchParser()
+        self.specs = dict()
+
+    def parse(self, spec):
+        compiled_spec = self.specs.get(spec)
+        if compiled_spec is None:
+            self.specs[spec] = compiled_spec = self.parser.parse(spec)
+        return compiled_spec
+
+    def search(self, spec, record, **kwargs):
+        compiled_spec = self.parse(spec)
+        return compiled_spec.search(record, **kwargs)
 
 
 def marc_search(marcspec, stream, field_delimiter=':', subfield_delimiter=''):
-    parser = MarcSearchParser()
-    spec = parser.parse(marcspec)
+    searcher = MarcSearch()
+    searcher.parse(marcspec)
 
     if stream.name.endswith('.xml'):
         generator = parse_xml_to_array(stream)
     else:
         generator = MARCReader(stream)
     for record in generator:
-        result = spec.search(
-            record,
+        result = searcher.search(
+            marcspec, record,
             field_delimiter=field_delimiter,
             subfield_delimiter=subfield_delimiter
         )