Skip to content

Commit

Permalink
Merge pull request #1 from danizen/develop
Browse files Browse the repository at this point in the history
Add a MarcSearch class to memoize patterns.
  • Loading branch information
danizen authored Mar 4, 2021
2 parents 024a4de + 6da9681 commit 9fe1137
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 7 deletions.
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,28 @@ with open(sys.argv[1], 'rb') as f:
print(subjects)
```

There is also a `MarcSearch` object that memoizes each search expression, so that
you can conveniently run a number of different searches over many records without
re-parsing the same spec each time. For example:

```python
import csv
import sys
from pymarcspec import MarcSearch
from pymarc import MARCReader

# Write one CSV row per MARC record to standard output.
writer = csv.writer(sys.stdout, dialect='unix', quoting=csv.QUOTE_MINIMAL)
writer.writerow(['id', 'title', 'subjects'])

# One MarcSearch is shared by every record so that each spec string is
# parsed only the first time it is used.
marcsearch = MarcSearch()
with open(sys.argv[1], 'rb') as f:
    for record in MARCReader(f):
        # 001 is the MARC control number; 100 is the personal-name main entry.
        control_id = marcsearch.search('001', record)
        title = marcsearch.search('245[0]$a-c', record)
        subjects = marcsearch.search('650$a', record, field_delimiter=', ')
        writer.writerow([control_id, title, subjects])
```

## Development

### Building the Parser
Expand Down
2 changes: 1 addition & 1 deletion VERSION.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.0.1
0.0.2
2 changes: 1 addition & 1 deletion pymarcspec/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .parser import MarcSpecParser # noqa:
from .semantics import MarcSearchSemantics # noqa:
from .search import MarcSearchParser # noqa:
from .search import MarcSearchParser, MarcSearch # noqa:
from .model import MarcSpec # noqa:
34 changes: 29 additions & 5 deletions pymarcspec/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,51 @@
from .semantics import MarcSearchSemantics


# memoize compiling of strings into AST using some searcher
class MarcSearchParser(MarcSpecParser):
    """
    MarcSpecParser that is always constructed with an empty
    ``whitespace`` setting and a fresh MarcSearchSemantics instance.
    """
    def __init__(self, *args, **kwargs):
        # Both settings are forced: a caller-supplied value for either
        # key is replaced before the base class sees it.
        kwargs['whitespace'] = ''
        kwargs['semantics'] = MarcSearchSemantics()
        super().__init__(*args, **kwargs)
        # Starts out empty; nothing in the visible code reads or fills it.
        self.memoized = {}


# memoize compiling of strings into specs
class MarcSearch:
    """
    Cache of compiled specifications, keyed by the spec string.

    Each distinct spec string is handed to the parser once; later
    requests for the same string reuse the stored result. One
    instance can serve any number of records and specs.
    """
    def __init__(self):
        self.parser = MarcSearchParser()
        # Maps spec string -> object returned by self.parser.parse().
        self.specs = {}

    def parse(self, spec):
        """
        Return the compiled form of ``spec``, parsing it only when
        no (non-None) cached entry exists for that string.
        """
        cached = self.specs.get(spec)
        if cached is not None:
            return cached
        fresh = self.parser.parse(spec)
        self.specs[spec] = fresh
        return fresh

    def search(self, spec, record, **kwargs):
        """
        Compile ``spec`` (or reuse the cached result) and return what
        its ``search`` method yields for ``record``; extra keyword
        arguments are forwarded unchanged.
        """
        return self.parse(spec).search(record, **kwargs)


def marc_search(marcspec, stream, field_delimiter=':', subfield_delimiter=''):
parser = MarcSearchParser()
spec = parser.parse(marcspec)
searcher = MarcSearch()
searcher.parse(marcspec)

if stream.name.endswith('.xml'):
generator = parse_xml_to_array(stream)
else:
generator = MARCReader(stream)
for record in generator:
result = spec.search(
record,
result = searcher.search(
marcspec, record,
field_delimiter=field_delimiter,
subfield_delimiter=subfield_delimiter
)
Expand Down

0 comments on commit 9fe1137

Please sign in to comment.