From e65a404b13e55d64354355ef3293ffcead0f1a65 Mon Sep 17 00:00:00 2001 From: perrette Date: Tue, 25 Apr 2023 12:08:29 +0200 Subject: [PATCH] split test_papers.py into several test modules for clarity #48 --- tests/common.py | 93 +++++ tests/config.py | 22 ++ tests/{test_papers.py => test_add.py} | 535 +------------------------- tests/test_duplicates.py | 361 +++++++++++++++++ tests/test_encoding.py | 41 ++ tests/test_extract.py | 25 ++ tests/test_install.py | 26 ++ 7 files changed, 579 insertions(+), 524 deletions(-) create mode 100644 tests/common.py create mode 100644 tests/config.py rename tests/{test_papers.py => test_add.py} (50%) create mode 100644 tests/test_duplicates.py create mode 100644 tests/test_encoding.py create mode 100644 tests/test_extract.py create mode 100644 tests/test_install.py diff --git a/tests/common.py b/tests/common.py new file mode 100644 index 0000000..fc55ff9 --- /dev/null +++ b/tests/common.py @@ -0,0 +1,93 @@ +import subprocess as sp +import unittest +import difflib +from tests.download import downloadpdf + +# Using python -m papers instead of papers otherwise pytest --cov does not detect the call +PAPERSCMD = f'python3 -m papers' + +def paperscmd(cmd, sp_cmd="check_output"): + return run(f'{PAPERSCMD} '+cmd, sp_cmd=sp_cmd) + +def run(cmd, sp_cmd="check_output"): + print(cmd) + if sp_cmd == "check_output": + return str(sp.check_output(cmd, shell=True).strip().decode()) + else: + return str(getattr(sp, sp_cmd)(cmd, shell=True)) + + + +def prepare_paper(): + pdf = downloadpdf('bg-8-515-2011.pdf') + doi = '10.5194/bg-8-515-2011' + key = 'Perrette_2011' + newkey = 'perrette_yool2011' + year = '2011' + bibtex = """@article{Perrette_2011, + author = {M. Perrette and A. Yool and G. D. Quartly and E. E. 
Popova}, + doi = {10.5194/bg-8-515-2011}, + journal = {Biogeosciences}, + month = {feb}, + number = {2}, + pages = {515--524}, + publisher = {Copernicus {GmbH}}, + title = {Near-ubiquity of ice-edge blooms in the Arctic}, + url = {https://doi.org/10.5194%2Fbg-8-515-2011}, + volume = {8}, + year = 2011, +}""" + + file_rename = "perrette_et_al_2011_near-ubiquity-of-ice-edge-blooms-in-the-arctic.pdf" + + return pdf, doi, key, newkey, year, bibtex, file_rename + + +def prepare_paper2(): + pdf = downloadpdf('esd-4-11-2013.pdf') + si = downloadpdf('esd-4-11-2013-supplement.pdf') + doi = '10.5194/esd-4-11-2013' + key = 'Perrette_2013' + newkey = 'perrette_landerer2013' + year = '2013' + bibtex = """@article{Perrette_2013, + author = {M. Perrette and F. Landerer and R. Riva and K. Frieler and M. Meinshausen}, + doi = {10.5194/esd-4-11-2013}, + journal = {Earth System Dynamics}, + month = {jan}, + number = {1}, + pages = {11--29}, + publisher = {Copernicus {GmbH}}, + title = {A scaling approach to project regional sea level rise and its uncertainties}, + url = {https://doi.org/10.5194%2Fesd-4-11-2013}, + volume = {4}, + year = 2013, +}""" + file_rename = "perrette_et_al_2013_a-scaling-approach-to-project-regional-sea-level-rise-and-its-uncertainties.pdf" + + return pdf, si, doi, key, newkey, year, bibtex, file_rename + + + +class BibTest(unittest.TestCase): + """base class for bib tests: create a new bibliography + """ + + def assertMultiLineEqual(self, first, second, msg=None): + """Assert that two multi-line strings are equal. + + If they aren't, show a nice diff. 
+ source: https://stackoverflow.com/a/3943697/2192272 + """ + self.assertTrue(isinstance(first, str), + 'First argument is not a string') + self.assertTrue(isinstance(second, str), + 'Second argument is not a string') + + if first != second: + message = ''.join(difflib.ndiff(first.splitlines(True), + second.splitlines(True))) + if msg: + message += " : " + msg + self.fail("Multi-line strings are unequal:\n" + message) + diff --git a/tests/config.py b/tests/config.py new file mode 100644 index 0000000..79025d5 --- /dev/null +++ b/tests/config.py @@ -0,0 +1,22 @@ +import unittest +import os, subprocess as sp +import tempfile, shutil +import difflib +from pathlib import Path + +from papers.extract import extract_pdf_metadata +from papers.bib import Biblio, bibtexparser, parse_file, format_file +from tests.download import downloadpdf + +# Using python -m papers instead of papers otherwise pytest --cov does not detect the call +PAPERSCMD = f'python3 -m papers' + +def paperscmd(cmd, sp_cmd="check_output"): + return run(f'{PAPERSCMD} '+cmd, sp_cmd=sp_cmd) + +def run(cmd, sp_cmd="check_output"): + print(cmd) + if sp_cmd == "check_output": + return str(sp.check_output(cmd, shell=True).strip().decode()) + else: + return str(getattr(sp, sp_cmd)(cmd, shell=True)) \ No newline at end of file diff --git a/tests/test_papers.py b/tests/test_add.py similarity index 50% rename from tests/test_papers.py rename to tests/test_add.py index bc7323e..c930518 100644 --- a/tests/test_papers.py +++ b/tests/test_add.py @@ -1,142 +1,15 @@ +import os +import shutil +import subprocess as sp +import tempfile import unittest -import os, subprocess as sp -import tempfile, shutil -import difflib from pathlib import Path -from papers.extract import extract_pdf_metadata -from papers.bib import Biblio, bibtexparser, parse_file, format_file -from tests.download import downloadpdf - -# Using python -m papers instead of papers otherwise pytest --cov does not detect the call -PAPERSCMD = f'python3 -m 
papers' - -def paperscmd(cmd, sp_cmd="check_output"): - return run(f'{PAPERSCMD} '+cmd, sp_cmd=sp_cmd) - -def run(cmd, sp_cmd="check_output"): - print(cmd) - if sp_cmd == "check_output": - return str(sp.check_output(cmd, shell=True).strip().decode()) - else: - return str(getattr(sp, sp_cmd)(cmd, shell=True)) - -def prepare_paper(): - pdf = downloadpdf('bg-8-515-2011.pdf') - doi = '10.5194/bg-8-515-2011' - key = 'Perrette_2011' - newkey = 'perrette_yool2011' - year = '2011' - bibtex = """@article{Perrette_2011, - author = {M. Perrette and A. Yool and G. D. Quartly and E. E. Popova}, - doi = {10.5194/bg-8-515-2011}, - journal = {Biogeosciences}, - month = {feb}, - number = {2}, - pages = {515--524}, - publisher = {Copernicus {GmbH}}, - title = {Near-ubiquity of ice-edge blooms in the Arctic}, - url = {https://doi.org/10.5194%2Fbg-8-515-2011}, - volume = {8}, - year = 2011, -}""" - - file_rename = "perrette_et_al_2011_near-ubiquity-of-ice-edge-blooms-in-the-arctic.pdf" - - return pdf, doi, key, newkey, year, bibtex, file_rename - - -def prepare_paper2(): - pdf = downloadpdf('esd-4-11-2013.pdf') - si = downloadpdf('esd-4-11-2013-supplement.pdf') - doi = '10.5194/esd-4-11-2013' - key = 'Perrette_2013' - newkey = 'perrette_landerer2013' - year = '2013' - bibtex = """@article{Perrette_2013, - author = {M. Perrette and F. Landerer and R. Riva and K. Frieler and M. 
Meinshausen}, - doi = {10.5194/esd-4-11-2013}, - journal = {Earth System Dynamics}, - month = {jan}, - number = {1}, - pages = {11--29}, - publisher = {Copernicus {GmbH}}, - title = {A scaling approach to project regional sea level rise and its uncertainties}, - url = {https://doi.org/10.5194%2Fesd-4-11-2013}, - volume = {4}, - year = 2013, -}""" - file_rename = "perrette_et_al_2013_a-scaling-approach-to-project-regional-sea-level-rise-and-its-uncertainties.pdf" - - return pdf, si, doi, key, newkey, year, bibtex, file_rename - -class TestBibtexFileEntry(unittest.TestCase): - - def test_parse_file(self): - file = parse_file('file.pdf:/path/to/file.pdf:pdf') - self.assertEqual(file, ['/path/to/file.pdf']) - file = parse_file(':/path/to/file.pdf:pdf') - self.assertEqual(file, ['/path/to/file.pdf']) - file = parse_file('/path/to/file.pdf:pdf') - self.assertEqual(file, ['/path/to/file.pdf']) - file = parse_file('/path/to/file.pdf') - self.assertEqual(file, ['/path/to/file.pdf']) - file = parse_file(':/path/to/file.pdf:') - self.assertEqual(file, ['/path/to/file.pdf']) - - - def test_parse_files(self): - files = parse_file(':/path/to/file1.pdf:pdf;:/path/to/file2.pdf:pdf') - self.assertEqual(files, ['/path/to/file1.pdf','/path/to/file2.pdf']) - - - def test_format_file(self): - field = format_file(['/path/to/file.pdf']) - self.assertEqual(field, ':/path/to/file.pdf:pdf') - - - def test_format_files(self): - field = format_file(['/path/to/file1.pdf','/path/to/file2.pdf']) - self.assertEqual(field, ':/path/to/file1.pdf:pdf;:/path/to/file2.pdf:pdf') - - +import bibtexparser -class TestSimple(unittest.TestCase): - - def setUp(self): - self.pdf, self.doi, self.key, self.newkey, self.year, self.bibtex, self.file_rename = prepare_paper() - self.assertTrue(os.path.exists(self.pdf)) - - def test_doi(self): - self.assertEqual(paperscmd(f'doi {self.pdf}').strip(), self.doi) - - def test_fetch(self): - bibtexs = paperscmd(f'fetch {self.doi}').strip() - db1 = 
bibtexparser.loads(bibtexs) - db2 = bibtexparser.loads(self.bibtex) - self.assertEqual(db1.entries, db2.entries) - - def test_fetch_scholar(self): - extract_pdf_metadata(self.pdf, scholar=True) - -class TestInstall(unittest.TestCase): - - def setUp(self): - self.mybib = tempfile.mktemp(prefix='papers.bib') - self.filesdir = tempfile.mktemp(prefix='papers.files') - - def test_local_install(self): - paperscmd(f'install --local --bibtex {self.mybib} --files {self.filesdir}') - self.assertTrue(os.path.exists(self.mybib)) - self.assertTrue(os.path.exists(self.filesdir)) +from papers.bib import Biblio +from tests.common import PAPERSCMD, paperscmd, prepare_paper, prepare_paper2, BibTest - def tearDown(self): - if os.path.exists(self.filesdir): - shutil.rmtree(self.filesdir) - if os.path.exists(self.mybib): - os.remove(self.mybib) - if os.path.exists('.papersconfig.json'): - os.remove('.papersconfig.json') class TestAdd(unittest.TestCase): @@ -324,392 +197,11 @@ def tearDown(self): os.remove('.papersconfig.json') -class BibTest(unittest.TestCase): - """base class for bib tests: create a new bibliography - """ - - def assertMultiLineEqual(self, first, second, msg=None): - """Assert that two multi-line strings are equal. - - If they aren't, show a nice diff. - source: https://stackoverflow.com/a/3943697/2192272 - """ - self.assertTrue(isinstance(first, str), - 'First argument is not a string') - self.assertTrue(isinstance(second, str), - 'Second argument is not a string') - - if first != second: - message = ''.join(difflib.ndiff(first.splitlines(True), - second.splitlines(True))) - if msg: - message += " : " + msg - self.fail("Multi-line strings are unequal:\n" + message) - - - -class SimilarityBase(unittest.TestCase): - - similarity = None - - reference = """@article{Perrette_2011, - author = {M. Perrette and A. Yool and G. D. Quartly and E. E. 
Popova}, - doi = {10.5194/bg-8-515-2011}, - title = {Near-ubiquity of ice-edge blooms in the Arctic}, - year = {2011} -}""" - - anotherkey = """@article{OtherKey, - author = {M. Perrette and A. Yool and G. D. Quartly and E. E. Popova}, - doi = {10.5194/bg-8-515-2011}, - title = {Near-ubiquity of ice-edge blooms in the Arctic}, - year = {2011} -}""" - - missingfield = """@article{Perrette_2011, - author = {M. Perrette and A. Yool and G. D. Quartly and E. E. Popova}, - doi = {10.5194/bg-8-515-2011}, - title = {Near-ubiquity of ice-edge blooms in the Arctic}, -}""" - - missingdoi = """@article{Perrette_2011, - author = {M. Perrette and A. Yool and G. D. Quartly and E. E. Popova}, - title = {Near-ubiquity of ice-edge blooms in the Arctic}, -}""" - - missingtitauthor = """@article{Perrette_2011, - doi = {10.5194/bg-8-515-2011}, -}""" - - conflictauthor = """@article{Perrette_2011, - author = {SomeOneElse}, - doi = {10.5194/bg-8-515-2011}, - title = {Near-ubiquity of ice-edge blooms in the Arctic}, -}""" - - conflictdoi = """@article{Perrette_2011, - author = {M. Perrette and A. Yool and G. D. Quartly and E. E. Popova}, - doi = {10.5194/XXX}, - title = {Near-ubiquity of ice-edge blooms in the Arctic}, -}""" - - conflictyear = """@article{Perrette_2011, - author = {M. Perrette and A. Yool and G. D. Quartly and E. E. 
Popova}, - doi = {10.5194/bg-8-515-2011}, - title = {Near-ubiquity of ice-edge blooms in the Arctic}, - year = {2012} -}""" - - - def isduplicate(self, a, b): - """test Biblio's eq method for duplicates - """ - db = bibtexparser.loads(a+'\n'+b) - e1, e2 = db.entries - refs = Biblio(similarity=self.similarity) - return refs.eq(e1, e2) - - -class TestDuplicatesExact(SimilarityBase): - - similarity = 'EXACT' - - def test_exactsame(self): - self.assertTrue(self.isduplicate(self.reference, self.reference)) - - def test_anotherkey(self): - self.assertFalse(self.isduplicate(self.reference, self.anotherkey)) - - def test_missingfield(self): - self.assertFalse(self.isduplicate(self.reference, self.missingfield)) - - def test_missingdoi(self): - self.assertFalse(self.isduplicate(self.reference, self.missingdoi)) - - def test_missingtitauthor(self): - self.assertFalse(self.isduplicate(self.reference, self.missingtitauthor)) - - def test_conflictauthor(self): - self.assertFalse(self.isduplicate(self.reference, self.conflictauthor)) - - def test_conflictdoi(self): - self.assertFalse(self.isduplicate(self.reference, self.conflictdoi)) - - def test_conflictyear(self): - self.assertFalse(self.isduplicate(self.reference, self.conflictyear)) - - -class TestDuplicatesGood(TestDuplicatesExact): - - similarity = 'GOOD' - - def test_anotherkey(self): - self.assertTrue(self.isduplicate(self.reference, self.anotherkey)) - - def test_missingfield(self): - self.assertTrue(self.isduplicate(self.reference, self.missingfield)) - - def test_conflictyear(self): - self.assertTrue(self.isduplicate(self.reference, self.conflictyear)) - - -class TestDuplicatesFair(TestDuplicatesGood): - - similarity = 'FAIR' - - def test_missingtitauthor(self): - self.assertTrue(self.isduplicate(self.reference, self.missingtitauthor)) - - def test_conflictauthor(self): - self.assertTrue(self.isduplicate(self.reference, self.conflictauthor)) - - -class TestDuplicatesPartial(TestDuplicatesFair): - - similarity = 
'PARTIAL' - - def test_missingdoi(self): - self.assertTrue(self.isduplicate(self.reference, self.missingdoi)) - - def test_conflictdoi(self): - self.assertTrue(self.isduplicate(self.reference, self.conflictdoi)) - - -class TestDuplicates(TestDuplicatesPartial): - - @staticmethod - def isduplicate(a, b): - """test Biblio's eq method for duplicates - """ - db = bibtexparser.loads(a+'\n'+b) - e1, e2 = db.entries - refs = Biblio() - return refs.eq(e1, e2) - - -class TestDuplicatesAdd(TestDuplicates): - - def setUp(self): - self.mybib = tempfile.mktemp(prefix='papers.bib') - self.otherbib = tempfile.mktemp(prefix='papers.otherbib') - - def tearDown(self): - os.remove(self.mybib) - os.remove(self.otherbib) - - def isduplicate(self, a, b): - """test Biblio's eq method in 'add' mode - """ - open(self.mybib, 'w').write(a) - open(self.otherbib, 'w').write(b) - res = paperscmd(f'add {self.otherbib} --bibtex {self.mybib} --update-key --mode r --debug', sp_cmd="call") - return res != 0 - - @unittest.skip("skip cause does not make sense with add") - def test_exactsame(self): - pass - - @unittest.skip("skip cause does not make sense with add") - def test_anotherkey(self): - pass - - - -class TestAddResolveDuplicate(BibTest): - - original = """@article{Perrette_2011, - doi = {10.5194/bg-8-515-2011}, - journal = {Biogeosciences}, - year = {RareYear} -}""" - - - conflict = """@article{AnotherKey, - author = {New Author Field}, - doi = {10.5194/bg-8-515-2011}, - journal = {ConflictJournal} -}""" - - - def setUp(self): - self.mybib = tempfile.mktemp(prefix='papers.bib') - self.otherbib = tempfile.mktemp(prefix='papers.otherbib') - open(self.mybib, 'w').write(self.original) - - def tearDown(self): - os.remove(self.mybib) - os.remove(self.otherbib) - - def command(self, mode): - return f'echo {mode} | {PAPERSCMD} add {self.otherbib} --bibtex {self.mybib} --debug' - - def test_overwrite(self): - - expected = self.conflict - - open(self.otherbib, 'w').write(self.conflict) - 
sp.check_call(self.command('o'), shell=True) - self.assertMultiLineEqual(open(self.mybib).read().strip(), expected) # entries did not change - - - def test_skip(self): - - expected = self.original - - open(self.otherbib, 'w').write(self.conflict) - sp.check_call(self.command('s'), shell=True) - self.assertMultiLineEqual(open(self.mybib).read().strip(), expected) # entries did not change - - def test_append(self): - open(self.otherbib, 'w').write(self.conflict) - sp.check_call(self.command('a'), shell=True) - # paperscmd(f'add {} --bibtex {} --debug'.format(self.otherbib, self.mybib)) - expected = self.conflict + '\n\n' + self.original - self.assertMultiLineEqual(open(self.mybib).read().strip(), expected) # entries did not change - - - def test_raises(self): - # update key to new entry, but does not merge... - open(self.otherbib, 'w').write(self.conflict) - func = lambda: sp.check_call(self.command('r'), shell=True) - self.assertRaises(Exception, func) - - - def test_original_updated_from_conflict(self): - - expected = """@article{Perrette_2011, - author = {New Author Field}, - doi = {10.5194/bg-8-515-2011}, - journal = {Biogeosciences}, - year = {RareYear} -}""" - - open(self.otherbib, 'w').write(self.conflict) - sp.check_call(self.command('u'), shell=True) - self.assertMultiLineEqual(open(self.mybib).read().strip(), expected) # entries did not change - - - def test_conflict_updated_from_original(self): - - expected = """@article{AnotherKey, - author = {New Author Field}, - doi = {10.5194/bg-8-515-2011}, - journal = {ConflictJournal}, - year = {RareYear} -}""" - - open(self.otherbib, 'w').write(self.conflict) - sp.check_call(self.command('U'), shell=True) - self.assertMultiLineEqual(open(self.mybib).read().strip(), expected) # entries did not change - - - def test_conflict_updated_from_original_but_originalkey(self): - - expected = """@article{Perrette_2011, - author = {New Author Field}, - doi = {10.5194/bg-8-515-2011}, - journal = {ConflictJournal}, - year = 
{RareYear} -}""" - open(self.otherbib, 'w').write(self.conflict) - sp.check_call(self.command('U') + ' --update-key', shell=True) - self.assertMultiLineEqual(open(self.mybib).read().strip(), expected) # entries did not change - - - -class TestAddResolveDuplicateCommand(TestAddResolveDuplicate): - - def command(self, mode): - return f'{PAPERSCMD} add {self.otherbib} --bibtex {self.mybib} --mode {mode} --debug' - - - -class TestCheckResolveDuplicate(BibTest): - - original = """@article{Perrette_2011, - doi = {10.5194/bg-8-515-2011}, - journal = {Biogeosciences}, - year = {RareYear} -}""" - - - conflict = """@article{AnotherKey, - author = {New Author Field}, - doi = {10.5194/bg-8-515-2011}, - journal = {ConflictJournal} -}""" - - - def setUp(self): - self.mybib = tempfile.mktemp(prefix='papers.bib') - open(self.mybib, 'w').write(self.original + '\n\n' + self.conflict) - - def tearDown(self): - os.remove(self.mybib) - - def command(self, mode): - return f'echo {mode} | {PAPERSCMD} check --duplicates --bibtex {self.mybib} --debug' - - def test_pick_conflict_1(self): - - expected = self.conflict - - sp.check_call(self.command('1'), shell=True) - self.assertMultiLineEqual(open(self.mybib).read().strip(), expected) # entries did not change - - def test_pick_reference_2(self): - - expected = self.original - - sp.check_call(self.command('2'), shell=True) - self.assertMultiLineEqual(open(self.mybib).read().strip(), expected) # entries did not change - - - def test_skip_check(self): - - expected = self.conflict + '\n\n' + self.original - - sp.check_call(self.command('s'), shell=True) - self.assertMultiLineEqual(open(self.mybib).read().strip(), expected) # entries did not change - - - def test_not_a_duplicate(self): - - expected = self.conflict + '\n\n' + self.original - - sp.check_call(self.command('n'), shell=True) - self.assertMultiLineEqual(open(self.mybib).read().strip(), expected) # entries did not change - - - def test_raises(self): - # update key to new entry, but does 
not merge... - func = lambda: sp.check_call(self.command('r'), shell=True) - self.assertRaises(Exception, func) - - - def test_merge(self): - # update key to new entry, but does not merge... - expected = """@article{AnotherKey, - author = {New Author Field}, - doi = {10.5194/bg-8-515-2011}, - journal = {ConflictJournal}, - year = {RareYear} - }""" - func = lambda: sp.check_call(self.command('m\n3'), shell=True) - self.assertRaises(Exception, func) - - - -class TestUnicode(BibTest): - pass - - -class TestUnicodeVsLatexEncoding(BibTest): - pass - - -## KEEP FOR NOW BUT TRASH ASAP: +## The tests below were written first. They are not systematic but they have the advantage of existing. +## Shortly after they were written, they were considered deprecated. +## Now as I write these lines I cannot immediately grasp what is wrong with them. +## Probably best to keep them for now, and to review them at some point in the future to remove any redundancy with other tests. class TestAddConflict(BibTest): ## TODO: tear down in several smaller tests @@ -923,8 +415,3 @@ def test_add_miss_titauthor_merge(self): paperscmd(f'add {self.otherbib} --mode u --bibtex {self.mybib} --debug') expected = self.bibtex self.assertMultiLineEqual(open(self.mybib).read().strip(), expected) # entries did not change - - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_duplicates.py b/tests/test_duplicates.py new file mode 100644 index 0000000..c92ad95 --- /dev/null +++ b/tests/test_duplicates.py @@ -0,0 +1,361 @@ +import os +import subprocess as sp +import tempfile +import unittest + +import bibtexparser + +from papers.bib import Biblio +from tests.common import PAPERSCMD, paperscmd, BibTest + + +class SimilarityBase(unittest.TestCase): + + similarity = None + + reference = """@article{Perrette_2011, + author = {M. Perrette and A. Yool and G. D. Quartly and E. E. 
Popova}, + doi = {10.5194/bg-8-515-2011}, + title = {Near-ubiquity of ice-edge blooms in the Arctic}, + year = {2011} +}""" + + anotherkey = """@article{OtherKey, + author = {M. Perrette and A. Yool and G. D. Quartly and E. E. Popova}, + doi = {10.5194/bg-8-515-2011}, + title = {Near-ubiquity of ice-edge blooms in the Arctic}, + year = {2011} +}""" + + missingfield = """@article{Perrette_2011, + author = {M. Perrette and A. Yool and G. D. Quartly and E. E. Popova}, + doi = {10.5194/bg-8-515-2011}, + title = {Near-ubiquity of ice-edge blooms in the Arctic}, +}""" + + missingdoi = """@article{Perrette_2011, + author = {M. Perrette and A. Yool and G. D. Quartly and E. E. Popova}, + title = {Near-ubiquity of ice-edge blooms in the Arctic}, +}""" + + missingtitauthor = """@article{Perrette_2011, + doi = {10.5194/bg-8-515-2011}, +}""" + + conflictauthor = """@article{Perrette_2011, + author = {SomeOneElse}, + doi = {10.5194/bg-8-515-2011}, + title = {Near-ubiquity of ice-edge blooms in the Arctic}, +}""" + + conflictdoi = """@article{Perrette_2011, + author = {M. Perrette and A. Yool and G. D. Quartly and E. E. Popova}, + doi = {10.5194/XXX}, + title = {Near-ubiquity of ice-edge blooms in the Arctic}, +}""" + + conflictyear = """@article{Perrette_2011, + author = {M. Perrette and A. Yool and G. D. Quartly and E. E. 
Popova}, + doi = {10.5194/bg-8-515-2011}, + title = {Near-ubiquity of ice-edge blooms in the Arctic}, + year = {2012} +}""" + + + def isduplicate(self, a, b): + """test Biblio's eq method for duplicates + """ + db = bibtexparser.loads(a+'\n'+b) + e1, e2 = db.entries + refs = Biblio(similarity=self.similarity) + return refs.eq(e1, e2) + + +class TestDuplicatesExact(SimilarityBase): + + similarity = 'EXACT' + + def test_exactsame(self): + self.assertTrue(self.isduplicate(self.reference, self.reference)) + + def test_anotherkey(self): + self.assertFalse(self.isduplicate(self.reference, self.anotherkey)) + + def test_missingfield(self): + self.assertFalse(self.isduplicate(self.reference, self.missingfield)) + + def test_missingdoi(self): + self.assertFalse(self.isduplicate(self.reference, self.missingdoi)) + + def test_missingtitauthor(self): + self.assertFalse(self.isduplicate(self.reference, self.missingtitauthor)) + + def test_conflictauthor(self): + self.assertFalse(self.isduplicate(self.reference, self.conflictauthor)) + + def test_conflictdoi(self): + self.assertFalse(self.isduplicate(self.reference, self.conflictdoi)) + + def test_conflictyear(self): + self.assertFalse(self.isduplicate(self.reference, self.conflictyear)) + + +class TestDuplicatesGood(TestDuplicatesExact): + + similarity = 'GOOD' + + def test_anotherkey(self): + self.assertTrue(self.isduplicate(self.reference, self.anotherkey)) + + def test_missingfield(self): + self.assertTrue(self.isduplicate(self.reference, self.missingfield)) + + def test_conflictyear(self): + self.assertTrue(self.isduplicate(self.reference, self.conflictyear)) + + +class TestDuplicatesFair(TestDuplicatesGood): + + similarity = 'FAIR' + + def test_missingtitauthor(self): + self.assertTrue(self.isduplicate(self.reference, self.missingtitauthor)) + + def test_conflictauthor(self): + self.assertTrue(self.isduplicate(self.reference, self.conflictauthor)) + + +class TestDuplicatesPartial(TestDuplicatesFair): + + similarity = 
'PARTIAL' + + def test_missingdoi(self): + self.assertTrue(self.isduplicate(self.reference, self.missingdoi)) + + def test_conflictdoi(self): + self.assertTrue(self.isduplicate(self.reference, self.conflictdoi)) + + +class TestDuplicates(TestDuplicatesPartial): + + @staticmethod + def isduplicate(a, b): + """test Biblio's eq method for duplicates + """ + db = bibtexparser.loads(a+'\n'+b) + e1, e2 = db.entries + refs = Biblio() + return refs.eq(e1, e2) + + +class TestDuplicatesAdd(TestDuplicates): + + def setUp(self): + self.mybib = tempfile.mktemp(prefix='papers.bib') + self.otherbib = tempfile.mktemp(prefix='papers.otherbib') + + def tearDown(self): + os.remove(self.mybib) + os.remove(self.otherbib) + + def isduplicate(self, a, b): + """test Biblio's eq method in 'add' mode + """ + open(self.mybib, 'w').write(a) + open(self.otherbib, 'w').write(b) + res = paperscmd(f'add {self.otherbib} --bibtex {self.mybib} --update-key --mode r --debug', sp_cmd="call") + return res != 0 + + @unittest.skip("skip cause does not make sense with add") + def test_exactsame(self): + pass + + @unittest.skip("skip cause does not make sense with add") + def test_anotherkey(self): + pass + + + +class TestAddResolveDuplicate(BibTest): + + original = """@article{Perrette_2011, + doi = {10.5194/bg-8-515-2011}, + journal = {Biogeosciences}, + year = {RareYear} +}""" + + + conflict = """@article{AnotherKey, + author = {New Author Field}, + doi = {10.5194/bg-8-515-2011}, + journal = {ConflictJournal} +}""" + + + def setUp(self): + self.mybib = tempfile.mktemp(prefix='papers.bib') + self.otherbib = tempfile.mktemp(prefix='papers.otherbib') + open(self.mybib, 'w').write(self.original) + + def tearDown(self): + os.remove(self.mybib) + os.remove(self.otherbib) + + def command(self, mode): + return f'echo {mode} | {PAPERSCMD} add {self.otherbib} --bibtex {self.mybib} --debug' + + def test_overwrite(self): + + expected = self.conflict + + open(self.otherbib, 'w').write(self.conflict) + 
sp.check_call(self.command('o'), shell=True) + self.assertMultiLineEqual(open(self.mybib).read().strip(), expected) # entries did not change + + + def test_skip(self): + + expected = self.original + + open(self.otherbib, 'w').write(self.conflict) + sp.check_call(self.command('s'), shell=True) + self.assertMultiLineEqual(open(self.mybib).read().strip(), expected) # entries did not change + + def test_append(self): + open(self.otherbib, 'w').write(self.conflict) + sp.check_call(self.command('a'), shell=True) + # paperscmd(f'add {} --bibtex {} --debug'.format(self.otherbib, self.mybib)) + expected = self.conflict + '\n\n' + self.original + self.assertMultiLineEqual(open(self.mybib).read().strip(), expected) # entries did not change + + + def test_raises(self): + # update key to new entry, but does not merge... + open(self.otherbib, 'w').write(self.conflict) + func = lambda: sp.check_call(self.command('r'), shell=True) + self.assertRaises(Exception, func) + + + def test_original_updated_from_conflict(self): + + expected = """@article{Perrette_2011, + author = {New Author Field}, + doi = {10.5194/bg-8-515-2011}, + journal = {Biogeosciences}, + year = {RareYear} +}""" + + open(self.otherbib, 'w').write(self.conflict) + sp.check_call(self.command('u'), shell=True) + self.assertMultiLineEqual(open(self.mybib).read().strip(), expected) # entries did not change + + + def test_conflict_updated_from_original(self): + + expected = """@article{AnotherKey, + author = {New Author Field}, + doi = {10.5194/bg-8-515-2011}, + journal = {ConflictJournal}, + year = {RareYear} +}""" + + open(self.otherbib, 'w').write(self.conflict) + sp.check_call(self.command('U'), shell=True) + self.assertMultiLineEqual(open(self.mybib).read().strip(), expected) # entries did not change + + + def test_conflict_updated_from_original_but_originalkey(self): + + expected = """@article{Perrette_2011, + author = {New Author Field}, + doi = {10.5194/bg-8-515-2011}, + journal = {ConflictJournal}, + year = 
{RareYear} +}""" + open(self.otherbib, 'w').write(self.conflict) + sp.check_call(self.command('U') + ' --update-key', shell=True) + self.assertMultiLineEqual(open(self.mybib).read().strip(), expected) # entries did not change + + + +class TestAddResolveDuplicateCommand(TestAddResolveDuplicate): + + def command(self, mode): + return f'{PAPERSCMD} add {self.otherbib} --bibtex {self.mybib} --mode {mode} --debug' + + + +class TestCheckResolveDuplicate(BibTest): + + original = """@article{Perrette_2011, + doi = {10.5194/bg-8-515-2011}, + journal = {Biogeosciences}, + year = {RareYear} +}""" + + + conflict = """@article{AnotherKey, + author = {New Author Field}, + doi = {10.5194/bg-8-515-2011}, + journal = {ConflictJournal} +}""" + + + def setUp(self): + self.mybib = tempfile.mktemp(prefix='papers.bib') + open(self.mybib, 'w').write(self.original + '\n\n' + self.conflict) + + def tearDown(self): + os.remove(self.mybib) + + def command(self, mode): + return f'echo {mode} | {PAPERSCMD} check --duplicates --bibtex {self.mybib} --debug' + + def test_pick_conflict_1(self): + + expected = self.conflict + + sp.check_call(self.command('1'), shell=True) + self.assertMultiLineEqual(open(self.mybib).read().strip(), expected) # entries did not change + + def test_pick_reference_2(self): + + expected = self.original + + sp.check_call(self.command('2'), shell=True) + self.assertMultiLineEqual(open(self.mybib).read().strip(), expected) # entries did not change + + + def test_skip_check(self): + + expected = self.conflict + '\n\n' + self.original + + sp.check_call(self.command('s'), shell=True) + self.assertMultiLineEqual(open(self.mybib).read().strip(), expected) # entries did not change + + + def test_not_a_duplicate(self): + + expected = self.conflict + '\n\n' + self.original + + sp.check_call(self.command('n'), shell=True) + self.assertMultiLineEqual(open(self.mybib).read().strip(), expected) # entries did not change + + + def test_raises(self): + # update key to new entry, but does 
not merge... + func = lambda: sp.check_call(self.command('r'), shell=True) + self.assertRaises(Exception, func) + + + def test_merge(self): + # update key to new entry, but does not merge... + expected = """@article{AnotherKey, + author = {New Author Field}, + doi = {10.5194/bg-8-515-2011}, + journal = {ConflictJournal}, + year = {RareYear} + }""" + func = lambda: sp.check_call(self.command('m\n3'), shell=True) + self.assertRaises(Exception, func) + diff --git a/tests/test_encoding.py b/tests/test_encoding.py new file mode 100644 index 0000000..2d93df3 --- /dev/null +++ b/tests/test_encoding.py @@ -0,0 +1,41 @@ +import unittest +from papers.bib import parse_file, format_file +from tests.common import BibTest + + +class TestBibtexFileEntry(unittest.TestCase): + + def test_parse_file(self): + file = parse_file('file.pdf:/path/to/file.pdf:pdf') + self.assertEqual(file, ['/path/to/file.pdf']) + file = parse_file(':/path/to/file.pdf:pdf') + self.assertEqual(file, ['/path/to/file.pdf']) + file = parse_file('/path/to/file.pdf:pdf') + self.assertEqual(file, ['/path/to/file.pdf']) + file = parse_file('/path/to/file.pdf') + self.assertEqual(file, ['/path/to/file.pdf']) + file = parse_file(':/path/to/file.pdf:') + self.assertEqual(file, ['/path/to/file.pdf']) + + + def test_parse_files(self): + files = parse_file(':/path/to/file1.pdf:pdf;:/path/to/file2.pdf:pdf') + self.assertEqual(files, ['/path/to/file1.pdf','/path/to/file2.pdf']) + + + def test_format_file(self): + field = format_file(['/path/to/file.pdf']) + self.assertEqual(field, ':/path/to/file.pdf:pdf') + + + def test_format_files(self): + field = format_file(['/path/to/file1.pdf','/path/to/file2.pdf']) + self.assertEqual(field, ':/path/to/file1.pdf:pdf;:/path/to/file2.pdf:pdf') + + +class TestUnicode(BibTest): + pass + + +class TestUnicodeVsLatexEncoding(BibTest): + pass \ No newline at end of file diff --git a/tests/test_extract.py b/tests/test_extract.py new file mode 100644 index 0000000..7f6ce14 --- /dev/null +++ 
b/tests/test_extract.py @@ -0,0 +1,25 @@ +import unittest +import os + +from papers.extract import extract_pdf_metadata +from papers.bib import bibtexparser +from tests.common import paperscmd, prepare_paper + + +class TestSimple(unittest.TestCase): + + def setUp(self): + self.pdf, self.doi, self.key, self.newkey, self.year, self.bibtex, self.file_rename = prepare_paper() + self.assertTrue(os.path.exists(self.pdf)) + + def test_doi(self): + self.assertEqual(paperscmd(f'doi {self.pdf}').strip(), self.doi) + + def test_fetch(self): + bibtexs = paperscmd(f'fetch {self.doi}').strip() + db1 = bibtexparser.loads(bibtexs) + db2 = bibtexparser.loads(self.bibtex) + self.assertEqual(db1.entries, db2.entries) + + def test_fetch_scholar(self): + extract_pdf_metadata(self.pdf, scholar=True) \ No newline at end of file diff --git a/tests/test_install.py b/tests/test_install.py new file mode 100644 index 0000000..add4028 --- /dev/null +++ b/tests/test_install.py @@ -0,0 +1,26 @@ +import os +import shutil +import tempfile +import unittest +# from pathlib import Path + +from tests.common import paperscmd + +class TestInstall(unittest.TestCase): + + def setUp(self): + self.mybib = tempfile.mktemp(prefix='papers.bib') + self.filesdir = tempfile.mktemp(prefix='papers.files') + + def test_local_install(self): + paperscmd(f'install --local --bibtex {self.mybib} --files {self.filesdir}') + self.assertTrue(os.path.exists(self.mybib)) + self.assertTrue(os.path.exists(self.filesdir)) + + def tearDown(self): + if os.path.exists(self.filesdir): + shutil.rmtree(self.filesdir) + if os.path.exists(self.mybib): + os.remove(self.mybib) + if os.path.exists('.papersconfig.json'): + os.remove('.papersconfig.json') \ No newline at end of file