From 9fad91e7b077f5c0156bf444eda14e05419bc67d Mon Sep 17 00:00:00 2001 From: Jonathan Young Date: Sat, 8 Jul 2023 20:18:01 -0400 Subject: [PATCH] Fix search for authors with name suffixes like III, IV, and SJ. --- browse/services/search/search_authors.py | 5 ++-- .../ftp/arxiv/papers/2108/2108.10257.abs | 2 +- tests/test_browse.py | 29 +++++++++++++++++-- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/browse/services/search/search_authors.py b/browse/services/search/search_authors.py index 9fb21e885..f2c8d656d 100644 --- a/browse/services/search/search_authors.py +++ b/browse/services/search/search_authors.py @@ -141,8 +141,9 @@ def _link_for_name_or_collab(item: str) -> AuthorList: if len(name_bits) == 0: query_str = item else: - # Do not include Jr, Sr, III, etc. in search - if re.search(r'Jr\b|Sr\b|[IV]{2, }]', name_bits[-1]): + # Do not include SJ, Jr, Sr, III, IV, etc. in search + if re.match(r'SJ|Jr|Sr|[IV]{2,}$', name_bits[-1]) \ + and len(name_bits) > 1: name_bits.pop() surname = '' diff --git a/tests/data/abs_files/ftp/arxiv/papers/2108/2108.10257.abs b/tests/data/abs_files/ftp/arxiv/papers/2108/2108.10257.abs index 3d4c399a1..983b840c6 100644 --- a/tests/data/abs_files/ftp/arxiv/papers/2108/2108.10257.abs +++ b/tests/data/abs_files/ftp/arxiv/papers/2108/2108.10257.abs @@ -5,7 +5,7 @@ From: Jingyun Liang Date: Mon, 23 Aug 2021 15:55:32 GMT (11907kb,D) Title: SwinIR: Image Restoration Using Swin Transformer -Authors: Jingyun Liang, Jiezhang Cao, Guolei Sun, Kai Zhang, Luc Van Gool, Radu +Authors: Jingyun Liang, Jr., Jiezhang Cao III, Guolei Sun SJ, Kai Zhang, Luc Van Gool, Radu Timofte Categories: eess.IV cs.CV Comments: Sota results on classical/lightweight/real-world image SR, image diff --git a/tests/test_browse.py b/tests/test_browse.py index 2d232401e..0b361efb3 100644 --- a/tests/test_browse.py +++ b/tests/test_browse.py @@ -371,7 +371,7 @@ def test_long_author_colab(self): a_tags = auths_elmt.find_all('a') self.assertEqual( len(a_tags), 2, 'Should be two tags in authors div') - + colab = a_tags[1] self.assertIsNotNone( @@ -417,10 +417,33 @@ def test_psi_in_abs(self): 'TeX psi in abstract should not get converted to UTF8') self.assertNotIn('$j(φ,L)$', abs_elmt.text, 'TeX psi in abstract should not get converted to UTF8') - self.assertIn('The phase difference $\phi$, between the superconducting', + self.assertIn('The phase difference $\\phi$, between the superconducting', abs_elmt.text, - "Expecting uncoverted $\phi$ in html abstract.") + "Expecting uncoverted $\\phi$ in html abstract.") + + def test_authors_with_suffixes(self): + id = '2108.10257' + rv = self.app.get('/abs/' + id) + self.assertEqual(rv.status_code, 200) + html = BeautifulSoup(rv.data.decode('utf-8'), 'html.parser') + + auths_elmt = html.find('div', 'authors') + self.assertTrue(auths_elmt, 'Should authors div element') + + a_tags = auths_elmt.find_all('a') + self.assertEqual( + len(a_tags), 6, 'Should be 6 tags in authors div') + j_liang = a_tags[0] + self.assertEqual( + j_liang['href'], 'https://arxiv.org/search/eess?searchtype=author&query=Liang,+J') + j_cao = a_tags[1] + self.assertEqual( + j_cao['href'], 'https://arxiv.org/search/eess?searchtype=author&query=Cao,+J') + g_sun = a_tags[2] + self.assertEqual( + g_sun['href'], 'https://arxiv.org/search/eess?searchtype=author&query=Sun,+G') + def test_year(self): rv = self.app.get('/year/astro-ph/09') self.assertEqual(rv.status_code, 200)