Skip to content

Commit

Permalink
Fix search for authors with name suffixes like III, IV, and SJ.
Browse files Browse the repository at this point in the history
  • Loading branch information
Jonathan Young authored and Jonathan Young committed Jul 9, 2023
1 parent ac6e259 commit 9fad91e
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 6 deletions.
5 changes: 3 additions & 2 deletions browse/services/search/search_authors.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,9 @@ def _link_for_name_or_collab(item: str) -> AuthorList:
if len(name_bits) == 0:
query_str = item
else:
# Do not include Jr, Sr, III, etc. in search
if re.search(r'Jr\b|Sr\b|[IV]{2, }]', name_bits[-1]):
# Do not include SJ, Jr, Sr, III, IV, etc. in search
if re.match(r'SJ|Jr|Sr|[IV]{2,}$', name_bits[-1]) \
and len(name_bits) > 1:
name_bits.pop()

surname = ''
Expand Down
2 changes: 1 addition & 1 deletion tests/data/abs_files/ftp/arxiv/papers/2108/2108.10257.abs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ From: Jingyun Liang <[email protected]>
Date: Mon, 23 Aug 2021 15:55:32 GMT (11907kb,D)

Title: SwinIR: Image Restoration Using Swin Transformer
Authors: Jingyun Liang, Jiezhang Cao, Guolei Sun, Kai Zhang, Luc Van Gool, Radu
Authors: Jingyun Liang, Jr., Jiezhang Cao III, Guolei Sun SJ, Kai Zhang, Luc Van Gool, Radu
Timofte
Categories: eess.IV cs.CV
Comments: Sota results on classical/lightweight/real-world image SR, image
Expand Down
29 changes: 26 additions & 3 deletions tests/test_browse.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ def test_long_author_colab(self):
a_tags = auths_elmt.find_all('a')
self.assertEqual(
len(a_tags), 2, 'Should be two <a> tags in authors div')

colab = a_tags[1]

self.assertIsNotNone(
Expand Down Expand Up @@ -417,10 +417,33 @@ def test_psi_in_abs(self):
'TeX psi in abstract should not get converted to UTF8')
self.assertNotIn('$j(φ,L)$', abs_elmt.text,
'TeX psi in abstract should not get converted to UTF8')
self.assertIn('The phase difference $\phi$, between the superconducting',
self.assertIn('The phase difference $\\phi$, between the superconducting',
abs_elmt.text,
"Expecting uncoverted $\phi$ in html abstract.")
"Expecting uncoverted $\\phi$ in html abstract.")

def test_authors_with_suffixes(self):
    """Author search links must omit name suffixes such as Jr., III and SJ.

    The 2108.10257 fixture lists its first three authors as
    "Jingyun Liang, Jr.", "Jiezhang Cao III" and "Guolei Sun SJ"; each
    generated link is expected to query on surname and first initial only.
    """
    paper_id = '2108.10257'  # named paper_id so the builtin ``id`` is not shadowed
    rv = self.app.get('/abs/' + paper_id)
    self.assertEqual(rv.status_code, 200)
    html = BeautifulSoup(rv.data.decode('utf-8'), 'html.parser')

    auths_elmt = html.find('div', 'authors')
    # find() returns None when the element is missing, so test for that
    # explicitly rather than relying on truthiness.
    self.assertIsNotNone(auths_elmt, 'Should authors div element')

    a_tags = auths_elmt.find_all('a')
    self.assertEqual(
        len(a_tags), 6, 'Should be 6 <a> tags in authors div')

    search_url = 'https://arxiv.org/search/eess?searchtype=author&query='
    # Expected query strings for the first three authors, in document order.
    expected_queries = ('Liang,+J', 'Cao,+J', 'Sun,+G')
    for a_tag, query in zip(a_tags, expected_queries):
        # subTest reports every mismatching author instead of stopping
        # at the first failure.
        with self.subTest(query=query):
            self.assertEqual(a_tag['href'], search_url + query)

def test_year(self):
rv = self.app.get('/year/astro-ph/09')
self.assertEqual(rv.status_code, 200)
Expand Down

0 comments on commit 9fad91e

Please sign in to comment.