11import os
22
3- from urllib .request import Request , urlopen
3+ import requests
4+ try :
5+ import cloudscraper
6+ except ImportError :
7+ cloudscraper = None
48from urllib .parse import urlparse , urlunparse
59from warnings import warn
610
@@ -21,21 +25,47 @@ def resolve_redirects(u):
2125 # If removed, it'd make sense to canonicalize in simplify_url instead to
2226 # prevent spurious test failures
2327 u = urlunparse (urlparse (u )._replace (scheme = 'https' ))
24- req = Request (u , headers = {'User-Agent' : 'Mozilla/5.0' })
25- with urlopen (req ) as r :
26- return simplify_url (r .url )
2728
29+ if cloudscraper :
30+ scraper = cloudscraper .create_scraper ()
31+ return simplify_url (scraper .get (u ).url )
2832
29- def normalize_eq (u , v ):
33+ # Try emulating a browser to not get blocked
34+ h = {'User-Agent' : 'Mozilla/5.0' }
35+ resp = requests .get (u , headers = h )
36+ return simplify_url (resp .url )
37+
38+
def normalize_eq(u, v, expect_diff=False):
    """Return whether the URLs *u* and *v* should be treated as equal.

    Identical strings compare equal immediately.  Otherwise the URLs are
    compared after ``simplify_url`` and, if that still differs, after
    ``resolve_redirects`` (which issues HTTP requests).

    Args:
        u: First URL.
        v: Second URL.
        expect_diff: When true, the caller already knows the two URLs
            differ textually, so the "please update" warning is suppressed.

    Returns:
        ``True`` if the URLs match textually, after simplification, or
        after redirect resolution; ``False`` otherwise.
    """
    if u == v:
        return True
    if not expect_diff:
        # stacklevel=2 attributes the warning to the calling test case,
        # which is the place that actually needs updating.
        warn(f"{u} textually differs from {v}, please update the relevant case.\n"
             "Attempting to recover by resolving redirects",
             stacklevel=2)
    # Cheap textual canonicalization first; only hit the network if needed.
    return (simplify_url(u) == simplify_url(v)
            or resolve_redirects(u) == resolve_redirects(v)
            )
3748
3849
@pytest.mark.net
@pytest.mark.parametrize(
    "needs_cloudscraper, urls",
    [
        (True,
         ["http://pubs.aip.org/aip/jcp/article/150/7/074102/197572/Exact-two-component-equation-of-motion-coupled",  # noqa: E501
          "http://pubs.aip.org/jcp/article/150/7/074102/197572/Exact-two-component-equation-of-motion-coupled",  # noqa: E501
          "http://aip.scitation.org/doi/10.1063/1.5081715"
          ]),
    ]
)
def test_redirect(needs_cloudscraper, urls) -> None:
    """Check that every URL in *urls* is equivalent to the first one.

    Each parametrized case is a ``(needs_cloudscraper, urls)`` pair.  The
    first URL is the reference; every other URL must compare equal to it
    via ``normalize_eq``.  Cases flagged as needing cloudscraper are
    skipped when that optional package is not installed.
    """
    base = urls[0]
    if needs_cloudscraper and cloudscraper is None:
        pytest.skip(f"cloudscraper needed to solve CloudFlare challenge on {base}")
    for other in urls[1:]:
        # The URLs are textually different on purpose, so silence the
        # "please update the relevant case" warning.
        assert normalize_eq(base, other, expect_diff=True), (
            f"{other} is not equivalent to {base}"
        )
67+
68+
3969@pytest .mark .net
4070def test_validate_doi () -> None :
4171 data = [
0 commit comments