11import os
22
3- from urllib .request import Request , urlopen
3+ import requests
4+ try :
5+ # import cloudscraper
6+ cloudscraper = None
7+ except ImportError :
8+ cloudscraper = None
49from urllib .parse import urlparse , urlunparse
510from warnings import warn
611
@@ -21,21 +26,47 @@ def resolve_redirects(u):
2126 # If removed, it'd make sense to canonicalize in simplify_url instead to
2227 # prevent spurious test failures
2328 u = urlunparse (urlparse (u )._replace (scheme = 'https' ))
24- req = Request (u , headers = {'User-Agent' : 'Mozilla/5.0' })
25- with urlopen (req ) as r :
26- return simplify_url (r .url )
2729
30+ if cloudscraper :
31+ scraper = cloudscraper .create_scraper ()
32+ return simplify_url (scraper .get (u ).url )
2833
29- def normalize_eq (u , v ):
34+ # Try emulating a browser to not get blocked
35+ h = {'User-Agent' : 'Mozilla/5.0' }
36+ resp = requests .get (u , headers = h )
37+ return simplify_url (resp .url )
38+
39+
def normalize_eq(u, v, expect_diff=False):
    """Return whether URLs *u* and *v* refer to the same location.

    The comparison is attempted in increasing order of cost:

    1. plain string equality,
    2. equality of the ``simplify_url`` forms,
    3. equality of the ``resolve_redirects`` results for both URLs.

    Parameters
    ----------
    u, v
        The URLs to compare.
    expect_diff
        When true, suppress the warning normally emitted if the two URLs
        differ textually. Intended for callers that deliberately compare
        different spellings of the same resource.

    Returns
    -------
    bool
        ``True`` if any of the comparisons above succeeds.
    """
    if u == v:
        return True
    if not expect_diff:
        # A textual mismatch usually means a stored URL is stale; tell the
        # maintainer before falling back to the slower comparisons.
        warn(f"{u} textually differs from {v}, please update the relevant case.\n"
             "Attempting to recover by resolving redirects")
    # `or` short-circuits: redirects are only resolved when the simplified
    # forms still disagree.
    return (simplify_url(u) == simplify_url(v)
            or resolve_redirects(u) == resolve_redirects(v)
            )
3749
3850
@pytest.mark.net
@pytest.mark.parametrize(
    "needs_cloudscraper, urls",
    [
        (True,
         ["http://pubs.aip.org/aip/jcp/article/150/7/074102/197572/Exact-two-component-equation-of-motion-coupled",  # noqa: E501
          "http://pubs.aip.org/jcp/article/150/7/074102/197572/Exact-two-component-equation-of-motion-coupled",  # noqa: E501
          "http://aip.scitation.org/doi/10.1063/1.5081715"
          ]),
    ]
)
def test_redirect(needs_cloudscraper, urls) -> None:
    """Check that every URL in *urls* is equivalent to the first one.

    Each parametrized case is a pair ``(needs_cloudscraper, urls)``. The
    first entry of ``urls`` is the reference; every other entry must compare
    equal to it via ``normalize_eq``. ``expect_diff=True`` is passed because
    the spellings differ on purpose, so no "please update" warning is wanted.

    If a case needs cloudscraper and the module-level ``cloudscraper`` name
    is ``None``, the test is skipped instead of failing.
    """
    base = urls[0]
    if needs_cloudscraper and cloudscraper is None:
        # pytest.skip() is the imperative skip; `skipif` exists only as the
        # `pytest.mark.skipif` marker and is not callable as `pytest.skipif`.
        pytest.skip(
            f"cloudscraper needed to solve CloudFlare challenge on {base}"
        )
    for other in urls[1:]:
        assert normalize_eq(base, other, expect_diff=True)
68+
69+
3970@pytest .mark .net
4071def test_validate_doi () -> None :
4172 data = [
0 commit comments