From 7e813376862efdc615eb1a77fbba8ac8dbf85312 Mon Sep 17 00:00:00 2001 From: Brian Wilson Date: Tue, 27 Jul 2021 14:15:15 -0400 Subject: [PATCH] Initial pass at moving to geoip2. --- .../tasks/export/events_obfuscation.py | 6 +++-- edx/analytics/tasks/util/geolocation.py | 25 +++++++++++++------ requirements/default.txt | 5 ++-- requirements/docs.txt | 5 ++-- requirements/test.txt | 5 ++-- 5 files changed, 30 insertions(+), 16 deletions(-) diff --git a/edx/analytics/tasks/export/events_obfuscation.py b/edx/analytics/tasks/export/events_obfuscation.py index 1eb1000ff4..e4b224b142 100644 --- a/edx/analytics/tasks/export/events_obfuscation.py +++ b/edx/analytics/tasks/export/events_obfuscation.py @@ -346,8 +346,10 @@ def _obfuscate_event(self, event): def extra_modules(self): import numpy - import pygeoip - return [numpy, pygeoip] + # import pygeoip + # return [numpy, pygeoip] + import geoip2 + return [numpy, geoip2] class EventObfuscationTask(ObfuscatorDownstreamMixin, MapReduceJobTaskMixin, luigi.WrapperTask): diff --git a/edx/analytics/tasks/util/geolocation.py b/edx/analytics/tasks/util/geolocation.py index dae033084b..5877afe939 100644 --- a/edx/analytics/tasks/util/geolocation.py +++ b/edx/analytics/tasks/util/geolocation.py @@ -8,11 +8,13 @@ from edx.analytics.tasks.util.url import ExternalURL try: - import pygeoip + # import pygeoip + import geoip2.database except ImportError: # The module will be imported on slave nodes even though they don't actually have the package installed. # The module is hopefully exported for tasks that actually use the module. - pygeoip = NotImplemented + # pygeoip = NotImplemented + geoip2 = NotImplemented UNKNOWN_COUNTRY = "UNKNOWN" @@ -36,6 +38,7 @@ class GeolocationMixin(GeolocationDownstreamMixin): """Provides support for initializing a geolocation object.""" geoip = None + # Why is this set to none? geoip2 = None def requires_local(self): """Adds geolocation_data as a local requirement.""" @@ -67,7 +70,8 @@ def init_reducer(self): break self.temporary_data_file.seek(0) - self.geoip = pygeoip.GeoIP(self.temporary_data_file.name, pygeoip.STANDARD) + # self.geoip = pygeoip.GeoIP(self.temporary_data_file.name, pygeoip.STANDARD) + self.geoip = geoip2.database.Reader(self.temporary_data_file.name) def final_reducer(self): """Clean up after the reducer is done.""" @@ -77,12 +81,15 @@ def final_reducer(self): return tuple() def extra_modules(self): - """Pygeoip is required by all tasks that perform geolocation.""" + # """Pygeoip is required by all tasks that perform geolocation.""" + """geoip2 is required by all tasks that perform geolocation.""" modules = super(GeolocationMixin, self).extra_modules() if not modules: - return [pygeoip] + # return [pygeoip] + return [geoip2] else: - return modules.append(pygeoip) + # return modules.append(pygeoip) + return modules.append(geoip2) def get_country_name(self, ip_address, debug_message=None): """ @@ -93,7 +100,8 @@ def get_country_name(self, ip_address, debug_message=None): """ try: - name = self.geoip.country_name_by_addr(ip_address) + # name = self.geoip.country_name_by_addr(ip_address) + name = (self.geoip.city(ip_address)).country.name except Exception: # pylint: disable=broad-except if debug_message: log.exception("Encountered exception getting country name for ip_address '%s': %s.", @@ -116,7 +124,8 @@ def get_country_code(self, ip_address, debug_message=None): """ try: - code = self.geoip.country_code_by_addr(ip_address) + # code = self.geoip.country_code_by_addr(ip_address) + code = (self.geoip.city(ip_address)).country.iso_code except Exception: # pylint: disable=broad-except if debug_message: log.exception("Encountered exception getting country code for ip_address '%s': %s.", diff --git a/requirements/default.txt b/requirements/default.txt index 1ac4f50af4..ce61a5f34c 100644 --- a/requirements/default.txt +++ b/requirements/default.txt @@ -61,13 +61,14 @@ paramiko==2.6.0 paypalrestsdk==1.9.0 pbr==5.4.3 # via stevedore protobuf==3.10.0 # via google-cloud-core, googleapis-common-protos -psycopg2==2.6.2 +# psycopg2==2.6.2 pyasn1-modules==0.2.7 # via google-auth, snowflake-connector-python pyasn1==0.4.7 # via pyasn1-modules, rsa, snowflake-connector-python pycparser==2.19 pycrypto==2.6.1 pycryptodomex==3.9.0 # via snowflake-connector-python -pygeoip==0.3.2 +# pygeoip==0.3.2 +geoip2==2.9.0 pyjwt==1.7.1 # via snowflake-connector-python pymongo==3.9.0 # via edx-opaque-keys pynacl==1.3.0 diff --git a/requirements/docs.txt b/requirements/docs.txt index f0a944eb61..7124e69373 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -64,13 +64,14 @@ paramiko==2.6.0 paypalrestsdk==1.9.0 pbr==5.4.3 protobuf==3.10.0 -psycopg2==2.6.2 +# psycopg2==2.6.2 pyasn1-modules==0.2.7 pyasn1==0.4.7 pycparser==2.19 pycrypto==2.6.1 pycryptodomex==3.9.0 -pygeoip==0.3.2 +geoip2==2.9.0 +# pygeoip==0.3.2 pygments==2.4.2 # via sphinx pyjwt==1.7.1 pymongo==3.9.0 diff --git a/requirements/test.txt b/requirements/test.txt index c7eac4ae8c..2323706433 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -79,14 +79,15 @@ paramiko==2.6.0 paypalrestsdk==1.9.0 pbr==5.4.3 protobuf==3.10.0 -psycopg2==2.6.2 +# psycopg2==2.6.2 pyasn1-modules==0.2.7 pyasn1==0.4.7 pycodestyle==2.3.1 pycparser==2.19 pycrypto==2.6.1 pycryptodomex==3.9.0 -pygeoip==0.3.2 +geoip2==2.9.0 +# pygeoip==0.3.2 pygments==2.4.2 # via diff-cover pyjwt==1.7.1 pylint==1.6.4