Skip to content

Commit

Permalink
Update script to support Python 3
Browse files Browse the repository at this point in the history
Apply the following changes to make the list creation script compatible
with Python 3. After these changes it will no longer support Python 2.

  * Import configparser instead of ConfigParser, as this module was
    renamed in Python 3
  * Import quote and unquote from urllib.parse and urlopen from
    urllib.request instead of importing urllib2
  * Remove mock from the dependencies, as it is part of the standard
    library (as unittest.mock) in Python 3
  * Pin dependencies to their latest versions
  * Use %d instead of %s when formatting a binary chunk header to avoid
    getting a TypeError (in bytes formatting, %s does not accept an int).
    Also, use %d instead of %u, which is obsolete and identical to %d.
  * Set the "version" key equal to the empty string instead of None when
    reverting configuration to avoid getting a "TypeError: option values
    must be strings".
  * Update CircleCI configuration to use a docker image with Python 3.8
  * Update Python version requirement in README
  * Update README instructions to use virtualenv with Python 3

Closes #108.
  • Loading branch information
boolean5 committed Aug 13, 2020
1 parent ebe95aa commit 9d868ba
Show file tree
Hide file tree
Showing 6 changed files with 33 additions and 33 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ version: 2.1
jobs:
build:
docker:
- image: circleci/python:2.7
- image: circleci/python:3.8
steps:
- checkout
- restore_cache:
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@ generates safebrowsing-compatible digest list files to be served by

# Requirements

* python 2.x
* python ≥ 3.6
* (optional) virtualenv and/or virtualenvwrapper

# Run

1. (optional) Make a virtualenv for the project and activate it:

```
virtualenv shavar-list-creation
virtualenv -p python3.8 shavar-list-creation
source shavar-list-creation/bin/activate
```
Expand Down
27 changes: 14 additions & 13 deletions lists2safebrowsing.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
#!/usr/bin/env python

import ConfigParser
import configparser
import hashlib
import json
import os
import re
import requests
import sys
import time
import urllib2
from urllib.parse import quote, unquote
from urllib.request import urlopen

from packaging import version as p_version
from publicsuffixlist import PublicSuffixList
Expand Down Expand Up @@ -58,15 +59,15 @@ def get_list_url(config, section, key):
"""Return the requested list URL (or the default, if it isn't found)"""
try:
url = config.get(section, key)
except ConfigParser.NoOptionError:
except configparser.NoOptionError:
url = config.get("main", "default_disconnect_url")
return url


def load_json_from_url(config, section, key):
url = get_list_url(config, section, key)
try:
loaded_json = json.loads(urllib2.urlopen(url).read())
loaded_json = json.loads(urlopen(url).read())
except Exception:
sys.stderr.write("Error loading %s\n" % url)
sys.exit(-1)
Expand All @@ -91,7 +92,7 @@ def canonicalize(d):
# repeatedly unescape until no more hex encodings
while (1):
_d = d
d = urllib2.unquote(_d)
d = unquote(_d)
# if decoding had no effect, stop
if (d == _d):
break
Expand Down Expand Up @@ -142,7 +143,7 @@ def canonicalize(d):
_url = ""
for i in url:
if (ord(i) <= 32 or ord(i) >= 127 or i == '#' or i == '%'):
_url += urllib2.quote(i)
_url += quote(i)
else:
_url += i

Expand Down Expand Up @@ -319,7 +320,7 @@ def write_safebrowsing_blocklist(domains, output_name, log_file, chunk,
publishing += 1

# Write safebrowsing-list format header
output_bytes = b"a:%u:32:%s\n" % (chunk, hashdata_bytes)
output_bytes = b"a:%d:32:%d\n" % (chunk, hashdata_bytes)
output_bytes += b''.join(output)
# When testing on shavar-prod-lists no output file is provided
if output_file:
Expand Down Expand Up @@ -356,7 +357,7 @@ def process_entitylist(incoming, chunk, output_file, log_file, list_variant):
output.append(h.digest())

# Write the data file
output_file.write(b"a:%u:32:%s\n" % (chunk, hashdata_bytes))
output_file.write(b"a:%d:32:%d\n" % (chunk, hashdata_bytes))
# FIXME: we should really sort the output
for o in output:
output_file.write(o)
Expand Down Expand Up @@ -387,7 +388,7 @@ def process_plugin_blocklist(incoming, chunk, output_file, log_file,
hashdata_bytes += 32
output.append(h.digest())
# Write the data file
output_file.write(b"a:%u:32:%s\n" % (chunk, hashdata_bytes))
output_file.write(b"a:%d:32:%d\n" % (chunk, hashdata_bytes))
# FIXME: we should really sort the output
for o in output:
output_file.write(o)
Expand Down Expand Up @@ -436,7 +437,7 @@ def get_tracker_lists(config, section, chunknum):
"Supported tags: %s\nConfig file tags: %s" %
(ALL_TAGS, desired_tags)
)
except ConfigParser.NoOptionError:
except configparser.NoOptionError:
desired_tags = DEFAULT_DISCONNECT_LIST_TAGS

# Retrieve domains that match filters
Expand Down Expand Up @@ -500,7 +501,7 @@ def get_plugin_lists(config, section, chunknum):
"configuration file is empty. A plugin "
"blocklist URL must be specified." % section)

for line in urllib2.urlopen(blocklist_url).readlines():
for line in urlopen(blocklist_url).readlines():
line = line.decode().strip()
# don't add blank lines or comments
if not line or line.startswith('#'):
Expand Down Expand Up @@ -550,7 +551,7 @@ def version_configurations(config, section, version, revert=False):
new_source_url = initial_source_url_value
old_s3_key = versioned_key
new_s3_key = initial_s3_key_value
ver_val = None
ver_val = ""

# change the config
if config.has_option(section, source_url):
Expand Down Expand Up @@ -641,7 +642,7 @@ def start_versioning(config, chunknum, shavar_prod_lists_branches):


def main():
config = ConfigParser.ConfigParser()
config = configparser.ConfigParser()
filename = config.read(["shavar_list_creation.ini"])
if not filename:
sys.stderr.write("Error loading shavar_list_creation.ini\n")
Expand Down
6 changes: 3 additions & 3 deletions publish2cloud.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import ConfigParser
import configparser
import hashlib
import os
import requests
Expand All @@ -22,7 +22,7 @@
)
from packaging import version as p_version

CONFIG = ConfigParser.SafeConfigParser(os.environ)
CONFIG = configparser.SafeConfigParser(os.environ)
CONFIG.read(['shavar_list_creation.ini'])
try:
REMOTE_SETTINGS_URL = ''
Expand All @@ -46,7 +46,7 @@
)
CLOUDFRONT_USER_ID = os.environ.get('CLOUDFRONT_USER_ID', None)

except ConfigParser.NoOptionError as err:
except configparser.NoOptionError as err:
REMOTE_SETTINGS_URL = ''
REMOTE_SETTINGS_AUTH = None
REMOTE_SETTINGS_BUCKET = ''
Expand Down
13 changes: 6 additions & 7 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
boto==2.40.0
publicsuffixlist==0.7.3
requests==2.20.0
trackingprotection_tools==0.4.6
packaging==19.2
boto==2.49.0
publicsuffixlist==0.7.4
requests==2.24.0
trackingprotection-tools==0.5.0
packaging==20.4

# test requirements
pytest==4.6.9
pytest==6.0.1
pytest-cov==2.10.0
mock==3.0.5
14 changes: 7 additions & 7 deletions tests/test_lists2safebrowsing.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import ConfigParser
import configparser
import hashlib
import json
import time
from unittest.mock import call, patch, mock_open

import pytest
from mock import call, patch, mock_open
from trackingprotection_tools import DisconnectParser

import lists2safebrowsing as l2s
Expand Down Expand Up @@ -532,7 +532,7 @@ def test_process_list(capsys, chunknum, log, list_type):

def _get_entity_or_plugin_lists(chunknum, config, function, section, data):
"""Auxiliary function for get_entity_lists/get_plugin_lists tests."""
with patch("lists2safebrowsing.urllib2.urlopen",
with patch("lists2safebrowsing.urlopen",
mock_open(read_data=data.encode())) as mocked_urlopen, \
patch("lists2safebrowsing.open", mock_open()) as mocked_open:
output_file, _ = function(config, section, chunknum)
Expand All @@ -553,7 +553,7 @@ def _get_entity_or_plugin_lists(chunknum, config, function, section, data):
)
def test_get_entity_lists(chunknum, section, version, testcase):
"""Test creating an entity list from a configuration section."""
config = ConfigParser.ConfigParser()
config = configparser.ConfigParser()
config.readfp(open("sample_shavar_list_creation.ini"))

if version:
Expand Down Expand Up @@ -586,7 +586,7 @@ def test_get_entity_lists(chunknum, section, version, testcase):

def test_get_plugin_lists(chunknum):
"""Test creating a plugin blocklist from a configuration section."""
config = ConfigParser.ConfigParser()
config = configparser.ConfigParser()
config.readfp(open("sample_shavar_list_creation.ini"))
section = "plugin-blocklist"

Expand Down Expand Up @@ -617,7 +617,7 @@ def test_get_plugin_lists(chunknum):

def test_get_plugin_lists_empty_url(chunknum):
"""Test empty blocklist URL handling in get_plugin_lists."""
config = ConfigParser.ConfigParser()
config = configparser.ConfigParser()
config.readfp(open("sample_shavar_list_creation.ini"))
section = "plugin-blocklist"

Expand All @@ -634,7 +634,7 @@ def test_get_plugin_lists_empty_url(chunknum):
)
def test_get_tracker_lists(parser, chunknum, section, domains, testcase):
"""Test creating a tracker blocklist from a configuration section."""
config = ConfigParser.ConfigParser()
config = configparser.ConfigParser()
config.readfp(open("sample_shavar_list_creation.ini"))
version = None

Expand Down

0 comments on commit 9d868ba

Please sign in to comment.