-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtasks.py
47 lines (39 loc) · 1.63 KB
/
tasks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from .celery import app
import os
from subprocess import check_call, check_output
import time
import sqlite3
import shutil
import sys
# File name of the SQLite database that run_crawl opens inside the scan
# directory to count the rows of the site_visits table.
DB_NAME = "crawl-data.sqlite"

# Plain files that run_crawl copies (shutil.copy) from the scan directory
# into the destination directory.
COPY_FILES = [
    DB_NAME,
    "openwpm.log",
]

# Directories that run_crawl copies recursively (shutil.copytree) from the
# scan directory into the destination directory.
COPY_DIRS = [
    "javascript.ldb",
]

# Shell-style environment prefix that run_crawl prepends to "git fetch"
# (run with shell=True) so that ssh uses the given identity file.
# The trailing space is required: the command is appended directly.
GIT_SSH_PREFIX = "GIT_SSH_COMMAND='ssh -i /efs/ssh/wpm_deploy_key' "
@app.task
def run_crawl(scan_name, start, end, commit=None):
    """Run ``crawl_aws.py`` for ``start``..``end`` and publish its output.

    Steps, in order:

    1. If ``commit`` is truthy, run ``git fetch`` and check that revision out.
    2. Run ``python crawl_aws.py <scan_dir> <start> <end>`` where ``scan_dir``
       is a fresh ``/tmp/scan<timestamp>`` path.
    3. Check that the ``site_visits`` table of the crawl database contains
       exactly ``end - start + 1`` rows, then create an empty ``good`` marker
       file in ``scan_dir``.
    4. Copy ``COPY_FILES`` and ``COPY_DIRS`` from ``scan_dir`` to
       ``/efs/crawls/<scan_name>/<start>-<end>/<public-ipv4>-<timestamp>``
       and create an empty ``finished`` marker file there.

    Args:
        scan_name: Name of the scan; becomes a path component of the
            destination directory.
        start: First index of the range handed to ``crawl_aws.py``. Must
            support integer arithmetic (``end - start + 1``).
        end: Last index of the range; the row-count check treats the range
            as inclusive on both ends.
        commit: Optional git revision to check out before crawling.

    Raises:
        ValueError: For every failure inside the task, including a wrong
            row count. The message is ``"Runtime exception <original>"`` and
            the original exception is chained as ``__cause__``.
    """
    try:
        if commit:
            # GIT_SSH_PREFIX is a shell-style "VAR=value " prefix, so the
            # fetch has to go through the shell. Both parts are constants.
            check_call(GIT_SSH_PREFIX + "git fetch", shell=True)
            check_call(["git", "checkout", commit])

        scan_dir = "/tmp/scan" + str(time.time())
        check_call(["python", "crawl_aws.py", scan_dir, str(start), str(end)])

        # sqlite3's connection context manager only commits/rolls back; it
        # does not close the connection, so close it explicitly.
        conn = sqlite3.connect(os.path.join(scan_dir, DB_NAME))
        try:
            cursor = conn.cursor()
            cursor.execute('select count(*) from site_visits')
            count = cursor.fetchone()[0]
        finally:
            conn.close()

        if count != end - start + 1:
            raise ValueError("Wrong number of site visits: {}".format(count))

        # Empty marker file: the row-count check passed for this scan dir.
        with open(os.path.join(scan_dir, "good"), 'w'):
            pass

        # check_output returns bytes on Python 3; decode so the directory
        # name contains the bare address rather than a b'...' repr.
        ipv4 = check_output(['ec2metadata', '--public-ipv4']).decode().strip()
        new_dir = "/efs/crawls/{}/{}-{}/{}-{}".format(
            scan_name, start, end, ipv4, time.time())
        os.makedirs(new_dir)

        for filename in COPY_FILES:
            shutil.copy(os.path.join(scan_dir, filename),
                        os.path.join(new_dir, filename))
        for dirname in COPY_DIRS:
            shutil.copytree(os.path.join(scan_dir, dirname),
                            os.path.join(new_dir, dirname))

        # Empty marker file: written last, after every copy has succeeded.
        with open(os.path.join(new_dir, "finished"), 'w'):
            pass
    except Exception as e:
        # Every failure is surfaced as a plain ValueError with the original
        # message embedded; "from e" keeps the original exception and its
        # traceback attached (replaces the Python 2 three-argument raise).
        # NOTE(review): presumably the uniform type is for the benefit of
        # the Celery result backend - confirm before narrowing this.
        raise ValueError("Runtime exception %s" % e) from e