-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathbuild.py
executable file
·156 lines (135 loc) · 5.31 KB
/
build.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#!/usr/bin/env python3
# coding: utf-8
# pylint: disable=C0111
# pylint: disable=C0103
# pylint: disable=C0330
from __future__ import print_function
from datetime import datetime
import os
import sys
import traceback
import csv
import json
from operator import itemgetter
import requests
from bs4 import BeautifulSoup
import multiprocessing
from multiprocessing.pool import ThreadPool
# Google Play query parameters: LOCATION is sent as `gl=` and LANG as `hl=`
# in every store URL built by this script.
LOCATION="us"
LANG="en"
# Input files; the main section looks both of them up in the `src` directory.
SRC_CSV_FILE = "app-ids.csv"
SRC_MARKDOWN_FILE = "template.README.md"
# Placeholders that dist_readme() substitutes inside the Markdown template.
SRC_APPS_PLACEHOLDER = '%%APPS%%'  # generated table rows, one per app
SRC_APPSCOUNT_PLACEHOLDER = '%%APPS_COUNT%%'  # number of apps listed
SRC_TIMESTAMP_PLACEHOLDER = '%%BUILD_TIMESTAMP%%'  # build date and time
SRC_VERSION_PLACEHOLDER = '%%VERSION%%'  # `version` field of package.json
# Output file names: the README is written next to this script, the JSON and
# CSV files go into the `dist` directory.
DIST_README = 'README.md'
DIST_JSON = 'google-app-ids.json'
DIST_CSV = 'google-app-ids.csv'
# Markdown link to an app's store page.
# Format arguments: {0} title, {1} package id, {2} language, {3} location.
APP_LINK_PLACEHOLDER = "[{0}](https://play.google.com/store/apps/details?id={1}&hl={2}&gl={3})"
def csv_parse(csv_path):
    """Read application package ids from the first column of a CSV file.

    The first row is treated as a header and discarded. Blank lines and rows
    whose first cell is empty are skipped instead of raising ``IndexError``,
    and surrounding whitespace is stripped from each id.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        A list of single-element lists, ``[package_id]``, one per data row.

    Raises:
        Exception: If ``csv_path`` does not exist.
    """
    print('Parsing apps from CSV file...')
    if not os.path.exists(csv_path):
        raise Exception('{} source file could not be found!'.format(csv_path))

    apps = []
    # newline='' lets the csv module do its own line-ending handling.
    with open(csv_path, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        next(reader, None)  # drop the header row (no-op on an empty file)
        for row in reader:
            # csv.reader yields [] for blank lines; ignore those and rows
            # that carry no id in the first column.
            if not row:
                continue
            package_id = row[0].strip()
            if package_id:
                apps.append([package_id])
    return apps
def apps_preprocess(apps):
    """Download Google Play details for every app and return them sorted.

    Each store page is fetched on a thread pool and scraped for the title,
    icon URL and genre labels. Apps whose page has no title (marked with the
    ``'NOT FOUND'`` title) and apps whose download failed are reported on
    stdout and left out of the result.

    Args:
        apps: List of ``[package_id]`` lists, as returned by ``csv_parse``.

    Returns:
        A list of ``[package_id, title, logo_src, genres]`` lists sorted
        case-insensitively by title.
    """
    def app_download_details(app):
        """Fetch and scrape one store page; return None if the download fails."""
        print('|--Downloading ', app[0])
        url = 'https://play.google.com/store/apps/details?id={0}&hl={1}&gl={2}'.format(
            app[0], LANG, LOCATION)
        try:
            # Without a timeout a stalled connection would hang the build.
            html_contents = requests.get(url, timeout=30)
        except requests.RequestException as err:
            # Best effort: one unreachable page must not abort the whole run.
            print("|----> DOWNLOAD FAILED: {0} ({1})".format(app[0], err))
            return None

        soup = BeautifulSoup(html_contents.text, 'html.parser')
        logo_img = soup.find('img', attrs={'itemprop': 'image',
                                           'alt': 'Icon image'})
        logo_src = logo_img['src'] if logo_img else ''
        title = soup.find('span', attrs={'itemprop': 'name'})
        title_text = title.text if title else 'NOT FOUND'
        cats = [ahref['aria-label']
                for ahref in soup.select('div[itemprop=genre] > a[aria-label]')]
        return [app[0], title_text, logo_src, cats]

    try:
        cpus = max(min(multiprocessing.cpu_count(), 8), 2)
    except NotImplementedError:
        cpus = 2  # default

    print("| Downloading {0} app details using {1} parallel threads ...".format(
        len(apps), cpus))

    pool = ThreadPool(processes=cpus)
    try:
        # map() keeps input order and re-raises unexpected worker errors
        # instead of dropping them silently.
        results = pool.map(app_download_details, apps)
    finally:
        pool.close()
        pool.join()

    apps_new = []
    for details in results:
        if details is None:
            continue  # download failure, already reported by the worker
        # The 'NOT FOUND' marker is stored in the title (index 1); index 2 is
        # the logo URL and never carries it.
        if details[1] == 'NOT FOUND':
            print("|----> NOT FOUND: {}".format(details[0]))
            continue
        apps_new.append(details)

    return sorted(apps_new, key=lambda x: x[1].lower())
def dist_json(apps, output_path):
    """Write the app list to a JSON file.

    The file contains an array with one object per app, holding the keys
    ``img_src``, ``name``, ``package_name`` and ``genres``.

    Args:
        apps: List of ``[package_id, title, logo_src, genres]`` lists.
        output_path: Path of the JSON file to create or overwrite.
    """
    print('Saving json file...')
    json_data = [
        {
            'img_src': app[2],
            'name': app[1],
            'package_name': app[0],
            'genres': app[3]
        }
        for app in apps
    ]
    # ensure_ascii=False emits raw non-ASCII characters, so the encoding is
    # fixed to UTF-8 rather than left to the platform default.
    with open(output_path, 'w', encoding='utf-8') as outfile:
        json.dump(json_data, outfile, indent=2, ensure_ascii=False)
def dist_csv(apps, output_path):
    """Write the app list to a CSV file with an ``Icon,Name,Package,Genre`` header.

    Rows are produced by ``csv.writer`` so that names or genres containing
    commas or double quotes are quoted and escaped instead of corrupting the
    row. Fields are quoted only when needed. Genres are joined with ``,`` into
    a single field, and lines end with ``\\n``.

    Args:
        apps: List of ``[package_id, title, logo_src, genres]`` lists.
        output_path: Path of the CSV file to create or overwrite.
    """
    print('Saving csv file...')
    # newline='' stops the text layer from translating the writer's line
    # terminator; UTF-8 keeps non-ASCII app names writable on any locale.
    with open(output_path, 'w', newline='', encoding='utf-8') as outfile:
        writer = csv.writer(outfile, lineterminator='\n')
        writer.writerow(['Icon', 'Name', 'Package', 'Genre'])
        for app in apps:
            writer.writerow([
                app[2],  # logo
                app[1],  # name
                app[0],  # package
                ','.join(app[3]),  # categories
            ])
def dist_readme(apps, template_path, package_path, output_path):
    """Render the Markdown README from the template and the app list.

    One table row is generated per app (logo, linked title, package id,
    genres). The template placeholders for version, build timestamp, app
    count and app rows are then substituted. The output file is opened only
    after all content has been built, so a failure while reading the inputs
    does not leave a truncated README behind.

    Args:
        apps: List of ``[package_id, title, logo_src, genres]`` lists.
        template_path: Path of the Markdown template file.
        package_path: Path of a JSON file with a ``version`` key.
        output_path: Path of the Markdown file to create or overwrite.

    Raises:
        KeyError: If the JSON at ``package_path`` has no ``version`` key.
    """
    print('Saving Markdown file...')
    with open(template_path, 'r', encoding='utf-8') as template:
        template_contents = template.read()

    rows = []
    for app in apps:
        # Guard each optional field by its own index.
        logo_src = app[2].replace('=w240', '=w80') if len(app) > 2 else ''
        genres = app[3] if len(app) > 3 else []
        # A literal "|" would end the table cell early, so escape it.
        title = app[1].replace('|', '\\|')
        genre_text = ', '.join(genres).replace('|', '\\|')
        rows.append('| ![App Logo]({0}) | {1} | {2} | {3}\n'.format(
            logo_src,
            APP_LINK_PLACEHOLDER.format(title, app[0], LANG, LOCATION),
            app[0],
            genre_text))
    app_contents = ''.join(rows)

    with open(package_path, encoding='utf-8') as json_file:
        package = json.load(json_file)

    today = datetime.today()
    template_contents = template_contents.replace(
        SRC_VERSION_PLACEHOLDER, package['version'])
    template_contents = template_contents.replace(
        SRC_TIMESTAMP_PLACEHOLDER, today.strftime('%b %d, %Y at %H:%M'))
    template_contents = template_contents.replace(
        SRC_APPSCOUNT_PLACEHOLDER, str(len(apps)))
    # App rows go in last so placeholder-like text inside scraped titles is
    # never substituted.
    template_contents = template_contents.replace(
        SRC_APPS_PLACEHOLDER, app_contents)

    with open(output_path, 'w', encoding='utf-8') as output:
        output.write(template_contents)
#############################################################################
# Main
if __name__ == "__main__":
    # Script entry point: parse the id list, scrape the store pages, then
    # write the README, JSON and CSV outputs. Any failure prints the
    # traceback and an error line to stdout and exits with status 1.
    try:
        base_dir = os.path.dirname(os.path.realpath(__file__))
        src_dir = os.path.join(base_dir, 'src')
        dist_dir = os.path.join(base_dir, 'dist')

        app_ids = csv_parse(os.path.join(src_dir, SRC_CSV_FILE))
        app_details = apps_preprocess(app_ids)

        dist_readme(
            app_details,
            os.path.join(src_dir, SRC_MARKDOWN_FILE),
            os.path.join(base_dir, 'package.json'),
            os.path.join(base_dir, DIST_README)
        )
        dist_json(app_details, os.path.join(dist_dir, DIST_JSON))
        dist_csv(app_details, os.path.join(dist_dir, DIST_CSV))

        print('Done.')
    except Exception as err:
        traceback.print_exc(file=sys.stdout)
        print("[ERROR] {0}".format(err))
        sys.exit(1)