Commit

Merge branch 'develop' of github.com:pierreaubert/spinorama into develop

pierreaubert committed Mar 15, 2024
2 parents ec6745d + da21557 commit f47bb47
Showing 19 changed files with 218 additions and 86 deletions.
10 changes: 9 additions & 1 deletion debug_server.py
@@ -20,10 +20,18 @@
class CORSRequestHandler(SimpleHTTPRequestHandler):
    """Generate CORS headers"""

    def do_GET(self):
        f = self.send_head()
        if f:
            try:
                self.copyfile(f, self.wfile)
            finally:
                f.close()

    def end_headers(self):
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "GET")
        self.send_header("Cache-Control", "no-store, no-cache, must-revalidate")
        # self.send_header("Cache-Control", "no-store, no-cache, must-revalidate")
        return super(CORSRequestHandler, self).end_headers()


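For reference, a minimal sketch of how a handler like the one above can be served locally. The port, the class name, and the ThreadingHTTPServer wrapper are illustrative assumptions, not taken from this commit.

# Illustrative sketch only: port and server class are assumptions,
# not part of debug_server.py as shown in this diff.
from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer


class LocalCORSHandler(SimpleHTTPRequestHandler):
    """Send permissive CORS headers so a local frontend can fetch assets."""

    def end_headers(self):
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "GET")
        return super().end_headers()


if __name__ == "__main__":
    # serve the current directory on http://localhost:8000
    ThreadingHTTPServer(("localhost", 8000), LocalCORSHandler).serve_forever()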
33 changes: 20 additions & 13 deletions generate_common.py
@@ -361,19 +361,26 @@ def sort_meta_score(s):


def find_metadata_file():
    pattern = "{}-[0-9a-f]*.json".format(cpaths.CPATH_METADATA_JSON[:-5])
    json_filenames = glob(pattern)
    # print('DEBUG: {}'.format(json_filenames))
    json_filename = None
    for json_maybe in json_filenames:
        check = re.match(".*/metadata[-][0-9a-f]{5}[.]json$", json_maybe)
        if check is not None:
            json_filename = json_maybe
            break
    if json_filename is not None and os.path.exists(json_filename):
        return json_filename

    return None
    json_paths = []
    for radical, json_path in (
        ("metadata", cpaths.CPATH_METADATA_JSON),
        ("eqdata", cpaths.CPATH_EQDATA_JSON),
    ):
        pattern = "{}-[0-9a-f]*.json".format(json_path[:-5])
        json_filenames = glob(pattern)
        # print('DEBUG: {}'.format(json_filenames))
        json_filename = None
        for json_maybe in json_filenames:
            regexp = ".*/{}[-][0-9a-f]{{5}}[.]json$".format(radical)
            check = re.match(regexp, json_maybe)
            if check is not None:
                json_filename = json_maybe
                break
        if json_filename is not None and os.path.exists(json_filename):
            json_paths.append(json_filename)
        else:
            json_paths.append(None)
    return json_paths


def find_metadata_file_chunks():
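A small usage sketch of the new return shape: find_metadata_file() now yields a two-element list, one entry per radical ("metadata", "eqdata"), each entry being a path or None. The import path and error handling below are assumptions for illustration; generate_html.py in the next file does the equivalent check.

# Hypothetical caller; mirrors how generate_html.py consumes the result.
from generate_common import find_metadata_file

meta_file, eq_file = find_metadata_file()
if meta_file is None or eq_file is None:
    # e.g. docs/assets/metadata-<hash>.json or eqdata-<hash>.json is missing
    raise SystemExit("hashed JSON files not found; re-run generate_meta.py")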
27 changes: 19 additions & 8 deletions generate_html.py
@@ -147,12 +147,14 @@ def generate_measurement(
        site=site,
        use_search=use_search,
    )
    meta_file, eq_file = find_metadata_file()
    index_deps = [
        "./src/website/speaker.html",
        "./src/website/speaker_desc.html",
        "./src/website/utils.py",
        "./datas/metadata.py",
        find_metadata_file(),
        meta_file,
        eq_file,
        # *find_metadata_file_chunks(),
        *glob("./src/website/assets/*.js"),
    ]
@@ -222,13 +224,17 @@ def generate_speakers(mako, dataframe, meta, site, use_search):

def main():
    # load all metadata from generated json file
    json_filename = find_metadata_file()
    if json_filename is None:
        logger.error("Cannot find %s", json_filename)
        sys.exit(1)
    metadata_json_filename, eqdata_json_filename = find_metadata_file()
    for radical, json_check in (
        ("metadata", metadata_json_filename),
        ("eqdata", eqdata_json_filename),
    ):
        if json_check is None:
            logger.error("Cannot find %s, you should run generate_meta.py again!", radical)
            sys.exit(1)

    meta = None
    with open(json_filename, "r") as f:
    with open(metadata_json_filename, "r") as f:
        meta = json.load(f)

    # only build a dictionary with all graphs
Expand Down Expand Up @@ -369,9 +375,14 @@ def main():
        logger.info("Write %s", item_name)
        item_html = mako_templates.get_template(item_name)
        # remove the ./docs/assets parts
        metadata_filename = json_filename[13:]
        metadata_filename = metadata_json_filename[13:]
        eqdata_filename = eqdata_json_filename[13:]
        item_content = item_html.render(
            df=main_df, meta=meta_sorted_score, site=site, metadata_filename=metadata_filename
            df=main_df,
            meta=meta_sorted_score,
            site=site,
            metadata_filename=metadata_filename,
            eqdata_filename=eqdata_filename,
        )
        item_filename = cpaths.CPATH_DOCS + "/" + item_name
        write_if_different(item_content, item_filename, force=False)
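To make the [13:] slicing concrete: the hashed files live under ./docs/assets (13 characters), so dropping that prefix leaves the path fragment the templates hand to the frontend. The hash value below is made up for illustration.

# Worked example with a made-up hash; only the slicing comes from the diff.
metadata_json_filename = "./docs/assets/metadata-ab12f.json"
metadata_filename = metadata_json_filename[13:]  # -> "/metadata-ab12f.json"
# common.js later builds the request URL as urlSite + 'assets' + metadataFilename,
# i.e. <site>/assets/metadata-ab12f.json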
88 changes: 64 additions & 24 deletions generate_meta.py
@@ -83,6 +83,7 @@
from datas import metadata

ACTIVATE_TRACING: bool = False
METADATA_HEAD_SIZE = 20


def tracing(msg: str):
@@ -790,47 +791,84 @@ def lign(v):


def dump_metadata(meta):
    # size of the md5 hash
    KEY_LENGTH = 5
    # size of years (2024 -> 4)
    YEAR_LENGTH = 4

    metadir = cpaths.CPATH_DOCS_ASSETS
    metafile = cpaths.CPATH_METADATA_JSON
    eqfile = cpaths.CPATH_EQDATA_JSON
    if not os.path.isdir(metadir):
        os.makedirs(metadir)

    def check_link(hashed_filename):
        # add a link to make it easier for other scripts to find the metadata
        with contextlib.suppress(OSError):
            os.symlink(Path(hashed_filename).name, cpaths.CPATH_METADATA_JSON)
            if 'metadata' in hashed_filename:
                os.symlink(Path(hashed_filename).name, cpaths.CPATH_METADATA_JSON)

    def dict_to_json(filename, d):
        js = json.dumps(d)
        key = md5(js.encode("utf-8"), usedforsecurity=False).hexdigest()[0:5]
        hashed_filename = "{}-{}.json".format(filename[:-5], key)
        if os.path.exists(hashed_filename) and os.path.exists(hashed_filename + ".zip"):
        key = md5(js.encode("utf-8"), usedforsecurity=False).hexdigest()[0:KEY_LENGTH]
        hashed_filename = "{}-{}.json".format(filename[:-KEY_LENGTH], key)
        if (
            os.path.exists(hashed_filename)
            and os.path.exists(hashed_filename + ".zip")
            and os.path.exists(hashed_filename + ".bz2")
        ):
            logger.debug("skipping %s", hashed_filename)
            check_link(hashed_filename)
            return

        # hash changed, remove old files
        old_hash_pattern = "{}-*.json".format(filename[:-5])
        for old_filename in glob(old_hash_pattern):
            logger.debug("remove old file %s", old_filename)
            os.remove(old_filename)
        old_hash_pattern = "{}-*.json".format(filename[:-KEY_LENGTH])
        old_hash_pattern_zip = "{}.zip".format(old_hash_pattern)
        old_hash_pattern_bz2 = "{}.bz2".format(old_hash_pattern)
        for pattern in (old_hash_pattern, old_hash_pattern_zip, old_hash_pattern_bz2):
            for old_filename in glob(pattern):
                logger.debug("remove old file %s", old_filename)
                print("remove old file {}".format(old_filename))
                os.remove(old_filename)

        # write the non zipped file
        with open(hashed_filename, "w", encoding="utf-8") as f:
            f.write(js)
            f.close()

        # write the zip file
        with zipfile.ZipFile(
            hashed_filename + ".zip",
            "w",
            compression=zipfile.ZIP_DEFLATED,
            allowZip64=True,
        ) as current_zip:
            current_zip.writestr(hashed_filename, js)
        logger.debug("generated %s and zip version", hashed_filename)
        # write the zip and bz2 files
        for ext, method in (
            ("zip", zipfile.ZIP_DEFLATED),
            ("bz2", zipfile.ZIP_BZIP2),
        ):
            with zipfile.ZipFile(
                "{}.{}".format(hashed_filename, ext),
                "w",
                compression=method,
                allowZip64=True,
            ) as current_compressed:
                current_compressed.writestr(hashed_filename, js)
            logger.debug("generated %s and %s version", hashed_filename, ext)

        check_link(hashed_filename)

    meta_full = {k: v for k, v in meta.items() if not v.get("skip", False)}
    # split eq data v.s. others as they are not required on the front page
    meta_full = {
        k: {k2: v2 for k2, v2 in v.items() if k2 != "eqs"}
        for k, v in meta.items()
        if not v.get("skip", False)
    }
    eq_full = {
        k: {k2: v2 for k2, v2 in v.items() if k2 in ("eqs", "brand", "model")}
        for k, v in meta.items()
        if not v.get("skip", False)
    }

    # first store a big file with all the data inside. It worked well up to 2023
    # when it became too large even compressed and slowed down the web frontend
    # too much
    dict_to_json(metafile, meta_full)
    dict_to_json(eqfile, eq_full)

    # debugjs = find_metadata_file()
    # debugmeta = None
@@ -840,31 +878,33 @@ def dict_to_json(filename, d):
    # print('DEBUG: size of meta ==> {}'.format(len(meta_full.keys())))
    # print('DEBUG: size of js ==> {}'.format(len(debugmeta.keys())))

    # generate a short version for rapid home page loading
    # generate a short head for rapid home page loading

    # TODO(pierre)
    # let's check if it is faster to load slices than the full file
    # partitioning is per year, each file is hashed and the hash
    # is stored in the name.

    # Warning: when reading the chunks you need to read them from recent to old and discard the keys you already have seen.
    meta_sorted_date = list(sort_metadata_per_date(meta_full).items())
    meta_sorted_date_head = dict(meta_sorted_date[0:10])
    meta_sorted_date_tail = dict(meta_sorted_date[10:])
    meta_sorted_date_head = dict(meta_sorted_date[0:METADATA_HEAD_SIZE])
    meta_sorted_date_tail = dict(meta_sorted_date[METADATA_HEAD_SIZE:])

    filename = metafile[:-5] + "-head.json"
    filename = metafile[:-KEY_LENGTH] + "-head.json"
    dict_to_json(filename, meta_sorted_date_head)

    def by_year(key):
        m = meta_sorted_date_tail[key]
        def_m = m["default_measurement"]
        year = int(m["measurements"][def_m].get("review_published", "1970")[0:4])
        year = int(m["measurements"][def_m].get("review_published", "1970")[0:YEAR_LENGTH])
        # group together years without too many reviews
        if year > 1970 and year < 2020:
            return 2019
        return year

    grouped_by_year = groupby(meta_sorted_date_tail, by_year)
    for year, group in grouped_by_year:
        filename = "{}-{:4d}.json".format(metafile[:-5], year)
        filename = "{}-{:4d}.json".format(metafile[:-KEY_LENGTH], year)
        dict_to_json(filename, {k: meta_sorted_date_tail[k] for k in list(group)})


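A condensed sketch of the naming-and-compression scheme dict_to_json implements above: the serialized JSON is hashed, the first KEY_LENGTH hex digits of the md5 go into the filename, and .zip plus .bz2 siblings are written alongside the plain file. The payload is hypothetical, and the stale-file cleanup and symlink steps are omitted.

import json
import zipfile
from hashlib import md5

payload = {"Example Brand Example Model": {"brand": "Example Brand"}}  # made-up data
js = json.dumps(payload)
key = md5(js.encode("utf-8")).hexdigest()[0:5]
hashed_filename = "metadata-{}.json".format(key)  # e.g. metadata-3f2a1.json (hypothetical)

# plain JSON next to compressed siblings, as dump_metadata does
with open(hashed_filename, "w", encoding="utf-8") as fd:
    fd.write(js)
for ext, method in (("zip", zipfile.ZIP_DEFLATED), ("bz2", zipfile.ZIP_BZIP2)):
    with zipfile.ZipFile("{}.{}".format(hashed_filename, ext), "w", compression=method) as z:
        z.writestr(hashed_filename, js)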
File renamed without changes.
1 change: 1 addition & 0 deletions src/spinorama/constant_paths.py
@@ -38,6 +38,7 @@
CPATH_DOCS_ASSETS_JS = "{}/{}".format(CPATH_DOCS_ASSETS, "")
CPATH_DOCS_ASSETS_CSS = "{}/{}".format(CPATH_DOCS_ASSETS, "")
CPATH_METADATA_JSON = "{}/{}".format(CPATH_DOCS_ASSETS, "metadata.json")
CPATH_EQDATA_JSON = "{}/{}".format(CPATH_DOCS_ASSETS, "eqdata.json")
CPATH_DOCS_SPEAKERS = "{}/{}".format(CPATH_DOCS, "speakers")
CPATH_DOCS_PICTURES = "{}/{}".format(CPATH_DOCS, "pictures")

66 changes: 47 additions & 19 deletions src/website/assets/common.js
@@ -16,7 +16,7 @@
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

import { urlSite, metadataFilename } from './meta.js';
import { urlSite, metadataFilename, eqdataFilename } from './meta.js';
import { getID } from './misc.js';

export const knownMeasurements = [
@@ -165,7 +165,7 @@ export function getSpeakerData(metaSpeakers, graph, speaker, origin, version) {

    const url = getSpeakerUrl(metaSpeakers, graph, speaker, origin, version);
    // console.log('fetching url=' + url)
    const spec = fetch(url)
    const spec = fetch(url, { headers: { 'Accept-Encoding': 'bz2, gzip, deflate', 'Content-Type': 'application/json' } })
        .then((response) => response.json())
        .catch((error) => {
            console.log('ERROR getSpeaker failed for ' + url + ' with error: ' + error);
@@ -174,51 +174,79 @@
    return spec;
}

export function getAllSpeakers(metadata) {
export function getAllSpeakers(table) {
    const metaSpeakers = {};
    const speakers = [];
    metadata.forEach((value) => {
    table.forEach((value) => {
        const speaker = value.brand + ' ' + value.model;
        speakers.push(speaker);
        metaSpeakers[speaker] = value;
    });
    return [metaSpeakers, speakers.sort()];
}

export function getMetadata() {
    const url = urlSite + 'assets/' + metadataFilename;
    // console.log('fetching url=' + url)
    const spec = fetch(url, {
        headers: {
            'Content-Encoding': 'gzip',
            'Content-Type': 'application/json',
        },
    })
function fetchDataAndMap(url, encoding) {
    // console.log('fetching url=' + url + ' encoding=' + encoding);
    const spec = fetch(url, { headers: { 'Accept-Encoding': encoding, 'Content-Type': 'application/json' } })
        .catch((error) => {
            console.log('ERROR getMetadata for ' + url + ' yield a 404 with error: ' + error);
            return null;
        })
        .then((response) => response.json())
        .catch((error) => {
            console.log('ERROR getMetadata for ' + url + ' yield a json error: ' + error);
            return null;
        })
        .then((data) => {
            // convert to object
            const metadata = Object.values(data);
            // console.log('metadata '+metadata.length)
            const values = Object.values(data);
            return new Map(
                metadata.map((speaker) => {
                values.map((speaker) => {
                    const key = getID(speaker.brand, speaker.model);
                    return [key, speaker];
                })
            );
        })
        .catch((error) => {
            console.log('ERROR getMetadata for ' + url + 'yield a 404 with error: ' + error);
            console.log('ERROR getMetadata for ' + url + ' failed: ' + error);
            return null;
        });
    return spec;
}

export function getMetadata() {
    const url = urlSite + 'assets' + metadataFilename;
    return fetchDataAndMap(url, 'bz2, zip, deflate');
}

export function getEQdata(table) {
    const url = urlSite + 'assets' + eqdataFilename;
    const runit = fetchDataAndMap(url, 'bz2, gzip, zip, deflate')
        .then((specs) => {
            const data = new Map();
            table.forEach((speaker, key) => {
                if (specs.has(key)) {
                    const eqs = specs.get(key);
                    if (eqs.eqs) {
                        speaker['eqs'] = eqs.eqs;
                    }
                }
                data.set(key, speaker);
            });
            return data;
        })
        .catch((error) => {
            console.log('ERROR getEQdata for ' + url + ' yield a 404 with error: ' + error);
            return null;
        });
    return runit;
}

export function getMetadataChunked() {
    const url = urlSite + 'assets/' + metadataFilename;
    // console.log('fetching url=' + url)
    const spec = fetch(url, {
        headers: {
            'Content-Encoding': 'gzip',
            'Accept-Encoding': 'zip',
            'Content-Type': 'application/json',
        },
    })