diff --git a/elixir/data.py b/elixir/data.py
index 4ec66e2d..00f99675 100755
--- a/elixir/data.py
+++ b/elixir/data.py
@@ -20,7 +20,7 @@
 import berkeleydb
 import re
-from . import lib
+from .lib import autoBytes
 import os
 import os.path
 import errno
@@ -72,6 +72,14 @@ def iter(self, dummy=False):
         if dummy:
             yield maxId, None, None, None
 
+    def exists(self, idx, line_num):
+        entries = deflist_regex.findall(self.data)
+        for id, _, line, _ in entries:
+            if id == idx and int(line) == line_num:
+                return True
+
+        return False
+
     def append(self, id, type, line, family):
         if type not in defTypeD:
             return
@@ -159,26 +167,32 @@ def __init__(self, filename, readonly, contentType, shared=False):
         self.ctype = contentType
 
     def exists(self, key):
-        key = lib.autoBytes(key)
+        key = autoBytes(key)
         return self.db.exists(key)
 
     def get(self, key):
-        key = lib.autoBytes(key)
+        key = autoBytes(key)
         p = self.db.get(key)
-        return self.ctype(p) if p is not None else None
+        if p is None:
+            return None
+        p = self.ctype(p)
+        return p
 
     def get_keys(self):
         return self.db.keys()
 
     def put(self, key, val, sync=False):
-        key = lib.autoBytes(key)
-        val = lib.autoBytes(val)
+        key = autoBytes(key)
+        val = autoBytes(val)
         if type(val) is not bytes:
             val = val.pack()
         self.db.put(key, val)
         if sync:
             self.db.sync()
 
+    def sync(self):
+        self.db.sync()
+
     def close(self):
         self.db.close()
 
@@ -201,13 +215,6 @@ def __init__(self, dir, readonly=True, dtscomp=False, shared=False):
         # Map serial number to filename
         self.vers = BsdDB(dir + '/versions.db', ro, PathList, shared=shared)
         self.defs = BsdDB(dir + '/definitions.db', ro, DefList, shared=shared)
-        self.defs_cache = {}
-        NOOP = lambda x: x
-        self.defs_cache['C'] = BsdDB(dir + '/definitions-cache-C.db', ro, NOOP, shared=shared)
-        self.defs_cache['K'] = BsdDB(dir + '/definitions-cache-K.db', ro, NOOP, shared=shared)
-        self.defs_cache['D'] = BsdDB(dir + '/definitions-cache-D.db', ro, NOOP, shared=shared)
-        self.defs_cache['M'] = BsdDB(dir + '/definitions-cache-M.db', ro, NOOP, shared=shared)
-        assert sorted(self.defs_cache.keys()) == sorted(lib.CACHED_DEFINITIONS_FAMILIES)
         self.refs = BsdDB(dir + '/references.db', ro, RefList, shared=shared)
         self.docs = BsdDB(dir + '/doccomments.db', ro, RefList, shared=shared)
         self.dtscomp = dtscomp
@@ -223,10 +230,6 @@ def close(self):
         self.file.close()
         self.vers.close()
         self.defs.close()
-        self.defs_cache['C'].close()
-        self.defs_cache['K'].close()
-        self.defs_cache['D'].close()
-        self.defs_cache['M'].close()
         self.refs.close()
         self.docs.close()
         if self.dtscomp:
diff --git a/elixir/update.py b/elixir/update.py
new file mode 100644
index 00000000..7b94052d
--- /dev/null
+++ b/elixir/update.py
@@ -0,0 +1,433 @@
+from concurrent.futures import ProcessPoolExecutor, wait
+from multiprocessing import Manager, cpu_count
+import logging
+from threading import Lock
+
+from elixir.lib import script, scriptLines, getFileFamily, isIdent, getDataDir
+from elixir.data import PathList, DefList, RefList, DB, BsdDB
+
+from find_compatible_dts import FindCompatibleDTS
+
+# Holds databases and update changes that have not been committed yet
+class UpdatePartialState:
+    def __init__(self, db, tag, idx_to_hash_and_filename, hash_to_idx):
+        self.db = db
+        self.tag = tag
+        self.idx_to_hash_and_filename = idx_to_hash_and_filename
+        self.hash_to_idx = hash_to_idx
+
+        self.defs_lock = Lock()
+        self.refs_lock = Lock()
+        self.docs_lock = Lock()
+        self.comps_lock = Lock()
+        self.comps_docs_lock = Lock()
+
+    def get_idx_from_hash(self, hash):
+        if hash in self.hash_to_idx:
+            return self.hash_to_idx[hash]
+        else:
+            return self.db.blob.get(hash)
+
+    # Add definitions to the database
+    def add_defs(self, defs):
+        with self.defs_lock:
+            for ident, occ_list in defs.items():
+                if self.db.defs.exists(ident):
+                    obj = self.db.defs.get(ident)
+                else:
+                    obj = DefList()
+
+                for (idx, type, line, family) in occ_list:
+                    obj.append(idx, type, line, family)
+
+                self.db.defs.put(ident, obj)
+
+    # Add references to the database
+    def add_refs(self, refs):
+        with self.refs_lock:
+            for ident, idx_to_lines in refs.items():
+                obj = self.db.refs.get(ident)
+                if obj is None:
+                    obj = RefList()
+
+                for (idx, family), lines in idx_to_lines.items():
+                    lines_str = ','.join((str(n) for n in lines))
+                    obj.append(idx, lines_str, family)
+
+                self.db.refs.put(ident, obj)
+
+    # Add documentation references to the database
+    def add_docs(self, idx, family, docs):
+        with self.docs_lock:
+            self.add_to_reflist(self.db.docs, idx, family, docs)
+
+    # Add compatible references to the database
+    def add_comps(self, idx, family, comps):
+        with self.comps_lock:
+            self.add_to_reflist(self.db.comps, idx, family, comps)
+
+    # Add compatible docs to the database
+    def add_comps_docs(self, idx, family, comps_docs):
+        with self.comps_docs_lock:
+            self.add_to_reflist(self.db.comps_docs, idx, family, comps_docs)
+
+    # Add data to a database file that uses the reflist schema
+    def add_to_reflist(self, db_file, idx, family, to_add):
+        for ident, lines in to_add.items():
+            if db_file.exists(ident):
+                obj = db_file.get(ident)
+            else:
+                obj = RefList()
+
+            lines_str = ','.join((str(n) for n in lines))
+            obj.append(idx, lines_str, family)
+            db_file.put(ident, obj)
+
+
+# NOTE: not thread safe, has to be run before the actual job is started
+# Builds UpdatePartialState
+def build_partial_state(db, tag):
+    if db.vars.exists('numBlobs'):
+        idx = db.vars.get('numBlobs')
+    else:
+        idx = 0
+
+    # Get blob hashes and associated file names (without path)
+    blobs = scriptLines('list-blobs', '-f', tag)
+
+    idx_to_hash_and_filename = {}
+    hash_to_idx = {}
+
+    # Collect new blobs, assign database ids to the blobs
+    for blob in blobs:
+        hash, filename = blob.split(b' ',maxsplit=1)
+        blob_exist = db.blob.exists(hash)
+        if not blob_exist:
+            hash_to_idx[hash] = idx
+            idx_to_hash_and_filename[idx] = (hash, filename.decode())
+            idx += 1
+
+    # Reserve ids in the blob space.
+    # NOTE: this variable does not represent the actual number of blobs in the database now,
+    # just the number of ids reserved for blobs. The space is not guaranteed to be continuous
+    # if the update job is interrupted or versions are scrubbed from the database.
+    db.vars.put('numBlobs', idx)
+
+    return UpdatePartialState(db, tag, idx_to_hash_and_filename, hash_to_idx)
+
+# NOTE: not thread safe, has to be run after the job is finished
+# Applies changes from the partial update state - mainly to the hash, file, blob and versions databases
+# It is assumed that indexes not present in versions are ignored
+def apply_partial_state(state: UpdatePartialState):
+    for idx, (hash, filename) in state.idx_to_hash_and_filename.items():
+        state.db.hash.put(idx, hash)
+        state.db.file.put(idx, filename)
+
+    for hash, idx in state.hash_to_idx.items():
+        state.db.blob.put(hash, idx)
+
+    # Update versions
+    blobs = scriptLines('list-blobs', '-p', state.tag)
+    buf = []
+
+    for blob in blobs:
+        hash, path = blob.split(b' ', maxsplit=1)
+        idx = state.get_idx_from_hash(hash)
+        buf.append((idx, path))
+
+    buf.sort()
+    obj = PathList()
+    for idx, path in buf:
+        obj.append(idx, path)
+
+    state.db.vers.put(state.tag, obj, sync=True)
+
+
+# Get definitions for a file
+def get_defs(idx, hash, filename, defs):
+    family = getFileFamily(filename)
+    if family in [None, 'M']:
+        return
+
+    lines = scriptLines('parse-defs', hash, filename, family)
+
+    for l in lines:
+        ident, type, line = l.split(b' ')
+        type = type.decode()
+        line = int(line.decode())
+        if isIdent(ident):
+            if ident not in defs:
+                defs[ident] = []
+            defs[ident].append((idx, type, line, family))
+
+    return defs
+
+
+# NOTE: it is assumed that update_refs and update_defs are not running
+# concurrently, hence defs are not locked.
+# The defs database MUST NOT be updated while get_refs is running.
+# Get references for a file
+def get_refs(idx, hash, filename, defs, refs):
+    family = getFileFamily(filename)
+    if family is None:
+        return
+
+    # Kconfig values are saved as CONFIG_
+    prefix = b'' if family != 'K' else b'CONFIG_'
+
+    tokens = scriptLines('tokenize-file', '-b', hash, family)
+    even = True
+    line_num = 1
+
+    for tok in tokens:
+        even = not even
+        if even:
+            tok = prefix + tok
+            deflist = defs.get(tok)
+
+            if deflist and not deflist.exists(str(idx).encode(), line_num):
+                # We only index CONFIG_??? in makefiles
+                if (family != 'M' or tok.startswith(b'CONFIG_')):
+                    if tok not in refs:
+                        refs[tok] = {}
+
+                    if (idx, family) not in refs[tok]:
+                        refs[tok][(idx, family)] = []
+
+                    refs[tok][(idx, family)].append(line_num)
+
+        else:
+            line_num += tok.count(b'\1')
+
+    return refs
+
+# Collect compatibles script output into a reflist-schema compatible format
+def collect_get_blob_output(lines):
+    results = {}
+    for l in lines:
+        ident, line = l.split(' ')
+        line = int(line)
+
+        if ident not in results:
+            results[ident] = []
+        results[ident].append(line)
+
+    return results
+
+# Get docs for a single file
+def get_docs(idx, hash, filename):
+    family = getFileFamily(filename)
+    if family in [None, 'M']: return
+
+    lines = (line.decode() for line in scriptLines('parse-docs', hash, filename))
+    docs = collect_get_blob_output(lines)
+
+    return (idx, family, docs)
+
+# Get compatible references for a single file
+def get_comps(idx, hash, filename):
+    family = getFileFamily(filename)
+    if family in [None, 'K', 'M']: return
+
+    compatibles_parser = FindCompatibleDTS()
+    lines = compatibles_parser.run(scriptLines('get-blob', hash), family)
+    comps = collect_get_blob_output(lines)
+
+    return (idx, family, comps)
+
+# Get compatible documentation references for a single file
+# NOTE: assumes comps is not running concurrently
+def get_comps_docs(idx, hash, _, comps):
+    family = 'B'
+
+    compatibles_parser = FindCompatibleDTS()
+    lines = compatibles_parser.run(scriptLines('get-blob', hash), family)
+    comps_docs = {}
+    for l in lines:
+        ident, line = l.split(' ')
+
+        if comps.exists(ident):
+            if ident not in comps_docs:
+                comps_docs[ident] = []
+            comps_docs[ident].append(int(line))
+
+    return (idx, family, comps_docs)
+
+def batch(job):
+    def f(chunk, **kwargs):
+        return [job(*args, **kwargs) for args in chunk]
+    return f
+
+# NOTE: some of the following functions are kind of redundant, and could sometimes be
+# higher-order functions, but that's not supported by multiprocessing
+
+def batch_defs(chunk):
+    defs = {}
+    for ch in chunk:
+        get_defs(*ch, defs=defs)
+    return defs
+
+# Handle defs task results
+def handle_defs_results(state):
+    def f(future):
+        try:
+            result = future.result()
+            if result is not None:
+                state.add_defs(result)
+        except Exception:
+            logging.exception(f"handling future results for defs raised")
+    return f
+
+def batch_docs(*args, **kwargs): return batch(get_docs)(*args, **kwargs)
+def batch_comps(*args, **kwargs): return batch(get_comps)(*args, **kwargs)
+
+# Run references tasks on a chunk
+# NOTE: references can open the definitions database in read-only mode, because
+# the definitions job is finished at this point
+def batch_refs(chunk, **kwargs):
+    defs = BsdDB(getDataDir() + '/definitions.db', True, DefList)
+    refs = {}
+    for args in chunk:
+        get_refs(*args, defs=defs, refs=refs, **kwargs)
+    defs.close()
+    return refs
+
+# Handle refs task results
+def handle_refs_results(state):
+    def f(future):
+        try:
+            result = future.result()
+            if result is not None:
+                state.add_refs(result)
+        except Exception:
+            logging.exception(f"handling future results for refs raised")
+    return f
+
+# Run comps_docs tasks on a chunk
+# NOTE: the compatibledts database can be opened for the same reason as in batch_refs
+def batch_comps_docs(chunk, **kwargs):
+    comps = BsdDB(getDataDir() + '/compatibledts.db', True, DefList)
+    result = [get_comps_docs(*args, comps=comps, **kwargs) for args in chunk]
+    comps.close()
+    return result
+
+def handle_batch_results(callback):
+    def f(future):
+        try:
+            results = future.result()
+            for result in results:
+                if result is not None:
+                    callback(*result)
+        except Exception:
+            logging.exception(f"handling future results for {callback.__name__} raised")
+    return f
+
+# Split a list into sublists of chunk_size elements
+def split_into_chunks(list, chunk_size):
+    return [list[i:i+chunk_size] for i in range(0, len(list), chunk_size)]
+
+# Update a single version
+def update_version(db, tag, pool, manager, dts_comp_support):
+    state = build_partial_state(db, tag)
+
+    # Collect blobs to process and split the list of blobs into chunks
+    idxes = [(idx, hash, filename) for (idx, (hash, filename)) in state.idx_to_hash_and_filename.items()]
+    chunksize = int(len(idxes) / cpu_count())
+    chunksize = min(max(1, chunksize), 400)
+    chunks = split_into_chunks(idxes, chunksize)
+
+    def after_all_defs_done():
+        # NOTE: the defs database cannot be written to from now on. This is very important - a process pool is used,
+        # and bsddb cannot be shared between processes until/unless bsddb concurrent data store is implemented.
+        # Operations on closed databases raise exceptions that would in this case be indicative of a bug.
+        state.db.defs.sync()
+        state.db.defs.close()
+        print("defs db closed")
+
+        # Start the refs job
+        futures = [pool.submit(batch_refs, ch) for ch in chunks]
+        return ("refs", (futures, handle_refs_results(state), None))
+
+    def after_all_comps_done():
+        state.db.comps.sync()
+        state.db.comps.close()
+        print("comps db closed")
+
+        # Start the comps_docs job
+        futures = [pool.submit(batch_comps_docs, ch) for ch in chunks]
+        return ("comps_docs", (futures, handle_batch_results(state.add_comps_docs), None))
+
+    # Used to track futures for jobs, what to do after a single future finishes,
+    # and what to do after the whole job finishes
+    to_track = {
+        "defs": ([], handle_defs_results(state), after_all_defs_done),
+        "docs": ([], handle_batch_results(state.add_docs), None),
+    }
+
+    if dts_comp_support:
+        to_track["comps"] = ([], handle_batch_results(state.add_comps), after_all_comps_done)
+
+    # Start initial jobs for all chunks
+    for ch in chunks:
+        to_track["defs"][0].append(pool.submit(batch_defs, ch))
+        to_track["docs"][0].append(pool.submit(batch_docs, ch))
+
+        if dts_comp_support:
+            to_track["comps"][0].append(pool.submit(batch_comps, ch))
+
+    # Used to track progress of jobs
+    total_lengths = {
+        k: (0, len(v[0])) for k, v in to_track.items()
+    }
+
+    # Track job progress
+    while len(to_track) != 0:
+        new_to_track = {}
+
+        for name, (futures, after_single_done, after_all_done) in to_track.items():
+            new_futures = futures
+
+            if len(futures) != 0:
+                result = wait(futures, timeout=1)
+
+                if len(result.done) != 0:
+                    total_lengths[name] = (total_lengths[name][0] + len(result.done), total_lengths[name][1])
+                    print(name, f"progress: {int((total_lengths[name][0]/total_lengths[name][1])*100)}%")
+                    new_futures = [f for f in futures if f not in result.done]
+
+                    for f in result.done:
+                        if after_single_done is not None:
+                            after_single_done(f)
+
+                if len(new_futures) == 0:
+                    if after_all_done is not None:
+                        k, v = after_all_done()
+                        new_to_track[k] = v
+                        total_lengths[k] = (0, len(v[0]))
+                else:
+                    new_to_track[name] = (new_futures, after_single_done, after_all_done)
+            else:
+                new_to_track[name] = (new_futures, after_single_done, after_all_done)
+
+        to_track = new_to_track
+
+    print("update done, applying partial state")
+    apply_partial_state(state)
+
+if __name__ == "__main__":
+    dts_comp_support = int(script('dts-comp'))
+    db = None
+
+    manager = Manager()
+    with ProcessPoolExecutor() as pool:
+        for tag in scriptLines('list-tags'):
+            if db is None:
+                db = DB(getDataDir(), readonly=False, dtscomp=dts_comp_support, shared=True)
+
+            if not db.vers.exists(tag):
+                print("updating tag", tag)
+                update_version(db, tag, pool, manager, dts_comp_support)
+                db.close()
+                db = None
+
diff --git a/script.sh b/script.sh
index 1446777e..d67e5594 100755
--- a/script.sh
+++ b/script.sh
@@ -165,7 +165,7 @@ parse_defs_C()
     git cat-file blob "$opt1" > "$full_path"
 
     # Use ctags to parse most of the defs
-    ctags -x --kinds-c=+p+x --extras='-{anonymous}' "$full_path" |
+    ctags -u -x --kinds-c=+p+x --extras='-{anonymous}' "$full_path" |
     grep -avE "^operator |CONFIG_" |
     awk '{print $1" "$2" "$3}'
 
@@ -182,7 +182,7 @@ parse_defs_K()
     tmp=`mktemp -d`
     full_path=$tmp/$opt2
     git cat-file blob "$opt1" > "$full_path"
-    ctags -x --language-force=kconfig --kinds-kconfig=c --extras-kconfig=-{configPrefixed} "$full_path" |
+    ctags -u -x --language-force=kconfig --kinds-kconfig=c --extras-kconfig=-{configPrefixed} "$full_path" |
     awk '{print "CONFIG_"$1" "$2" "$3}'
     rm "$full_path"
     rmdir $tmp
@@ -193,7 +193,7 @@ parse_defs_D()
    tmp=`mktemp -d`
     full_path=$tmp/$opt2
     git cat-file blob "$opt1" > "$full_path"
-    ctags -x --language-force=dts "$full_path" |
+    ctags -u -x --language-force=dts "$full_path" |
     awk '{print $1" "$2" "$3}'
     rm "$full_path"
     rmdir $tmp
@@ -204,7 +204,13 @@ parse_docs()
     tmpfile=`mktemp`
     git cat-file blob "$opt1" > "$tmpfile"
-    "$script_dir/find-file-doc-comments.pl" "$tmpfile" || exit "$?"
+
+    # Shortcut: if '/**' isn't present in the file, it cannot contain a doc.
+    # This avoids calling find-file-doc-comments.pl on most files, which is an
+    # expensive operation.
+    if grep -qF '/**' "$tmpfile"; then
+        "$script_dir/find-file-doc-comments.pl" "$tmpfile" || exit "$?"
+    fi
 
     rm -rf "$tmpfile"
 }
diff --git a/update.py b/update.py
deleted file mode 100755
index 9d84ff31..00000000
--- a/update.py
+++ /dev/null
@@ -1,638 +0,0 @@
-#!/usr/bin/env python3
-
-# This file is part of Elixir, a source code cross-referencer.
-#
-# Copyright (C) 2017--2020 Mikaël Bouillot
-# Maxime Chretien
-# and contributors
-#
-# Elixir is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# Elixir is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with Elixir. If not, see <http://www.gnu.org/licenses/>.
-
-# Throughout, an "idx" is the sequential number associated with a blob.
-# This is different from that blob's Git hash.
- -from sys import argv -from threading import Thread, Lock, Event, Condition - -import elixir.lib as lib -from elixir.lib import script, scriptLines -import elixir.data as data -from elixir.data import PathList -from find_compatible_dts import FindCompatibleDTS - -verbose = False - -dts_comp_support = int(script('dts-comp')) - -compatibles_parser = FindCompatibleDTS() - -db = data.DB(lib.getDataDir(), readonly=False, shared=True, dtscomp=dts_comp_support) - -# Number of cpu threads (+2 for version indexing) -cpu = 10 -threads_list = [] - -hash_file_lock = Lock() # Lock for db.hash and db.file -blobs_lock = Lock() # Lock for db.blobs -defs_lock = Lock() # Lock for db.defs -refs_lock = Lock() # Lock for db.refs -docs_lock = Lock() # Lock for db.docs -comps_lock = Lock() # Lock for db.comps -comps_docs_lock = Lock() # Lock for db.comps_docs -tag_ready = Condition() # Waiting for new tags - -new_idxes = [] # (new idxes, Event idxes ready, Event defs ready, Event comps ready, Event vers ready) -bindings_idxes = [] # DT bindings documentation files -idx_key_mod = 1000000 -defs_idxes = {} # Idents definitions stored with (idx*idx_key_mod + line) as the key. - -tags_done = False # True if all tags have been added to new_idxes - -# Progress variables [tags, finished threads] -tags_defs = [0, 0] -tags_defs_lock = Lock() -tags_refs = [0, 0] -tags_refs_lock = Lock() -tags_docs = [0, 0] -tags_docs_lock = Lock() -tags_comps = [0, 0] -tags_comps_lock = Lock() -tags_comps_docs = [0, 0] -tags_comps_docs_lock = Lock() - -class UpdateIds(Thread): - def __init__(self, tag_buf): - Thread.__init__(self, name="UpdateIdsElixir") - self.tag_buf = tag_buf - - def run(self): - global new_idxes, tags_done, tag_ready - self.index = 0 - - for tag in self.tag_buf: - - new_idxes.append((self.update_blob_ids(tag), Event(), Event(), Event(), Event())) - - progress('ids: ' + tag.decode() + ': ' + str(len(new_idxes[self.index][0])) + - ' new blobs', self.index+1) - - new_idxes[self.index][1].set() # Tell that the tag is ready - - self.index += 1 - - # Wake up waiting threads - with tag_ready: - tag_ready.notify_all() - - tags_done = True - progress('ids: Thread finished', self.index) - - def update_blob_ids(self, tag): - - global hash_file_lock, blobs_lock - - if db.vars.exists('numBlobs'): - idx = db.vars.get('numBlobs') - else: - idx = 0 - - # Get blob hashes and associated file names (without path) - blobs = scriptLines('list-blobs', '-f', tag) - - new_idxes = [] - for blob in blobs: - hash, filename = blob.split(b' ',maxsplit=1) - with blobs_lock: - blob_exist = db.blob.exists(hash) - if not blob_exist: - db.blob.put(hash, idx) - - if not blob_exist: - with hash_file_lock: - db.hash.put(idx, hash) - db.file.put(idx, filename) - - new_idxes.append(idx) - if verbose: - print(f"New blob #{idx} {hash}:{filename}") - idx += 1 - db.vars.put('numBlobs', idx) - return new_idxes - - -class UpdateVersions(Thread): - def __init__(self, tag_buf): - Thread.__init__(self, name="UpdateVersionsElixir") - self.tag_buf = tag_buf - - def run(self): - global new_idxes, tag_ready - - index = 0 - - while index < len(self.tag_buf): - if index >= len(new_idxes): - # Wait for new tags - with tag_ready: - tag_ready.wait() - continue - - tag = self.tag_buf[index] - - new_idxes[index][1].wait() # Make sure the tag is ready - - self.update_versions(tag) - - new_idxes[index][4].set() # Tell that UpdateVersions processed the tag - - progress('vers: ' + tag.decode() + ' done', index+1) - - index += 1 - - progress('vers: Thread finished', index) - - 
def update_versions(self, tag): - global blobs_lock - - # Get blob hashes and associated file paths - blobs = scriptLines('list-blobs', '-p', tag) - buf = [] - - for blob in blobs: - hash, path = blob.split(b' ', maxsplit=1) - with blobs_lock: - idx = db.blob.get(hash) - buf.append((idx, path)) - - buf = sorted(buf) - obj = PathList() - for idx, path in buf: - obj.append(idx, path) - - # Store DT bindings documentation files to parse them later - if path[:33] == b'Documentation/devicetree/bindings': - bindings_idxes.append(idx) - - if verbose: - print(f"Tag {tag}: adding #{idx} {path}") - db.vers.put(tag, obj, sync=True) - - -def generate_defs_caches(): - for key in db.defs.get_keys(): - value = db.defs.get(key) - for family in ['C', 'K', 'D', 'M']: - if (lib.compatibleFamily(value.get_families(), family) or - lib.compatibleMacro(value.get_macros(), family)): - db.defs_cache[family].put(key, b'') - - -class UpdateDefs(Thread): - def __init__(self, start, inc): - Thread.__init__(self, name="UpdateDefsElixir") - self.index = start - self.inc = inc # Equivalent to the number of defs threads - - def run(self): - global new_idxes, tags_done, tag_ready, tags_defs, tags_defs_lock - - while not (tags_done and self.index >= len(new_idxes)): - if self.index >= len(new_idxes): - # Wait for new tags - with tag_ready: - tag_ready.wait() - continue - - new_idxes[self.index][1].wait() # Make sure the tag is ready - - with tags_defs_lock: - tags_defs[0] += 1 - - self.update_definitions(new_idxes[self.index][0]) - - new_idxes[self.index][2].set() # Tell that UpdateDefs processed the tag - - self.index += self.inc - - with tags_defs_lock: - tags_defs[1] += 1 - progress('defs: Thread ' + str(tags_defs[1]) + '/' + str(self.inc) + ' finished', tags_defs[0]) - - - def update_definitions(self, idxes): - global hash_file_lock, defs_lock, tags_defs - - for idx in idxes: - if idx % 1000 == 0: progress('defs: ' + str(idx), tags_defs[0]) - - with hash_file_lock: - hash = db.hash.get(idx) - filename = db.file.get(idx) - - family = lib.getFileFamily(filename) - if family in [None, 'M']: continue - - lines = scriptLines('parse-defs', hash, filename, family) - - with defs_lock: - for l in lines: - ident, type, line = l.split(b' ') - type = type.decode() - line = int(line.decode()) - - defs_idxes[idx*idx_key_mod + line] = ident - - if db.defs.exists(ident): - obj = db.defs.get(ident) - elif lib.isIdent(ident): - obj = data.DefList() - else: - continue - - obj.append(idx, type, line, family) - if verbose: - print(f"def {type} {ident} in #{idx} @ {line}") - db.defs.put(ident, obj) - - generate_defs_caches() - - -class UpdateRefs(Thread): - def __init__(self, start, inc): - Thread.__init__(self, name="UpdateRefsElixir") - self.index = start - self.inc = inc # Equivalent to the number of refs threads - - def run(self): - global new_idxes, tags_done, tags_refs, tags_refs_lock - - while not (tags_done and self.index >= len(new_idxes)): - if self.index >= len(new_idxes): - # Wait for new tags - with tag_ready: - tag_ready.wait() - continue - - new_idxes[self.index][1].wait() # Make sure the tag is ready - new_idxes[self.index][2].wait() # Make sure UpdateDefs processed the tag - - with tags_refs_lock: - tags_refs[0] += 1 - - self.update_references(new_idxes[self.index][0]) - - self.index += self.inc - - with tags_refs_lock: - tags_refs[1] += 1 - progress('refs: Thread ' + str(tags_refs[1]) + '/' + str(self.inc) + ' finished', tags_refs[0]) - - def update_references(self, idxes): - global hash_file_lock, defs_lock, refs_lock, 
tags_refs - - for idx in idxes: - if idx % 1000 == 0: progress('refs: ' + str(idx), tags_refs[0]) - - with hash_file_lock: - hash = db.hash.get(idx) - filename = db.file.get(idx) - - family = lib.getFileFamily(filename) - if family == None: continue - - prefix = b'' - # Kconfig values are saved as CONFIG_ - if family == 'K': - prefix = b'CONFIG_' - - tokens = scriptLines('tokenize-file', '-b', hash, family) - even = True - line_num = 1 - idents = {} - with defs_lock: - for tok in tokens: - even = not even - if even: - tok = prefix + tok - - if (db.defs.exists(tok) and - not ( (idx*idx_key_mod + line_num) in defs_idxes and - defs_idxes[idx*idx_key_mod + line_num] == tok ) and - (family != 'M' or tok.startswith(b'CONFIG_'))): - # We only index CONFIG_??? in makefiles - if tok in idents: - idents[tok] += ',' + str(line_num) - else: - idents[tok] = str(line_num) - - else: - line_num += tok.count(b'\1') - - with refs_lock: - for ident, lines in idents.items(): - if db.refs.exists(ident): - obj = db.refs.get(ident) - else: - obj = data.RefList() - - obj.append(idx, lines, family) - if verbose: - print(f"ref: {ident} in #{idx} @ {lines}") - db.refs.put(ident, obj) - - -class UpdateDocs(Thread): - def __init__(self, start, inc): - Thread.__init__(self, name="UpdateDocsElixir") - self.index = start - self.inc = inc # Equivalent to the number of docs threads - - def run(self): - global new_idxes, tags_done, tags_docs, tags_docs_lock - - while not (tags_done and self.index >= len(new_idxes)): - if self.index >= len(new_idxes): - # Wait for new tags - with tag_ready: - tag_ready.wait() - continue - - new_idxes[self.index][1].wait() # Make sure the tag is ready - - with tags_docs_lock: - tags_docs[0] += 1 - - self.update_doc_comments(new_idxes[self.index][0]) - - self.index += self.inc - - with tags_docs_lock: - tags_docs[1] += 1 - progress('docs: Thread ' + str(tags_docs[1]) + '/' + str(self.inc) + ' finished', tags_docs[0]) - - def update_doc_comments(self, idxes): - global hash_file_lock, docs_lock, tags_docs - - for idx in idxes: - if idx % 1000 == 0: progress('docs: ' + str(idx), tags_docs[0]) - - with hash_file_lock: - hash = db.hash.get(idx) - filename = db.file.get(idx) - - family = lib.getFileFamily(filename) - if family in [None, 'M']: continue - - lines = scriptLines('parse-docs', hash, filename) - with docs_lock: - for l in lines: - ident, line = l.split(b' ') - line = int(line.decode()) - - if db.docs.exists(ident): - obj = db.docs.get(ident) - else: - obj = data.RefList() - - obj.append(idx, str(line), family) - if verbose: - print(f"doc: {ident} in #{idx} @ {line}") - db.docs.put(ident, obj) - - -class UpdateComps(Thread): - def __init__(self, start, inc): - Thread.__init__(self, name="UpdateCompsElixir") - self.index = start - self.inc = inc # Equivalent to the number of comps threads - - def run(self): - global new_idxes, tags_done, tags_comps, tags_comps_lock - - while not (tags_done and self.index >= len(new_idxes)): - if self.index >= len(new_idxes): - # Wait for new tags - with tag_ready: - tag_ready.wait() - continue - - new_idxes[self.index][1].wait() # Make sure the tag is ready - - with tags_comps_lock: - tags_comps[0] += 1 - - self.update_compatibles(new_idxes[self.index][0]) - - new_idxes[self.index][3].set() # Tell that UpdateComps processed the tag - - self.index += self.inc - - with tags_comps_lock: - tags_comps[1] += 1 - progress('comps: Thread ' + str(tags_comps[1]) + '/' + str(self.inc) + ' finished', tags_comps[0]) - - def update_compatibles(self, idxes): - global 
hash_file_lock, comps_lock, tags_comps - - for idx in idxes: - if idx % 1000 == 0: progress('comps: ' + str(idx), tags_comps[0]) - - with hash_file_lock: - hash = db.hash.get(idx) - filename = db.file.get(idx) - - family = lib.getFileFamily(filename) - if family in [None, 'K', 'M']: continue - - lines = compatibles_parser.run(scriptLines('get-blob', hash), family) - comps = {} - for l in lines: - ident, line = l.split(' ') - - if ident in comps: - comps[ident] += ',' + str(line) - else: - comps[ident] = str(line) - - with comps_lock: - for ident, lines in comps.items(): - if db.comps.exists(ident): - obj = db.comps.get(ident) - else: - obj = data.RefList() - - obj.append(idx, lines, family) - if verbose: - print(f"comps: {ident} in #{idx} @ {line}") - db.comps.put(ident, obj) - - -class UpdateCompsDocs(Thread): - def __init__(self, start, inc): - Thread.__init__(self, name="UpdateCompsDocsElixir") - self.index = start - self.inc = inc # Equivalent to the number of comps_docs threads - - def run(self): - global new_idxes, tags_done, tags_comps_docs, tags_comps_docs_lock - - while not (tags_done and self.index >= len(new_idxes)): - if self.index >= len(new_idxes): - # Wait for new tags - with tag_ready: - tag_ready.wait() - continue - - new_idxes[self.index][1].wait() # Make sure the tag is ready - new_idxes[self.index][3].wait() # Make sure UpdateComps processed the tag - new_idxes[self.index][4].wait() # Make sure UpdateVersions processed the tag - - with tags_comps_docs_lock: - tags_comps_docs[0] += 1 - - self.update_compatibles_bindings(new_idxes[self.index][0]) - - self.index += self.inc - - with tags_comps_docs_lock: - tags_comps_docs[1] += 1 - progress('comps_docs: Thread ' + str(tags_comps_docs[1]) + '/' + str(self.inc) + ' finished', tags_comps_docs[0]) - - def update_compatibles_bindings(self, idxes): - global hash_file_lock, comps_lock, comps_docs_lock, tags_comps_docs, bindings_idxes - - for idx in idxes: - if idx % 1000 == 0: progress('comps_docs: ' + str(idx), tags_comps_docs[0]) - - if not idx in bindings_idxes: # Parse only bindings doc files - continue - - with hash_file_lock: - hash = db.hash.get(idx) - - family = 'B' - lines = compatibles_parser.run(scriptLines('get-blob', hash), family) - comps_docs = {} - with comps_lock: - for l in lines: - ident, line = l.split(' ') - - if db.comps.exists(ident): - if ident in comps_docs: - comps_docs[ident] += ',' + str(line) - else: - comps_docs[ident] = str(line) - - with comps_docs_lock: - for ident, lines in comps_docs.items(): - if db.comps_docs.exists(ident): - obj = db.comps_docs.get(ident) - else: - obj = data.RefList() - - obj.append(idx, lines, family) - if verbose: - print(f"comps_docs: {ident} in #{idx} @ {line}") - db.comps_docs.put(ident, obj) - - -def progress(msg, current): - print('{} - {} ({:.1%})'.format(project, msg, current/num_tags)) - - -# Main - -# Check number of threads arg -if len(argv) >= 2 and argv[1].isdigit() : - cpu = int(argv[1]) - - if cpu < 5 : - cpu = 5 - -# Distribute threads among functions using the following rules : -# There are more (or equal) refs threads than others -# There are more (or equal) defs threads than docs or comps threads -# Example : if cpu=6 : defs=1, refs=2, docs=1, comps=1, comps_docs=1 -# if cpu=7 : defs=2, refs=2, docs=1, comps=1, comps_docs=1 -# if cpu=8 : defs=2, refs=3, docs=1, comps=1, comps_docs=1 -# if cpu=11: defs=2, refs=3, docs=2, comps=2, comps_docs=2 -quo, rem = divmod(cpu, 5) -num_th_refs = quo -num_th_defs = quo -num_th_docs = quo - -# If DT bindings support is 
enabled, use $quo threads for each of the 2 threads -# Otherwise add them to the remaining threads -if dts_comp_support: - num_th_comps = quo - num_th_comps_docs = quo -else : - num_th_comps = 0 - num_th_comps_docs = 0 - rem += 2*quo - -quo, rem = divmod(rem, 2) -num_th_defs += quo -num_th_refs += quo + rem - -tag_buf = [] -for tag in scriptLines('list-tags'): - if not db.vers.exists(tag): - tag_buf.append(tag) - -num_tags = len(tag_buf) -project = lib.currentProject() - -print(project + ' - found ' + str(num_tags) + ' new tags') - -if not num_tags: - # Backward-compatibility: generate defs caches if they are empty. - if db.defs_cache['C'].db.stat()['nkeys'] == 0: - generate_defs_caches() - exit(0) - -threads_list.append(UpdateIds(tag_buf)) -threads_list.append(UpdateVersions(tag_buf)) - -# Define defs threads -for i in range(num_th_defs): - threads_list.append(UpdateDefs(i, num_th_defs)) -# Define refs threads -for i in range(num_th_refs): - threads_list.append(UpdateRefs(i, num_th_refs)) -# Define docs threads -for i in range(num_th_docs): - threads_list.append(UpdateDocs(i, num_th_docs)) -# Define comps threads -for i in range(num_th_comps): - threads_list.append(UpdateComps(i, num_th_comps)) -# Define comps_docs threads -for i in range(num_th_comps_docs): - threads_list.append(UpdateCompsDocs(i, num_th_comps_docs)) - - -# Start to process tags -threads_list[0].start() - -# Wait until the first tag is ready -with tag_ready: - tag_ready.wait() - -# Start remaining threads -for i in range(1, len(threads_list)): - threads_list[i].start() - -# Make sure all threads finished -for i in range(len(threads_list)): - threads_list[i].join()
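
Note (not part of the patch): with the old top-level update.py removed, the updater now lives inside the elixir package and sizes its worker pool from cpu_count() instead of a thread-count argument. A minimal sketch of how it might be invoked, assuming the usual LXR_REPO_DIR and LXR_DATA_DIR environment variables consumed by script.sh and lib.getDataDir() (variable names and paths are assumptions, not shown in this diff):

    # Hypothetical invocation from the repository checkout
    export LXR_REPO_DIR=/path/to/project/repo.git   # assumed variable name
    export LXR_DATA_DIR=/path/to/project/data       # assumed variable name
    python3 -m elixir.update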