diff --git a/elixir/data.py b/elixir/data.py
index b952943c..74116af1 100755
--- a/elixir/data.py
+++ b/elixir/data.py
@@ -18,13 +18,21 @@
# You should have received a copy of the GNU Affero General Public License
# along with Elixir. If not, see <https://www.gnu.org/licenses/>.
+from typing import OrderedDict
import berkeleydb
import re
+import time
from . import lib
+from .lib import autoBytes
import os
import os.path
import errno
+# Cache size used by the update script for the largest databases. Tuple of (gigabytes, bytes).
+# https://docs.oracle.com/database/bdb181/html/api_reference/C/dbset_cachesize.html
+# https://docs.oracle.com/database/bdb181/html/programmer_reference/general_am_conf.html#am_conf_cachesize
+CACHESIZE = (2,0)
+
deflist_regex = re.compile(b'(\d*)(\w)(\d*)(\w),?')
deflist_macro_regex = re.compile('\dM\d+(\w)')
@@ -59,43 +67,86 @@ class DefList:
def __init__(self, data=b'#'):
self.data, self.families = data.split(b'#')
+ self.modified = False
+ self.entries = None
+ self.to_append = []
+
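+    # Lazily parse self.data into a sorted list of (id, type, line, family)
+    # tuples, merging in any entries that were appended before parsing.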
+ def populate_entries(self):
+ entries_modified = False
+ if self.entries is None:
+ self.entries = [
+ (int(d[0]), d[1], int(d[2]), d[3])
+ for d in deflist_regex.findall(self.data)
+ ]
+ entries_modified = True
+
+ if len(self.to_append) != 0:
+ self.entries += self.to_append
+ self.to_append = []
+ entries_modified = True
+
+ if entries_modified:
+ self.entries.sort(key=lambda x:int(x[0]))
+
def iter(self, dummy=False):
# Get all element in a list of sublists and sort them
- entries = deflist_regex.findall(self.data)
- entries.sort(key=lambda x:int(x[0]))
- for id, type, line, family in entries:
- id = int(id)
- type = defTypeR [type.decode()]
- line = int(line)
- family = family.decode()
- yield id, type, line, family
+ if self.entries is None:
+ self.populate_entries()
+
+ for id, type, line, family in self.entries:
+ yield id, defTypeR[type.decode()], int(line), family.decode()
if dummy:
yield maxId, None, None, None
- def append(self, id, type, line, family):
+ def exists(self, idx: int, line_num: int):
+ if self.entries is None:
+ self.populate_entries()
+
+ for id, _, line, _ in self.entries:
+ if id == idx and int(line) == line_num:
+ return True
+
+ return False
+
+ def append(self, id: int, type, line: int, family: str):
if type not in defTypeD:
return
- p = str(id) + defTypeD[type] + str(line) + family
- if self.data != b'':
- p = ',' + p
- self.data += p.encode()
+
+ self.modified = True
+ if self.entries is None:
+ self.to_append.append((id, defTypeD[type].encode(), line, family.encode()))
+ else:
+ self.entries.append((id, defTypeD[type].encode(), line, family.encode()))
+
self.add_family(family)
- def pack(self):
- return self.data + b'#' + self.families
+ def pack(self) -> bytes:
+ if self.entries is None:
+ to_append = b",".join([
+ str(arg[0]).encode() + arg[1] + str(arg[2]).encode() + arg[3]
+ for arg in self.to_append
+ ])
+ self.to_append = []
+ self.data += to_append
+ return self.data + b'#' + self.families
+ else:
+ self.data = b",".join([
+ str(arg[0]).encode() + arg[1] + str(arg[2]).encode() + arg[3]
+ for arg in self.entries
+ ])
+ return self.data + b'#' + self.families
- def add_family(self, family):
- family = family.encode()
+ def add_family(self, family: str):
if not family in self.families.split(b','):
if self.families != b'':
- family = b',' + family
- self.families += family
+ family = ',' + family
+ self.families += family.encode()
def get_families(self):
- return self.families.decode().split(',')
+ return [f.decode() for f in self.families.split(b',')]
def get_macros(self):
- return deflist_macro_regex.findall(self.data.decode()) or ''
+ return (deflist_macro_regex.findall(self.data.decode()) + [entry[1] for entry in self.to_append]) or ''
class PathList:
'''Stores associations between a blob ID and a file path.
@@ -124,69 +175,205 @@ class RefList:
and the corresponding family.'''
def __init__(self, data=b''):
self.data = data
+ self.entries = None
+ self.to_append = []
+ self.sorted = False
+ self.modified = False
+
+ def decode_entry(self, k):
+ return (int(k[0].decode()), k[1].decode(), k[2].decode())
+
+ def populate_entries(self):
+ self.entries = [self.decode_entry(x.split(b':')) for x in self.data.split(b'\n')[:-1]]
+ self.entries += self.to_append
+ self.to_append = []
+ self.entries.sort(key=lambda x:int(x[0]))
def iter(self, dummy=False):
- # Split all elements in a list of sublists and sort them
- entries = [x.split(b':') for x in self.data.split(b'\n')[:-1]]
- entries.sort(key=lambda x:int(x[0]))
- for b, c, d in entries:
- b = int(b.decode())
- c = c.decode()
- d = d.decode()
+ if self.entries is None:
+ self.populate_entries()
+
+ for b, c, d in self.entries:
yield b, c, d
if dummy:
yield maxId, None, None
def append(self, id, lines, family):
- p = str(id) + ':' + lines + ':' + family + '\n'
- self.data += p.encode()
+ self.modified = True
+ if self.entries is not None:
+ self.entries.append((id, lines, family))
+ else:
+ self.to_append.append((id, lines, family))
def pack(self):
- return self.data
+ if self.entries is not None:
+ assert len(self.to_append) == 0
+ result = "".join([str(id) + ":" + lines + ":" + family + "\n" for id, lines, family in self.entries])
+ return result.encode()
+ elif len(self.to_append) != 0:
+ result = "".join([str(id) + ":" + lines + ":" + family + "\n" for id, lines, family in self.to_append])
+ self.data += result.encode()
+ self.to_append = []
+ return self.data
class BsdDB:
- def __init__(self, filename, readonly, contentType, shared=False):
+ def __init__(self, filename, readonly, contentType, shared=False, cachesize=None):
self.filename = filename
self.db = berkeleydb.db.DB()
- flags = berkeleydb.db.DB_THREAD if shared else 0
+ self.flags = berkeleydb.db.DB_THREAD if shared else 0
- if readonly:
- flags |= berkeleydb.db.DB_RDONLY
- self.db.open(filename, flags=flags)
+ self.readonly = readonly
+ if self.readonly:
+ self.flags |= berkeleydb.db.DB_RDONLY
else:
- flags |= berkeleydb.db.DB_CREATE
- self.db.open(filename, flags=flags, mode=0o644, dbtype=berkeleydb.db.DB_BTREE)
+ self.flags |= berkeleydb.db.DB_CREATE
+
+ if cachesize is not None:
+ self.db.set_cachesize(cachesize[0], cachesize[1])
+
+ self.open()
self.ctype = contentType
+ def open(self):
+ if self.readonly:
+ self.db.open(self.filename, flags=self.flags)
+ else:
+ self.db.open(self.filename, flags=self.flags, mode=0o644, dbtype=berkeleydb.db.DB_BTREE)
+
def exists(self, key):
- key = lib.autoBytes(key)
+ key = autoBytes(key)
return self.db.exists(key)
def get(self, key):
- key = lib.autoBytes(key)
+ key = autoBytes(key)
p = self.db.get(key)
- return self.ctype(p) if p is not None else None
+ if p is None:
+ return None
+ p = self.ctype(p)
+ return p
def get_keys(self):
return self.db.keys()
def put(self, key, val, sync=False):
- key = lib.autoBytes(key)
- val = lib.autoBytes(val)
+ key = autoBytes(key)
+ val = autoBytes(val)
+ if type(val) is not bytes:
+ val = val.pack()
+ self.db.put(key, val)
+ if sync:
+ self.db.sync()
+
+ def sync(self):
+ self.db.sync()
+
+ def close(self):
+ self.db.close()
+
+ def __len__(self):
+ return self.db.stat()["nkeys"]
+
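+# BerkeleyDB wrapper with a write-back LRU cache: values are kept as decoded
+# objects in an OrderedDict; modified values are packed and written to the
+# database when evicted, on sync(), and on close().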
+class CachedBsdDB:
+ def __init__(self, filename, readonly, contentType, cachesize):
+ self.filename = filename
+ self.db = None
+ self.readonly = readonly
+
+ self.cachesize = cachesize
+ self.cache = OrderedDict()
+
+ self.open()
+
+ self.ctype = contentType
+
+ def open(self):
+ if self.db is None:
+ self.db = berkeleydb.db.DB()
+
+ flags = 0
+
+ if self.readonly:
+ flags |= berkeleydb.db.DB_RDONLY
+ self.db.open(self.filename, flags=flags)
+ else:
+ flags |= berkeleydb.db.DB_CREATE
+ self.db.open(self.filename, flags=flags, mode=0o644, dbtype=berkeleydb.db.DB_BTREE)
+
+ def exists(self, key):
+ if key in self.cache:
+ return True
+
+ return self.db.exists(autoBytes(key))
+
+ def get(self, key):
+ if key in self.cache:
+ self.cache.move_to_end(key)
+ return self.cache[key]
+
+ p = self.db.get(autoBytes(key))
+ if p is None:
+ return None
+ p = self.ctype(p)
+
+ self.cache[key] = p
+ self.cache.move_to_end(key)
+ if len(self.cache) > self.cachesize:
+ old_k, old_v = self.cache.popitem(last=False)
+ if old_v.modified:
+ self.put_raw(old_k, old_v)
+
+ return p
+
+ def get_keys(self):
+ self.sync()
+ return self.db.keys()
+
+ def put(self, key, val):
+ if self.readonly:
+ raise Exception("database is readonly")
+
+ self.cache[key] = val
+ self.cache.move_to_end(key)
+ if len(self.cache) > self.cachesize:
+ old_k, old_v = self.cache.popitem(last=False)
+ if old_v.modified:
+ self.put_raw(old_k, old_v)
+
+ def put_raw(self, key, val, sync=False):
+ if self.readonly:
+ raise Exception("database is readonly")
+
+ key = autoBytes(key)
+ val = autoBytes(val)
if type(val) is not bytes:
val = val.pack()
self.db.put(key, val)
if sync:
self.db.sync()
+ def sync(self):
+ start = time.time()
+ flushed = 0
+ if not self.readonly:
+ for k, v in self.cache.items():
+ if v.modified:
+ v.modified = False
+ self.put_raw(k, v)
+ flushed += 1
+
+ print("synced", flushed, "/", len(self.cache), time.time()-start)
+ self.db.sync()
+
def close(self):
+ self.sync()
self.db.close()
+ self.db = None
def __len__(self):
return self.db.stat()["nkeys"]
class DB:
- def __init__(self, dir, readonly=True, dtscomp=False, shared=False):
+ def __init__(self, dir, readonly=True, dtscomp=False, shared=False, update_cache=None):
if os.path.isdir(dir):
self.dir = dir
else:
@@ -194,6 +381,11 @@ def __init__(self, dir, readonly=True, dtscomp=False, shared=False):
ro = readonly
+ if update_cache:
+ db_cls = lambda dir, ro, ctype: CachedBsdDB(dir, ro, ctype, cachesize=update_cache)
+ else:
+ db_cls = lambda dir, ro, ctype: BsdDB(dir, ro, ctype, shared=shared)
+
self.vars = BsdDB(dir + '/variables.db', ro, lambda x: int(x.decode()), shared=shared)
# Key-value store of basic information
self.blob = BsdDB(dir + '/blobs.db', ro, lambda x: int(x.decode()), shared=shared)
@@ -203,7 +395,7 @@ def __init__(self, dir, readonly=True, dtscomp=False, shared=False):
self.file = BsdDB(dir + '/filenames.db', ro, lambda x: x.decode(), shared=shared)
# Map serial number to filename
self.vers = BsdDB(dir + '/versions.db', ro, PathList, shared=shared)
- self.defs = BsdDB(dir + '/definitions.db', ro, DefList, shared=shared)
+ self.defs = db_cls(dir + '/definitions.db', ro, DefList)
self.defs_cache = {}
NOOP = lambda x: x
self.defs_cache['C'] = BsdDB(dir + '/definitions-cache-C.db', ro, NOOP, shared=shared)
@@ -211,12 +403,12 @@ def __init__(self, dir, readonly=True, dtscomp=False, shared=False):
self.defs_cache['D'] = BsdDB(dir + '/definitions-cache-D.db', ro, NOOP, shared=shared)
self.defs_cache['M'] = BsdDB(dir + '/definitions-cache-M.db', ro, NOOP, shared=shared)
assert sorted(self.defs_cache.keys()) == sorted(lib.CACHED_DEFINITIONS_FAMILIES)
- self.refs = BsdDB(dir + '/references.db', ro, RefList, shared=shared)
- self.docs = BsdDB(dir + '/doccomments.db', ro, RefList, shared=shared)
+ self.refs = db_cls(dir + '/references.db', ro, RefList)
+ self.docs = db_cls(dir + '/doccomments.db', ro, RefList)
self.dtscomp = dtscomp
if dtscomp:
- self.comps = BsdDB(dir + '/compatibledts.db', ro, RefList, shared=shared)
- self.comps_docs = BsdDB(dir + '/compatibledts_docs.db', ro, RefList, shared=shared)
+ self.comps = db_cls(dir + '/compatibledts.db', ro, RefList)
+ self.comps_docs = db_cls(dir + '/compatibledts_docs.db', ro, RefList)
# Use a RefList in case there are multiple doc comments for an identifier
def close(self):
diff --git a/elixir/lib.py b/elixir/lib.py
index 7d7d0757..2442e107 100755
--- a/elixir/lib.py
+++ b/elixir/lib.py
@@ -21,6 +21,7 @@
import sys
import logging
import subprocess, os
+from typing import List
logger = logging.getLogger(__name__)
@@ -46,7 +47,7 @@ def run_cmd(*args, env=None):
# Invoke ./script.sh with the given arguments
# Returns the list of output lines
-def scriptLines(*args, env=None):
+def scriptLines(*args, env=None) -> List[bytes]:
p = script(*args, env=env)
p = p.split(b'\n')
del p[-1]
diff --git a/elixir/update.py b/elixir/update.py
new file mode 100644
index 00000000..cd2d2493
--- /dev/null
+++ b/elixir/update.py
@@ -0,0 +1,441 @@
+import os.path
+import logging
+import time
+import signal
+import bisect
+import cProfile
+from multiprocessing import cpu_count, set_start_method
+from multiprocessing.pool import Pool
+from typing import Dict, Iterable, List, Optional, Tuple
+from collections import OrderedDict
+
+from find_compatible_dts import FindCompatibleDTS
+
+from elixir.data import DB, BsdDB, CachedBsdDB, DefList, PathList, RefList
+from elixir.lib import (
+ compatibleFamily,
+ compatibleMacro,
+ getDataDir,
+ getFileFamily,
+ isIdent,
+ script,
+ scriptLines,
+)
+
+logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s')
+logger = logging.getLogger(__name__)
+
+# File identification - id, hash, filename
+FileId = Tuple[int, bytes, str]
+
+# Definitions parsing output, ident -> list of (file_idx, type, line, family)
+DefsDict = Dict[bytes, List[Tuple[int, str, int, str]]]
+
+# References parsing output, ident -> (file_idx, family) -> list of lines
+RefsDict = Dict[bytes, Dict[Tuple[int, str], List[int]]]
+
+# Generic dictionary of ident -> list of lines
+LinesListDict = Dict[str, List[int]]
+
+# File idx -> (hash, filename, is a new file?)
+IdxCache = Dict[int, Tuple[bytes, str, bool]]
+
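+# Small LRU cache; used below to memoize per-identifier version-visibility checks.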
+class Cache:
+ def __init__(self, size):
+ self.cache = OrderedDict()
+ self.size = size
+
+ def contains(self, key):
+ return key in self.cache
+
+ def get(self, key):
+ self.cache.move_to_end(key)
+ return self.cache[key]
+
+ def put(self, key, val):
+ self.cache[key] = val
+ self.cache.move_to_end(key)
+ if len(self.cache) > self.size:
+ self.cache.popitem(last=False)
+
+# Check if definition for ident is visible in current version
+def def_in_version(def_ident: DefList, idx_to_hash_and_filename: IdxCache) -> bool:
+ def_ident.populate_entries()
+
+ prev_idx = None
+ for def_idx, _, _, _ in reversed(def_ident.entries):
+ if def_idx == prev_idx:
+ continue
+ if def_idx in idx_to_hash_and_filename:
+ return True
+ prev_idx = def_idx
+ return False
+
+# Add definitions to database
+def add_defs(db: DB, defs: DefsDict):
+ for ident, occ_list in defs.items():
+ obj = db.defs.get(ident)
+ if obj is None:
+ obj = DefList()
+
+ for (idx, type, line, family) in occ_list:
+ obj.append(idx, type, line, family)
+
+ db.defs.put(ident, obj)
+
+# Add references to database
+def add_refs(db: DB, in_ver_cache: Cache, idx_to_hash_and_filename: IdxCache, refs: RefsDict):
+ for ident, idx_to_lines in refs.items():
+ deflist = db.defs.get(ident)
+ if deflist is None:
+ continue
+
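+        # Only record references for identifiers whose definition is visible in a
+        # blob of this version; the verdict is memoized per identifier in in_ver_cache.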
+ if not in_ver_cache.contains(ident):
+ in_version = def_in_version(deflist, idx_to_hash_and_filename)
+ if not in_version:
+ in_ver_cache.put(ident, False)
+ continue
+ in_ver_cache.put(ident, True)
+ elif not in_ver_cache.get(ident):
+ continue
+
+ obj = db.refs.get(ident)
+ if obj is None:
+ obj = RefList()
+
+ for (idx, family), lines_str in idx_to_lines.items():
+ obj.append(idx, lines_str, family)
+
+ db.refs.put(ident, obj)
+
+# Add documentation references to database
+def add_docs(db: DB, idx: int, family: str, docs: Dict[str, List[int]]):
+ add_to_lineslist(db.docs, idx, family, docs)
+
+# Add compatible references to database
+def add_comps(db: DB, idx: int, family: str, comps: Dict[str, List[int]]):
+ add_to_lineslist(db.comps, idx, family, comps)
+
+# Add compatible docs to database
+def add_comps_docs(db: DB, idx: int, family: str, comps_docs: Dict[str, List[int]]):
+ comps_result = {}
+ for ident, v in comps_docs.items():
+ if db.comps.exists(ident):
+ comps_result[ident] = v
+
+ add_to_lineslist(db.comps_docs, idx, family, comps_result)
+
+# Add data to a database file that uses lines list schema
+def add_to_lineslist(db_file: BsdDB, idx: int, family: str, to_add: Dict[str, List[int]]):
+ for ident, lines in to_add.items():
+ obj = db_file.get(ident)
+ if obj is None:
+ obj = RefList()
+
+ lines_str = ','.join((str(n) for n in lines))
+ obj.append(idx, lines_str, family)
+ db_file.put(ident, obj)
+
+
+# Adds the blob list for a tag to the database, returns a blob id -> (hash, filename, is_new) dict
+def collect_blobs(db: DB, tag: bytes) -> IdxCache:
+ idx = db.vars.get('numBlobs')
+ if idx is None:
+ idx = 0
+
+ # Get blob hashes and associated file names (without path)
+ blobs = scriptLines('list-blobs', '-p', tag)
+ versionBuf = []
+ idx_to_hash_and_filename = {}
+
+ # Collect new blobs, assign database ids to the blobs
+ for blob in blobs:
+ hash, path = blob.split(b' ',maxsplit=1)
+ filename = os.path.basename(path.decode())
+ blob_idx = db.blob.get(hash)
+
+ if blob_idx is not None:
+ versionBuf.append((blob_idx, path))
+ if blob_idx not in idx_to_hash_and_filename:
+ idx_to_hash_and_filename[blob_idx] = (hash, filename, False)
+ else:
+ versionBuf.append((idx, path))
+ idx_to_hash_and_filename[idx] = (hash, filename, True)
+ db.blob.put(hash, idx)
+ db.hash.put(idx, hash)
+ db.file.put(idx, filename)
+ idx += 1
+
+ # Update number of blobs in the database
+ db.vars.put('numBlobs', idx)
+
+ # Add mapping blob id -> path to version database
+ versionBuf.sort()
+ obj = PathList()
+ for i, path in versionBuf:
+ obj.append(i, path)
+ db.vers.put(tag, obj, sync=True)
+
+ return idx_to_hash_and_filename
+
+# Generate definitions cache databases
+def generate_defs_caches(db: DB):
+ for key in db.defs.get_keys():
+ value = db.defs.get(key)
+ for family in ['C', 'K', 'D', 'M']:
+ if (compatibleFamily(value.get_families(), family) or
+ compatibleMacro(value.get_macros(), family)):
+ db.defs_cache[family].put(key, b'')
+
+
+# Collect definitions from ctags for a file
+def get_defs(file_id: FileId) -> Optional[DefsDict]:
+ idx, hash, filename = file_id
+ defs = {}
+ family = getFileFamily(filename)
+ if family in (None, 'M'):
+ return None
+
+ lines = scriptLines('parse-defs', hash, filename, family)
+
+ for l in lines:
+ ident, type, line = l.split(b' ')
+ type = type.decode()
+ line = int(line.decode())
+ if isIdent(ident):
+ if ident not in defs:
+ defs[ident] = []
+ defs[ident].append((idx, type, line, family))
+
+ return defs
+
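+# Worker-side wrapper for get_refs: each worker opens its own small, read-only
+# cached handle to the definitions database instead of sharing the parent's.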
+def call_get_refs(arg: Tuple[FileId, str]) -> Optional[RefsDict]:
+ return get_refs(arg[0], CachedBsdDB(arg[1], True, DefList, 1000))
+
+# Collect references from the tokenizer for a file
+def get_refs(file_id: FileId, defs: CachedBsdDB) -> Optional[RefsDict]:
+ idx, hash, filename = file_id
+ refs = {}
+ family = getFileFamily(filename)
+ if family is None:
+ return
+
+ # Kconfig values are saved as CONFIG_
+ prefix = b'' if family != 'K' else b'CONFIG_'
+
+ tokens = scriptLines('tokenize-file', '-b', hash, family)
+ even = True
+ line_num = 1
+
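+    # Check whether a definition of this token exists at (idx, line): such an
+    # occurrence is the definition itself, not a reference, so the caller skips it.
+    # Entries are sorted by blob idx, so bisect narrows the scan to this blob.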
+ def deflist_exists(deflist, idx: int, line: int):
+ deflist.populate_entries()
+ start = bisect.bisect_left(deflist.entries, idx, key=lambda x: x[0])
+
+ for def_idx, _, def_line, _ in deflist.entries[start:]:
+ if def_idx == idx:
+ if def_line == line:
+ return True
+ else:
+ break
+
+ return False
+
+ for tok in tokens:
+ even = not even
+ if even:
+ tok = prefix + tok
+
+ # We only index CONFIG_??? in makefiles
+ if (family != 'M' or tok.startswith(b'CONFIG_')):
+ deflist = defs.get(tok)
+ if not deflist:
+ continue
+
+ if deflist_exists(deflist, idx, line_num):
+ continue
+
+ if tok not in refs:
+ refs[tok] = {}
+
+ if (idx, family) not in refs[tok]:
+ refs[tok][(idx, family)] = str(line_num)
+ else:
+ refs[tok][(idx, family)] += "," + str(line_num)
+
+ else:
+ line_num += tok.count(b'\1')
+
+
+ return refs
+
+# Collect compatible script output into a lineslist-schema compatible format
+def collect_get_blob_output(lines: Iterable[str]) -> LinesListDict:
+ results = {}
+ for l in lines:
+ ident, line = l.split(' ')
+ line = int(line)
+
+ if ident not in results:
+ results[ident] = []
+ results[ident].append(line)
+
+ return results
+
+# Collect docs from doc comments script for a single file
+def get_docs(file_id: FileId) -> Optional[Tuple[int, str, LinesListDict]]:
+ idx, hash, filename = file_id
+ family = getFileFamily(filename)
+ if family in (None, 'M'): return
+
+ start = time.time()
+ lines = (line.decode() for line in scriptLines('parse-docs', hash, filename))
+ parser_time = time.time()-start
+
+ if parser_time > 10:
+ print("docs timeout", parser_time, file_id)
+
+ docs = collect_get_blob_output(lines)
+
+ return (idx, family, docs)
+
+# Collect compatible references for a single file
+def get_comps(file_id: FileId) -> Optional[Tuple[int, str, LinesListDict]]:
+ idx, hash, filename = file_id
+ family = getFileFamily(filename)
+ if family in (None, 'K', 'M'): return
+
+ compatibles_parser = FindCompatibleDTS()
+
+ start = time.time()
+ lines = compatibles_parser.run(scriptLines('get-blob', hash), family)
+ parser_time = time.time()-start
+
+ if parser_time > 10:
+ print("comps docs timeout", parser_time, file_id)
+
+ comps = collect_get_blob_output(lines)
+
+ return (idx, family, comps)
+
+# Collect compatible documentation references for a single file
+def get_comps_docs(file_id: FileId) -> Optional[Tuple[int, str, LinesListDict]]:
+ idx, hash, _ = file_id
+ family = 'B'
+
+ compatibles_parser = FindCompatibleDTS()
+ lines = compatibles_parser.run(scriptLines('get-blob', hash), family)
+ comps_docs = {}
+ for l in lines:
+ ident, line = l.split(' ')
+
+ if ident not in comps_docs:
+ comps_docs[ident] = []
+ comps_docs[ident].append(int(line))
+
+ return (idx, family, comps_docs)
+
+
+# Update a single version - collects data from all the stages and saves it in the database
+def update_version(db: DB, tag: bytes, pool: Pool, dts_comp_support: bool):
+ idx_to_hash_and_filename = collect_blobs(db, tag)
+
+ # Collect blobs to process and split list of blobs into chunks
+ idxes = [(idx, hash, filename) for (idx, (hash, filename, new)) in idx_to_hash_and_filename.items() if new]
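+    # One chunk of blobs per CPU, clamped to between 1 and 100 blobs per chunk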
+ chunksize = int(len(idxes) / cpu_count())
+ chunksize = min(max(1, chunksize), 100)
+
+ logger.info("collecting blobs done, new blobs: %d", len(idxes))
+
+ for result in pool.imap_unordered(get_defs, idxes, chunksize):
+ if result is not None:
+ add_defs(db, result)
+
+ logger.info("defs done")
+
+ for result in pool.imap_unordered(get_docs, idxes, chunksize):
+ if result is not None:
+ add_docs(db, *result)
+
+ logger.info("docs done")
+
+ if dts_comp_support:
+ comp_idxes = [idx for idx in idxes if getFileFamily(idx[2]) not in (None, 'K', 'M')]
+ comp_chunksize = int(len(comp_idxes) / cpu_count())
+ comp_chunksize = min(max(1, comp_chunksize), 100)
+ for result in pool.imap_unordered(get_comps, comp_idxes, comp_chunksize):
+ if result is not None:
+ add_comps(db, *result)
+
+ logger.info("dts comps done")
+
+ for result in pool.imap_unordered(get_comps_docs, idxes, chunksize):
+ if result is not None:
+ add_comps_docs(db, *result)
+
+ logger.info("dts comps docs done")
+
+
+ #with cProfile.Profile() as pr:
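+    # Flush the definitions and reopen the database read-only; the reference
+    # workers open their own read-only handles on this file (see call_get_refs)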
+ db.defs.close()
+ db.defs.readonly = True
+ db.defs.open()
+
+ in_def_cache = Cache(10000)
+ ref_idxes = [(idx, db.defs.filename) for idx in idxes]
+ ref_chunksize = int(len(ref_idxes) / cpu_count())
+ ref_chunksize = min(max(1, ref_chunksize), 100)
+ #pr.dump_stats("5refs"+str(int(time.time())))
+
+ logger.info("ref blobs: %d", len(ref_idxes))
+
+ for result in pool.imap_unordered(call_get_refs, ref_idxes, ref_chunksize):
+ if result is not None:
+ add_refs(db, in_def_cache, idx_to_hash_and_filename, result)
+
+ db.defs.close()
+ db.defs.readonly = False
+ db.defs.open()
+
+ logger.info("refs done")
+ logger.info("update done")
+
+
+sigint_caught = False
+
+def sigint_handler(signum, _frame):
+ global sigint_caught
+ if not sigint_caught:
+ logger.info("Caught SIGINT... the script will exit after processing this version")
+ signal.signal(signum, signal.SIG_IGN)
+ sigint_caught = True
+
+signal.signal(signal.SIGINT, sigint_handler)
+
+def ignore_sigint():
+ signal.signal(signal.SIGINT, lambda _,__: None)
+
+if __name__ == "__main__":
+
+ dts_comp_support = bool(int(script('dts-comp')))
+ db = DB(getDataDir(), readonly=False, dtscomp=dts_comp_support, shared=False, update_cache=100000)
+
+ set_start_method('spawn')
+ with Pool(initializer=ignore_sigint) as pool:
+ for tag in scriptLines('list-tags'):
+ #if not tag.startswith(b'v6'):
+ # continue
+
+ if sigint_caught:
+ break
+
+ if not db.vers.exists(tag):
+ logger.info("updating tag %s", tag)
+ update_version(db, tag, pool, dts_comp_support)
+
+ logger.info("generating def caches")
+ generate_defs_caches(db)
+ logger.info("def caches generated")
+ db.close()
+ logger.info("database closed")
+
+
diff --git a/find_compatible_dts.py b/find_compatible_dts.py
index 8aec94d6..a1a356f1 100755
--- a/find_compatible_dts.py
+++ b/find_compatible_dts.py
@@ -31,6 +31,8 @@ def __init__(self):
self.regex_bindings = re.compile("([\w-]+,?[\w-]+)")
def parse_c(self, content):
+ if "compatible" not in content:
+ return []
return self.regex_c.findall(content)
def parse_dts(self, content):
diff --git a/script.sh b/script.sh
index 3bbff2a7..656a2633 100755
--- a/script.sh
+++ b/script.sh
@@ -165,7 +165,7 @@ parse_defs_C()
git cat-file blob "$opt1" > "$full_path"
# Use ctags to parse most of the defs
- ctags -x --kinds-c=+p+x --extras='-{anonymous}' "$full_path" |
+ ctags -u -x --kinds-c=+p+x --extras='-{anonymous}' "$full_path" |
grep -avE -e "^operator " -e "^CONFIG_" |
awk '{print $1" "$2" "$3}'
@@ -182,7 +182,7 @@ parse_defs_K()
tmp=`mktemp -d`
full_path=$tmp/$opt2
git cat-file blob "$opt1" > "$full_path"
- ctags -x --language-force=kconfig --kinds-kconfig=c --extras-kconfig=-{configPrefixed} "$full_path" |
+ ctags -u -x --language-force=kconfig --kinds-kconfig=c --extras-kconfig=-{configPrefixed} "$full_path" |
awk '{print "CONFIG_"$1" "$2" "$3}'
rm "$full_path"
rmdir $tmp
@@ -193,7 +193,7 @@ parse_defs_D()
tmp=`mktemp -d`
full_path=$tmp/$opt2
git cat-file blob "$opt1" > "$full_path"
- ctags -x --language-force=dts "$full_path" |
+ ctags -u -x --language-force=dts "$full_path" |
awk '{print $1" "$2" "$3}'
rm "$full_path"
rmdir $tmp
@@ -204,7 +204,13 @@ parse_docs()
tmpfile=`mktemp`
git cat-file blob "$opt1" > "$tmpfile"
- "$script_dir/find-file-doc-comments.pl" "$tmpfile" || exit "$?"
+
+ # Shortcut: if '/**' isn't present in the file, it cannot contain a doc.
+ # This avoids calling find-file-doc-comments.pl on most files, which is an
+ # expensive operation.
+ if grep -qF '/**' "$tmpfile"; then
+ "$script_dir/find-file-doc-comments.pl" "$tmpfile" || exit "$?"
+ fi
rm -rf "$tmpfile"
}
diff --git a/update.py b/update.py
deleted file mode 100755
index 9d84ff31..00000000
--- a/update.py
+++ /dev/null
@@ -1,638 +0,0 @@
-#!/usr/bin/env python3
-
-# This file is part of Elixir, a source code cross-referencer.
-#
-# Copyright (C) 2017--2020 Mikaël Bouillot
-# Maxime Chretien
-# and contributors
-#
-# Elixir is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# Elixir is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with Elixir. If not, see <https://www.gnu.org/licenses/>.
-
-# Throughout, an "idx" is the sequential number associated with a blob.
-# This is different from that blob's Git hash.
-
-from sys import argv
-from threading import Thread, Lock, Event, Condition
-
-import elixir.lib as lib
-from elixir.lib import script, scriptLines
-import elixir.data as data
-from elixir.data import PathList
-from find_compatible_dts import FindCompatibleDTS
-
-verbose = False
-
-dts_comp_support = int(script('dts-comp'))
-
-compatibles_parser = FindCompatibleDTS()
-
-db = data.DB(lib.getDataDir(), readonly=False, shared=True, dtscomp=dts_comp_support)
-
-# Number of cpu threads (+2 for version indexing)
-cpu = 10
-threads_list = []
-
-hash_file_lock = Lock() # Lock for db.hash and db.file
-blobs_lock = Lock() # Lock for db.blobs
-defs_lock = Lock() # Lock for db.defs
-refs_lock = Lock() # Lock for db.refs
-docs_lock = Lock() # Lock for db.docs
-comps_lock = Lock() # Lock for db.comps
-comps_docs_lock = Lock() # Lock for db.comps_docs
-tag_ready = Condition() # Waiting for new tags
-
-new_idxes = [] # (new idxes, Event idxes ready, Event defs ready, Event comps ready, Event vers ready)
-bindings_idxes = [] # DT bindings documentation files
-idx_key_mod = 1000000
-defs_idxes = {} # Idents definitions stored with (idx*idx_key_mod + line) as the key.
-
-tags_done = False # True if all tags have been added to new_idxes
-
-# Progress variables [tags, finished threads]
-tags_defs = [0, 0]
-tags_defs_lock = Lock()
-tags_refs = [0, 0]
-tags_refs_lock = Lock()
-tags_docs = [0, 0]
-tags_docs_lock = Lock()
-tags_comps = [0, 0]
-tags_comps_lock = Lock()
-tags_comps_docs = [0, 0]
-tags_comps_docs_lock = Lock()
-
-class UpdateIds(Thread):
- def __init__(self, tag_buf):
- Thread.__init__(self, name="UpdateIdsElixir")
- self.tag_buf = tag_buf
-
- def run(self):
- global new_idxes, tags_done, tag_ready
- self.index = 0
-
- for tag in self.tag_buf:
-
- new_idxes.append((self.update_blob_ids(tag), Event(), Event(), Event(), Event()))
-
- progress('ids: ' + tag.decode() + ': ' + str(len(new_idxes[self.index][0])) +
- ' new blobs', self.index+1)
-
- new_idxes[self.index][1].set() # Tell that the tag is ready
-
- self.index += 1
-
- # Wake up waiting threads
- with tag_ready:
- tag_ready.notify_all()
-
- tags_done = True
- progress('ids: Thread finished', self.index)
-
- def update_blob_ids(self, tag):
-
- global hash_file_lock, blobs_lock
-
- if db.vars.exists('numBlobs'):
- idx = db.vars.get('numBlobs')
- else:
- idx = 0
-
- # Get blob hashes and associated file names (without path)
- blobs = scriptLines('list-blobs', '-f', tag)
-
- new_idxes = []
- for blob in blobs:
- hash, filename = blob.split(b' ',maxsplit=1)
- with blobs_lock:
- blob_exist = db.blob.exists(hash)
- if not blob_exist:
- db.blob.put(hash, idx)
-
- if not blob_exist:
- with hash_file_lock:
- db.hash.put(idx, hash)
- db.file.put(idx, filename)
-
- new_idxes.append(idx)
- if verbose:
- print(f"New blob #{idx} {hash}:{filename}")
- idx += 1
- db.vars.put('numBlobs', idx)
- return new_idxes
-
-
-class UpdateVersions(Thread):
- def __init__(self, tag_buf):
- Thread.__init__(self, name="UpdateVersionsElixir")
- self.tag_buf = tag_buf
-
- def run(self):
- global new_idxes, tag_ready
-
- index = 0
-
- while index < len(self.tag_buf):
- if index >= len(new_idxes):
- # Wait for new tags
- with tag_ready:
- tag_ready.wait()
- continue
-
- tag = self.tag_buf[index]
-
- new_idxes[index][1].wait() # Make sure the tag is ready
-
- self.update_versions(tag)
-
- new_idxes[index][4].set() # Tell that UpdateVersions processed the tag
-
- progress('vers: ' + tag.decode() + ' done', index+1)
-
- index += 1
-
- progress('vers: Thread finished', index)
-
- def update_versions(self, tag):
- global blobs_lock
-
- # Get blob hashes and associated file paths
- blobs = scriptLines('list-blobs', '-p', tag)
- buf = []
-
- for blob in blobs:
- hash, path = blob.split(b' ', maxsplit=1)
- with blobs_lock:
- idx = db.blob.get(hash)
- buf.append((idx, path))
-
- buf = sorted(buf)
- obj = PathList()
- for idx, path in buf:
- obj.append(idx, path)
-
- # Store DT bindings documentation files to parse them later
- if path[:33] == b'Documentation/devicetree/bindings':
- bindings_idxes.append(idx)
-
- if verbose:
- print(f"Tag {tag}: adding #{idx} {path}")
- db.vers.put(tag, obj, sync=True)
-
-
-def generate_defs_caches():
- for key in db.defs.get_keys():
- value = db.defs.get(key)
- for family in ['C', 'K', 'D', 'M']:
- if (lib.compatibleFamily(value.get_families(), family) or
- lib.compatibleMacro(value.get_macros(), family)):
- db.defs_cache[family].put(key, b'')
-
-
-class UpdateDefs(Thread):
- def __init__(self, start, inc):
- Thread.__init__(self, name="UpdateDefsElixir")
- self.index = start
- self.inc = inc # Equivalent to the number of defs threads
-
- def run(self):
- global new_idxes, tags_done, tag_ready, tags_defs, tags_defs_lock
-
- while not (tags_done and self.index >= len(new_idxes)):
- if self.index >= len(new_idxes):
- # Wait for new tags
- with tag_ready:
- tag_ready.wait()
- continue
-
- new_idxes[self.index][1].wait() # Make sure the tag is ready
-
- with tags_defs_lock:
- tags_defs[0] += 1
-
- self.update_definitions(new_idxes[self.index][0])
-
- new_idxes[self.index][2].set() # Tell that UpdateDefs processed the tag
-
- self.index += self.inc
-
- with tags_defs_lock:
- tags_defs[1] += 1
- progress('defs: Thread ' + str(tags_defs[1]) + '/' + str(self.inc) + ' finished', tags_defs[0])
-
-
- def update_definitions(self, idxes):
- global hash_file_lock, defs_lock, tags_defs
-
- for idx in idxes:
- if idx % 1000 == 0: progress('defs: ' + str(idx), tags_defs[0])
-
- with hash_file_lock:
- hash = db.hash.get(idx)
- filename = db.file.get(idx)
-
- family = lib.getFileFamily(filename)
- if family in [None, 'M']: continue
-
- lines = scriptLines('parse-defs', hash, filename, family)
-
- with defs_lock:
- for l in lines:
- ident, type, line = l.split(b' ')
- type = type.decode()
- line = int(line.decode())
-
- defs_idxes[idx*idx_key_mod + line] = ident
-
- if db.defs.exists(ident):
- obj = db.defs.get(ident)
- elif lib.isIdent(ident):
- obj = data.DefList()
- else:
- continue
-
- obj.append(idx, type, line, family)
- if verbose:
- print(f"def {type} {ident} in #{idx} @ {line}")
- db.defs.put(ident, obj)
-
- generate_defs_caches()
-
-
-class UpdateRefs(Thread):
- def __init__(self, start, inc):
- Thread.__init__(self, name="UpdateRefsElixir")
- self.index = start
- self.inc = inc # Equivalent to the number of refs threads
-
- def run(self):
- global new_idxes, tags_done, tags_refs, tags_refs_lock
-
- while not (tags_done and self.index >= len(new_idxes)):
- if self.index >= len(new_idxes):
- # Wait for new tags
- with tag_ready:
- tag_ready.wait()
- continue
-
- new_idxes[self.index][1].wait() # Make sure the tag is ready
- new_idxes[self.index][2].wait() # Make sure UpdateDefs processed the tag
-
- with tags_refs_lock:
- tags_refs[0] += 1
-
- self.update_references(new_idxes[self.index][0])
-
- self.index += self.inc
-
- with tags_refs_lock:
- tags_refs[1] += 1
- progress('refs: Thread ' + str(tags_refs[1]) + '/' + str(self.inc) + ' finished', tags_refs[0])
-
- def update_references(self, idxes):
- global hash_file_lock, defs_lock, refs_lock, tags_refs
-
- for idx in idxes:
- if idx % 1000 == 0: progress('refs: ' + str(idx), tags_refs[0])
-
- with hash_file_lock:
- hash = db.hash.get(idx)
- filename = db.file.get(idx)
-
- family = lib.getFileFamily(filename)
- if family == None: continue
-
- prefix = b''
- # Kconfig values are saved as CONFIG_
- if family == 'K':
- prefix = b'CONFIG_'
-
- tokens = scriptLines('tokenize-file', '-b', hash, family)
- even = True
- line_num = 1
- idents = {}
- with defs_lock:
- for tok in tokens:
- even = not even
- if even:
- tok = prefix + tok
-
- if (db.defs.exists(tok) and
- not ( (idx*idx_key_mod + line_num) in defs_idxes and
- defs_idxes[idx*idx_key_mod + line_num] == tok ) and
- (family != 'M' or tok.startswith(b'CONFIG_'))):
- # We only index CONFIG_??? in makefiles
- if tok in idents:
- idents[tok] += ',' + str(line_num)
- else:
- idents[tok] = str(line_num)
-
- else:
- line_num += tok.count(b'\1')
-
- with refs_lock:
- for ident, lines in idents.items():
- if db.refs.exists(ident):
- obj = db.refs.get(ident)
- else:
- obj = data.RefList()
-
- obj.append(idx, lines, family)
- if verbose:
- print(f"ref: {ident} in #{idx} @ {lines}")
- db.refs.put(ident, obj)
-
-
-class UpdateDocs(Thread):
- def __init__(self, start, inc):
- Thread.__init__(self, name="UpdateDocsElixir")
- self.index = start
- self.inc = inc # Equivalent to the number of docs threads
-
- def run(self):
- global new_idxes, tags_done, tags_docs, tags_docs_lock
-
- while not (tags_done and self.index >= len(new_idxes)):
- if self.index >= len(new_idxes):
- # Wait for new tags
- with tag_ready:
- tag_ready.wait()
- continue
-
- new_idxes[self.index][1].wait() # Make sure the tag is ready
-
- with tags_docs_lock:
- tags_docs[0] += 1
-
- self.update_doc_comments(new_idxes[self.index][0])
-
- self.index += self.inc
-
- with tags_docs_lock:
- tags_docs[1] += 1
- progress('docs: Thread ' + str(tags_docs[1]) + '/' + str(self.inc) + ' finished', tags_docs[0])
-
- def update_doc_comments(self, idxes):
- global hash_file_lock, docs_lock, tags_docs
-
- for idx in idxes:
- if idx % 1000 == 0: progress('docs: ' + str(idx), tags_docs[0])
-
- with hash_file_lock:
- hash = db.hash.get(idx)
- filename = db.file.get(idx)
-
- family = lib.getFileFamily(filename)
- if family in [None, 'M']: continue
-
- lines = scriptLines('parse-docs', hash, filename)
- with docs_lock:
- for l in lines:
- ident, line = l.split(b' ')
- line = int(line.decode())
-
- if db.docs.exists(ident):
- obj = db.docs.get(ident)
- else:
- obj = data.RefList()
-
- obj.append(idx, str(line), family)
- if verbose:
- print(f"doc: {ident} in #{idx} @ {line}")
- db.docs.put(ident, obj)
-
-
-class UpdateComps(Thread):
- def __init__(self, start, inc):
- Thread.__init__(self, name="UpdateCompsElixir")
- self.index = start
- self.inc = inc # Equivalent to the number of comps threads
-
- def run(self):
- global new_idxes, tags_done, tags_comps, tags_comps_lock
-
- while not (tags_done and self.index >= len(new_idxes)):
- if self.index >= len(new_idxes):
- # Wait for new tags
- with tag_ready:
- tag_ready.wait()
- continue
-
- new_idxes[self.index][1].wait() # Make sure the tag is ready
-
- with tags_comps_lock:
- tags_comps[0] += 1
-
- self.update_compatibles(new_idxes[self.index][0])
-
- new_idxes[self.index][3].set() # Tell that UpdateComps processed the tag
-
- self.index += self.inc
-
- with tags_comps_lock:
- tags_comps[1] += 1
- progress('comps: Thread ' + str(tags_comps[1]) + '/' + str(self.inc) + ' finished', tags_comps[0])
-
- def update_compatibles(self, idxes):
- global hash_file_lock, comps_lock, tags_comps
-
- for idx in idxes:
- if idx % 1000 == 0: progress('comps: ' + str(idx), tags_comps[0])
-
- with hash_file_lock:
- hash = db.hash.get(idx)
- filename = db.file.get(idx)
-
- family = lib.getFileFamily(filename)
- if family in [None, 'K', 'M']: continue
-
- lines = compatibles_parser.run(scriptLines('get-blob', hash), family)
- comps = {}
- for l in lines:
- ident, line = l.split(' ')
-
- if ident in comps:
- comps[ident] += ',' + str(line)
- else:
- comps[ident] = str(line)
-
- with comps_lock:
- for ident, lines in comps.items():
- if db.comps.exists(ident):
- obj = db.comps.get(ident)
- else:
- obj = data.RefList()
-
- obj.append(idx, lines, family)
- if verbose:
- print(f"comps: {ident} in #{idx} @ {line}")
- db.comps.put(ident, obj)
-
-
-class UpdateCompsDocs(Thread):
- def __init__(self, start, inc):
- Thread.__init__(self, name="UpdateCompsDocsElixir")
- self.index = start
- self.inc = inc # Equivalent to the number of comps_docs threads
-
- def run(self):
- global new_idxes, tags_done, tags_comps_docs, tags_comps_docs_lock
-
- while not (tags_done and self.index >= len(new_idxes)):
- if self.index >= len(new_idxes):
- # Wait for new tags
- with tag_ready:
- tag_ready.wait()
- continue
-
- new_idxes[self.index][1].wait() # Make sure the tag is ready
- new_idxes[self.index][3].wait() # Make sure UpdateComps processed the tag
- new_idxes[self.index][4].wait() # Make sure UpdateVersions processed the tag
-
- with tags_comps_docs_lock:
- tags_comps_docs[0] += 1
-
- self.update_compatibles_bindings(new_idxes[self.index][0])
-
- self.index += self.inc
-
- with tags_comps_docs_lock:
- tags_comps_docs[1] += 1
- progress('comps_docs: Thread ' + str(tags_comps_docs[1]) + '/' + str(self.inc) + ' finished', tags_comps_docs[0])
-
- def update_compatibles_bindings(self, idxes):
- global hash_file_lock, comps_lock, comps_docs_lock, tags_comps_docs, bindings_idxes
-
- for idx in idxes:
- if idx % 1000 == 0: progress('comps_docs: ' + str(idx), tags_comps_docs[0])
-
- if not idx in bindings_idxes: # Parse only bindings doc files
- continue
-
- with hash_file_lock:
- hash = db.hash.get(idx)
-
- family = 'B'
- lines = compatibles_parser.run(scriptLines('get-blob', hash), family)
- comps_docs = {}
- with comps_lock:
- for l in lines:
- ident, line = l.split(' ')
-
- if db.comps.exists(ident):
- if ident in comps_docs:
- comps_docs[ident] += ',' + str(line)
- else:
- comps_docs[ident] = str(line)
-
- with comps_docs_lock:
- for ident, lines in comps_docs.items():
- if db.comps_docs.exists(ident):
- obj = db.comps_docs.get(ident)
- else:
- obj = data.RefList()
-
- obj.append(idx, lines, family)
- if verbose:
- print(f"comps_docs: {ident} in #{idx} @ {line}")
- db.comps_docs.put(ident, obj)
-
-
-def progress(msg, current):
- print('{} - {} ({:.1%})'.format(project, msg, current/num_tags))
-
-
-# Main
-
-# Check number of threads arg
-if len(argv) >= 2 and argv[1].isdigit() :
- cpu = int(argv[1])
-
- if cpu < 5 :
- cpu = 5
-
-# Distribute threads among functions using the following rules :
-# There are more (or equal) refs threads than others
-# There are more (or equal) defs threads than docs or comps threads
-# Example : if cpu=6 : defs=1, refs=2, docs=1, comps=1, comps_docs=1
-# if cpu=7 : defs=2, refs=2, docs=1, comps=1, comps_docs=1
-# if cpu=8 : defs=2, refs=3, docs=1, comps=1, comps_docs=1
-# if cpu=11: defs=2, refs=3, docs=2, comps=2, comps_docs=2
-quo, rem = divmod(cpu, 5)
-num_th_refs = quo
-num_th_defs = quo
-num_th_docs = quo
-
-# If DT bindings support is enabled, use $quo threads for each of the 2 threads
-# Otherwise add them to the remaining threads
-if dts_comp_support:
- num_th_comps = quo
- num_th_comps_docs = quo
-else :
- num_th_comps = 0
- num_th_comps_docs = 0
- rem += 2*quo
-
-quo, rem = divmod(rem, 2)
-num_th_defs += quo
-num_th_refs += quo + rem
-
-tag_buf = []
-for tag in scriptLines('list-tags'):
- if not db.vers.exists(tag):
- tag_buf.append(tag)
-
-num_tags = len(tag_buf)
-project = lib.currentProject()
-
-print(project + ' - found ' + str(num_tags) + ' new tags')
-
-if not num_tags:
- # Backward-compatibility: generate defs caches if they are empty.
- if db.defs_cache['C'].db.stat()['nkeys'] == 0:
- generate_defs_caches()
- exit(0)
-
-threads_list.append(UpdateIds(tag_buf))
-threads_list.append(UpdateVersions(tag_buf))
-
-# Define defs threads
-for i in range(num_th_defs):
- threads_list.append(UpdateDefs(i, num_th_defs))
-# Define refs threads
-for i in range(num_th_refs):
- threads_list.append(UpdateRefs(i, num_th_refs))
-# Define docs threads
-for i in range(num_th_docs):
- threads_list.append(UpdateDocs(i, num_th_docs))
-# Define comps threads
-for i in range(num_th_comps):
- threads_list.append(UpdateComps(i, num_th_comps))
-# Define comps_docs threads
-for i in range(num_th_comps_docs):
- threads_list.append(UpdateCompsDocs(i, num_th_comps_docs))
-
-
-# Start to process tags
-threads_list[0].start()
-
-# Wait until the first tag is ready
-with tag_ready:
- tag_ready.wait()
-
-# Start remaining threads
-for i in range(1, len(threads_list)):
- threads_list[i].start()
-
-# Make sure all threads finished
-for i in range(len(threads_list)):
- threads_list[i].join()
diff --git a/utils/index b/utils/index
index 6e84a3e7..61250a22 100755
--- a/utils/index
+++ b/utils/index
@@ -59,14 +59,10 @@ project_fetch() {
# $1 is the project path (parent of data/ and repo/).
project_index() {
- if test -z "$ELIXIR_THREADS"; then
- ELIXIR_THREADS="$(nproc)"
- fi
-
elixir_sources="$(dirname "$(dirname "$0")")"
LXR_REPO_DIR=$1/repo LXR_DATA_DIR=$1/data \
- python3 "$elixir_sources/update.py" $ELIXIR_THREADS
+ python3 -m elixir.update
}
# $1 is the Elixir root data path.