diff --git a/elixir/data.py b/elixir/data.py index b952943c..74116af1 100755 --- a/elixir/data.py +++ b/elixir/data.py @@ -18,13 +18,21 @@ # You should have received a copy of the GNU Affero General Public License # along with Elixir. If not, see . +from typing import OrderedDict import berkeleydb import re +import time from . import lib +from .lib import autoBytes import os import os.path import errno +# Cache size used by the update script for the largest databases. Tuple of (gigabytes, bytes). +# https://docs.oracle.com/database/bdb181/html/api_reference/C/dbset_cachesize.html +# https://docs.oracle.com/database/bdb181/html/programmer_reference/general_am_conf.html#am_conf_cachesize +CACHESIZE = (2,0) + deflist_regex = re.compile(b'(\d*)(\w)(\d*)(\w),?') deflist_macro_regex = re.compile('\dM\d+(\w)') @@ -59,43 +67,86 @@ class DefList: def __init__(self, data=b'#'): self.data, self.families = data.split(b'#') + self.modified = False + self.entries = None + self.to_append = [] + + def populate_entries(self): + entries_modified = False + if self.entries is None: + self.entries = [ + (int(d[0]), d[1], int(d[2]), d[3]) + for d in deflist_regex.findall(self.data) + ] + entries_modified = True + + if len(self.to_append) != 0: + self.entries += self.to_append + self.to_append = [] + entries_modified = True + + if entries_modified: + self.entries.sort(key=lambda x:int(x[0])) + def iter(self, dummy=False): # Get all element in a list of sublists and sort them - entries = deflist_regex.findall(self.data) - entries.sort(key=lambda x:int(x[0])) - for id, type, line, family in entries: - id = int(id) - type = defTypeR [type.decode()] - line = int(line) - family = family.decode() - yield id, type, line, family + if self.entries is None: + self.populate_entries() + + for id, type, line, family in self.entries: + yield id, defTypeR[type.decode()], int(line), family.decode() if dummy: yield maxId, None, None, None - def append(self, id, type, line, family): + def exists(self, idx: int, line_num: int): + if self.entries is None: + self.populate_entries() + + for id, _, line, _ in self.entries: + if id == idx and int(line) == line_num: + return True + + return False + + def append(self, id: int, type, line: int, family: str): if type not in defTypeD: return - p = str(id) + defTypeD[type] + str(line) + family - if self.data != b'': - p = ',' + p - self.data += p.encode() + + self.modified = True + if self.entries is None: + self.to_append.append((id, defTypeD[type].encode(), line, family.encode())) + else: + self.entries.append((id, defTypeD[type].encode(), line, family.encode())) + self.add_family(family) - def pack(self): - return self.data + b'#' + self.families + def pack(self) -> bytes: + if self.entries is None: + to_append = b",".join([ + str(arg[0]).encode() + arg[1] + str(arg[2]).encode() + arg[3] + for arg in self.to_append + ]) + self.to_append = [] + self.data += to_append + return self.data + b'#' + self.families + else: + self.data = b",".join([ + str(arg[0]).encode() + arg[1] + str(arg[2]).encode() + arg[3] + for arg in self.entries + ]) + return self.data + b'#' + self.families - def add_family(self, family): - family = family.encode() + def add_family(self, family: str): if not family in self.families.split(b','): if self.families != b'': - family = b',' + family - self.families += family + family = ',' + family + self.families += family.encode() def get_families(self): - return self.families.decode().split(',') + return [f.decode() for f in self.families.split(b',')] def get_macros(self): 
- return deflist_macro_regex.findall(self.data.decode()) or '' + return (deflist_macro_regex.findall(self.data.decode()) + [entry[1] for entry in self.to_append]) or '' class PathList: '''Stores associations between a blob ID and a file path. @@ -124,69 +175,205 @@ class RefList: and the corresponding family.''' def __init__(self, data=b''): self.data = data + self.entries = None + self.to_append = [] + self.sorted = False + self.modified = False + + def decode_entry(self, k): + return (int(k[0].decode()), k[1].decode(), k[2].decode()) + + def populate_entries(self): + self.entries = [self.decode_entry(x.split(b':')) for x in self.data.split(b'\n')[:-1]] + self.entries += self.to_append + self.to_append = [] + self.entries.sort(key=lambda x:int(x[0])) def iter(self, dummy=False): - # Split all elements in a list of sublists and sort them - entries = [x.split(b':') for x in self.data.split(b'\n')[:-1]] - entries.sort(key=lambda x:int(x[0])) - for b, c, d in entries: - b = int(b.decode()) - c = c.decode() - d = d.decode() + if self.entries is None: + self.populate_entries() + + for b, c, d in self.entries: yield b, c, d if dummy: yield maxId, None, None def append(self, id, lines, family): - p = str(id) + ':' + lines + ':' + family + '\n' - self.data += p.encode() + self.modified = True + if self.entries is not None: + self.entries.append((id, lines, family)) + else: + self.to_append.append((id, lines, family)) def pack(self): - return self.data + if self.entries is not None: + assert len(self.to_append) == 0 + result = "".join([str(id) + ":" + lines + ":" + family + "\n" for id, lines, family in self.entries]) + return result.encode() + elif len(self.to_append) != 0: + result = "".join([str(id) + ":" + lines + ":" + family + "\n" for id, lines, family in self.to_append]) + self.data += result.encode() + self.to_append = [] + return self.data class BsdDB: - def __init__(self, filename, readonly, contentType, shared=False): + def __init__(self, filename, readonly, contentType, shared=False, cachesize=None): self.filename = filename self.db = berkeleydb.db.DB() - flags = berkeleydb.db.DB_THREAD if shared else 0 + self.flags = berkeleydb.db.DB_THREAD if shared else 0 - if readonly: - flags |= berkeleydb.db.DB_RDONLY - self.db.open(filename, flags=flags) + self.readonly = readonly + if self.readonly: + self.flags |= berkeleydb.db.DB_RDONLY else: - flags |= berkeleydb.db.DB_CREATE - self.db.open(filename, flags=flags, mode=0o644, dbtype=berkeleydb.db.DB_BTREE) + self.flags |= berkeleydb.db.DB_CREATE + + if cachesize is not None: + self.db.set_cachesize(cachesize[0], cachesize[1]) + + self.open() self.ctype = contentType + def open(self): + if self.readonly: + self.db.open(self.filename, flags=self.flags) + else: + self.db.open(self.filename, flags=self.flags, mode=0o644, dbtype=berkeleydb.db.DB_BTREE) + def exists(self, key): - key = lib.autoBytes(key) + key = autoBytes(key) return self.db.exists(key) def get(self, key): - key = lib.autoBytes(key) + key = autoBytes(key) p = self.db.get(key) - return self.ctype(p) if p is not None else None + if p is None: + return None + p = self.ctype(p) + return p def get_keys(self): return self.db.keys() def put(self, key, val, sync=False): - key = lib.autoBytes(key) - val = lib.autoBytes(val) + key = autoBytes(key) + val = autoBytes(val) + if type(val) is not bytes: + val = val.pack() + self.db.put(key, val) + if sync: + self.db.sync() + + def sync(self): + self.db.sync() + + def close(self): + self.db.close() + + def __len__(self): + return 
self.db.stat()["nkeys"] + +class CachedBsdDB: + def __init__(self, filename, readonly, contentType, cachesize): + self.filename = filename + self.db = None + self.readonly = readonly + + self.cachesize = cachesize + self.cache = OrderedDict() + + self.open() + + self.ctype = contentType + + def open(self): + if self.db is None: + self.db = berkeleydb.db.DB() + + flags = 0 + + if self.readonly: + flags |= berkeleydb.db.DB_RDONLY + self.db.open(self.filename, flags=flags) + else: + flags |= berkeleydb.db.DB_CREATE + self.db.open(self.filename, flags=flags, mode=0o644, dbtype=berkeleydb.db.DB_BTREE) + + def exists(self, key): + if key in self.cache: + return True + + return self.db.exists(autoBytes(key)) + + def get(self, key): + if key in self.cache: + self.cache.move_to_end(key) + return self.cache[key] + + p = self.db.get(autoBytes(key)) + if p is None: + return None + p = self.ctype(p) + + self.cache[key] = p + self.cache.move_to_end(key) + if len(self.cache) > self.cachesize: + old_k, old_v = self.cache.popitem(last=False) + if old_v.modified: + self.put_raw(old_k, old_v) + + return p + + def get_keys(self): + self.sync() + return self.db.keys() + + def put(self, key, val): + if self.readonly: + raise Exception("database is readonly") + + self.cache[key] = val + self.cache.move_to_end(key) + if len(self.cache) > self.cachesize: + old_k, old_v = self.cache.popitem(last=False) + if old_v.modified: + self.put_raw(old_k, old_v) + + def put_raw(self, key, val, sync=False): + if self.readonly: + raise Exception("database is readonly") + + key = autoBytes(key) + val = autoBytes(val) if type(val) is not bytes: val = val.pack() self.db.put(key, val) if sync: self.db.sync() + def sync(self): + start = time.time() + flushed = 0 + if not self.readonly: + for k, v in self.cache.items(): + if v.modified: + v.modified = False + self.put_raw(k, v) + flushed += 1 + + print("synced", flushed, "/", len(self.cache), time.time()-start) + self.db.sync() + def close(self): + self.sync() self.db.close() + self.db = None def __len__(self): return self.db.stat()["nkeys"] class DB: - def __init__(self, dir, readonly=True, dtscomp=False, shared=False): + def __init__(self, dir, readonly=True, dtscomp=False, shared=False, update_cache=None): if os.path.isdir(dir): self.dir = dir else: @@ -194,6 +381,11 @@ def __init__(self, dir, readonly=True, dtscomp=False, shared=False): ro = readonly + if update_cache: + db_cls = lambda dir, ro, ctype: CachedBsdDB(dir, ro, ctype, cachesize=update_cache) + else: + db_cls = lambda dir, ro, ctype: BsdDB(dir, ro, ctype, shared=shared) + self.vars = BsdDB(dir + '/variables.db', ro, lambda x: int(x.decode()), shared=shared) # Key-value store of basic information self.blob = BsdDB(dir + '/blobs.db', ro, lambda x: int(x.decode()), shared=shared) @@ -203,7 +395,7 @@ def __init__(self, dir, readonly=True, dtscomp=False, shared=False): self.file = BsdDB(dir + '/filenames.db', ro, lambda x: x.decode(), shared=shared) # Map serial number to filename self.vers = BsdDB(dir + '/versions.db', ro, PathList, shared=shared) - self.defs = BsdDB(dir + '/definitions.db', ro, DefList, shared=shared) + self.defs = db_cls(dir + '/definitions.db', ro, DefList) self.defs_cache = {} NOOP = lambda x: x self.defs_cache['C'] = BsdDB(dir + '/definitions-cache-C.db', ro, NOOP, shared=shared) @@ -211,12 +403,12 @@ def __init__(self, dir, readonly=True, dtscomp=False, shared=False): self.defs_cache['D'] = BsdDB(dir + '/definitions-cache-D.db', ro, NOOP, shared=shared) self.defs_cache['M'] = BsdDB(dir + 
'/definitions-cache-M.db', ro, NOOP, shared=shared) assert sorted(self.defs_cache.keys()) == sorted(lib.CACHED_DEFINITIONS_FAMILIES) - self.refs = BsdDB(dir + '/references.db', ro, RefList, shared=shared) - self.docs = BsdDB(dir + '/doccomments.db', ro, RefList, shared=shared) + self.refs = db_cls(dir + '/references.db', ro, RefList) + self.docs = db_cls(dir + '/doccomments.db', ro, RefList) self.dtscomp = dtscomp if dtscomp: - self.comps = BsdDB(dir + '/compatibledts.db', ro, RefList, shared=shared) - self.comps_docs = BsdDB(dir + '/compatibledts_docs.db', ro, RefList, shared=shared) + self.comps = db_cls(dir + '/compatibledts.db', ro, RefList) + self.comps_docs = db_cls(dir + '/compatibledts_docs.db', ro, RefList) # Use a RefList in case there are multiple doc comments for an identifier def close(self): diff --git a/elixir/lib.py b/elixir/lib.py index 7d7d0757..2442e107 100755 --- a/elixir/lib.py +++ b/elixir/lib.py @@ -21,6 +21,7 @@ import sys import logging import subprocess, os +from typing import List logger = logging.getLogger(__name__) @@ -46,7 +47,7 @@ def run_cmd(*args, env=None): # Invoke ./script.sh with the given arguments # Returns the list of output lines -def scriptLines(*args, env=None): +def scriptLines(*args, env=None) -> List[bytes]: p = script(*args, env=env) p = p.split(b'\n') del p[-1] diff --git a/elixir/update.py b/elixir/update.py new file mode 100644 index 00000000..cd2d2493 --- /dev/null +++ b/elixir/update.py @@ -0,0 +1,441 @@ +import os.path +import logging +import time +import signal +import bisect +import cProfile +from multiprocessing import cpu_count, set_start_method +from multiprocessing.pool import Pool +from typing import Dict, Iterable, List, Optional, Tuple +from collections import OrderedDict + +from find_compatible_dts import FindCompatibleDTS + +from elixir.data import DB, BsdDB, CachedBsdDB, DefList, PathList, RefList +from elixir.lib import ( + compatibleFamily, + compatibleMacro, + getDataDir, + getFileFamily, + isIdent, + script, + scriptLines, +) + +logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s') +logger = logging.getLogger(__name__) + +# File identification - id, hash, filename +FileId = Tuple[int, bytes, str] + +# Definitions parsing output, ident -> list of (file_idx, type, line, family) +DefsDict = Dict[bytes, List[Tuple[int, str, int, str]]] + +# References parsing output, ident -> (file_idx, family) -> list of lines +RefsDict = Dict[bytes, Dict[Tuple[int, str], List[int]]] + +# Generic dictionary of ident -> list of lines +LinesListDict = Dict[str, List[int]] + +# File idx -> (hash, filename, is a new file?) 
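
The CachedBsdDB class introduced above defers writes through an in-memory LRU built on collections.OrderedDict: a value is flushed to Berkeley DB only when it is evicted while still marked as modified, or when sync() walks the whole cache. A minimal, self-contained sketch of that write-back pattern, with illustrative names and a plain dict standing in for the on-disk database:

from collections import OrderedDict

class WriteBackLRU:
    """Illustrative write-back LRU, mirroring CachedBsdDB's caching strategy."""
    def __init__(self, backend, capacity):
        self.backend = backend        # dict-like persistent store (stands in for BsdDB)
        self.capacity = capacity      # maximum number of cached entries
        self.cache = OrderedDict()    # key -> value, least recently used first
        self.dirty = set()            # keys with unflushed modifications

    def get(self, key):
        if key in self.cache:
            self.cache.move_to_end(key)      # most recently used goes to the end
            return self.cache[key]
        value = self.backend.get(key)
        if value is not None:
            self._cache(key, value)
        return value

    def put(self, key, value):
        self.dirty.add(key)                  # write is deferred until eviction or sync
        self._cache(key, value)

    def _cache(self, key, value):
        self.cache[key] = value
        self.cache.move_to_end(key)
        if len(self.cache) > self.capacity:
            old_key, old_value = self.cache.popitem(last=False)   # evict the LRU entry
            if old_key in self.dirty:
                self.backend[old_key] = old_value                 # flush only if modified
                self.dirty.discard(old_key)

    def sync(self):
        for key in list(self.dirty):         # flush everything that is still dirty
            self.backend[key] = self.cache[key]
        self.dirty.clear()

# Example: a plain dict plays the role of the on-disk database.
store = {}
lru = WriteBackLRU(store, capacity=2)
lru.put('a', 1); lru.put('b', 2); lru.put('c', 3)   # 'a' is evicted and flushed
assert store == {'a': 1}
lru.sync()
assert store == {'a': 1, 'b': 2, 'c': 3}
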
+IdxCache = Dict[int, Tuple[bytes, str, bool]] + +class Cache: + def __init__(self, size): + self.cache = OrderedDict() + self.size = size + + def contains(self, key): + return key in self.cache + + def get(self, key): + self.cache.move_to_end(key) + return self.cache[key] + + def put(self, key, val): + self.cache[key] = val + self.cache.move_to_end(key) + if len(self.cache) > self.size: + self.cache.popitem(last=False) + +# Check if definition for ident is visible in current version +def def_in_version(def_ident: DefList, idx_to_hash_and_filename: IdxCache) -> bool: + def_ident.populate_entries() + + prev_idx = None + for def_idx, _, _, _ in reversed(def_ident.entries): + if def_idx == prev_idx: + continue + if def_idx in idx_to_hash_and_filename: + return True + prev_idx = def_idx + return False + +# Add definitions to database +def add_defs(db: DB, defs: DefsDict): + for ident, occ_list in defs.items(): + obj = db.defs.get(ident) + if obj is None: + obj = DefList() + + for (idx, type, line, family) in occ_list: + obj.append(idx, type, line, family) + + db.defs.put(ident, obj) + +# Add references to database +def add_refs(db: DB, in_ver_cache: Cache, idx_to_hash_and_filename: IdxCache, refs: RefsDict): + for ident, idx_to_lines in refs.items(): + deflist = db.defs.get(ident) + if deflist is None: + continue + + if not in_ver_cache.contains(ident): + in_version = def_in_version(deflist, idx_to_hash_and_filename) + if not in_version: + in_ver_cache.put(ident, False) + continue + in_ver_cache.put(ident, True) + elif not in_ver_cache.get(ident): + continue + + obj = db.refs.get(ident) + if obj is None: + obj = RefList() + + for (idx, family), lines_str in idx_to_lines.items(): + obj.append(idx, lines_str, family) + + db.refs.put(ident, obj) + +# Add documentation references to database +def add_docs(db: DB, idx: int, family: str, docs: Dict[str, List[int]]): + add_to_lineslist(db.docs, idx, family, docs) + +# Add compatible references to database +def add_comps(db: DB, idx: int, family: str, comps: Dict[str, List[int]]): + add_to_lineslist(db.comps, idx, family, comps) + +# Add compatible docs to database +def add_comps_docs(db: DB, idx: int, family: str, comps_docs: Dict[str, List[int]]): + comps_result = {} + for ident, v in comps_docs.items(): + if db.comps.exists(ident): + comps_result[ident] = v + + add_to_lineslist(db.comps_docs, idx, family, comps_result) + +# Add data to a database file that uses lines list schema +def add_to_lineslist(db_file: BsdDB, idx: int, family: str, to_add: Dict[str, List[int]]): + for ident, lines in to_add.items(): + obj = db_file.get(ident) + if obj is None: + obj = RefList() + + lines_str = ','.join((str(n) for n in lines)) + obj.append(idx, lines_str, family) + db_file.put(ident, obj) + + +# Adds blob list to database, returns blob id -> (hash, filename) dict +def collect_blobs(db: DB, tag: bytes) -> IdxCache: + idx = db.vars.get('numBlobs') + if idx is None: + idx = 0 + + # Get blob hashes and associated file names (without path) + blobs = scriptLines('list-blobs', '-p', tag) + versionBuf = [] + idx_to_hash_and_filename = {} + + # Collect new blobs, assign database ids to the blobs + for blob in blobs: + hash, path = blob.split(b' ',maxsplit=1) + filename = os.path.basename(path.decode()) + blob_idx = db.blob.get(hash) + + if blob_idx is not None: + versionBuf.append((blob_idx, path)) + if blob_idx not in idx_to_hash_and_filename: + idx_to_hash_and_filename[blob_idx] = (hash, filename, False) + else: + versionBuf.append((idx, path)) + 
idx_to_hash_and_filename[idx] = (hash, filename, True) + db.blob.put(hash, idx) + db.hash.put(idx, hash) + db.file.put(idx, filename) + idx += 1 + + # Update number of blobs in the database + db.vars.put('numBlobs', idx) + + # Add mapping blob id -> path to version database + versionBuf.sort() + obj = PathList() + for i, path in versionBuf: + obj.append(i, path) + db.vers.put(tag, obj, sync=True) + + return idx_to_hash_and_filename + +# Generate definitions cache databases +def generate_defs_caches(db: DB): + for key in db.defs.get_keys(): + value = db.defs.get(key) + for family in ['C', 'K', 'D', 'M']: + if (compatibleFamily(value.get_families(), family) or + compatibleMacro(value.get_macros(), family)): + db.defs_cache[family].put(key, b'') + + +# Collect definitions from ctags for a file +def get_defs(file_id: FileId) -> Optional[DefsDict]: + idx, hash, filename = file_id + defs = {} + family = getFileFamily(filename) + if family in (None, 'M'): + return None + + lines = scriptLines('parse-defs', hash, filename, family) + + for l in lines: + ident, type, line = l.split(b' ') + type = type.decode() + line = int(line.decode()) + if isIdent(ident): + if ident not in defs: + defs[ident] = [] + defs[ident].append((idx, type, line, family)) + + return defs + +def call_get_refs(arg: Tuple[FileId, str]) -> Optional[RefsDict]: + return get_refs(arg[0], CachedBsdDB(arg[1], True, DefList, 1000)) + +# Collect references from the tokenizer for a file +def get_refs(file_id: FileId, defs: CachedBsdDB) -> Optional[RefsDict]: + idx, hash, filename = file_id + refs = {} + family = getFileFamily(filename) + if family is None: + return + + # Kconfig values are saved as CONFIG_ + prefix = b'' if family != 'K' else b'CONFIG_' + + tokens = scriptLines('tokenize-file', '-b', hash, family) + even = True + line_num = 1 + + def deflist_exists(deflist, idx: int, line: int): + deflist.populate_entries() + start = bisect.bisect_left(deflist.entries, idx, key=lambda x: x[0]) + + for def_idx, _, def_line, _ in deflist.entries[start:]: + if def_idx == idx: + if def_line == line: + return True + else: + break + + return False + + for tok in tokens: + even = not even + if even: + tok = prefix + tok + + # We only index CONFIG_??? 
in makefiles + if (family != 'M' or tok.startswith(b'CONFIG_')): + deflist = defs.get(tok) + if not deflist: + continue + + if deflist_exists(deflist, idx, line_num): + continue + + if tok not in refs: + refs[tok] = {} + + if (idx, family) not in refs[tok]: + refs[tok][(idx, family)] = str(line_num) + else: + refs[tok][(idx, family)] += "," + str(line_num) + + else: + line_num += tok.count(b'\1') + + + return refs + +# Collect compatible script output into lineslinst-schema compatible format +def collect_get_blob_output(lines: Iterable[str]) -> LinesListDict: + results = {} + for l in lines: + ident, line = l.split(' ') + line = int(line) + + if ident not in results: + results[ident] = [] + results[ident].append(line) + + return results + +# Collect docs from doc comments script for a single file +def get_docs(file_id: FileId) -> Optional[Tuple[int, str, LinesListDict]]: + idx, hash, filename = file_id + family = getFileFamily(filename) + if family in (None, 'M'): return + + start = time.time() + lines = (line.decode() for line in scriptLines('parse-docs', hash, filename)) + parser_time = time.time()-start + + if parser_time > 10: + print("docs timeout", parser_time, file_id) + + docs = collect_get_blob_output(lines) + + return (idx, family, docs) + +# Collect compatible references for a single file +def get_comps(file_id: FileId) -> Optional[Tuple[int, str, LinesListDict]]: + idx, hash, filename = file_id + family = getFileFamily(filename) + if family in (None, 'K', 'M'): return + + compatibles_parser = FindCompatibleDTS() + + start = time.time() + lines = compatibles_parser.run(scriptLines('get-blob', hash), family) + parser_time = time.time()-start + + if parser_time > 10: + print("comps docs timeout", parser_time, file_id) + + comps = collect_get_blob_output(lines) + + return (idx, family, comps) + +# Collect compatible documentation references for a single file +def get_comps_docs(file_id: FileId) -> Optional[Tuple[int, str, LinesListDict]]: + idx, hash, _ = file_id + family = 'B' + + compatibles_parser = FindCompatibleDTS() + lines = compatibles_parser.run(scriptLines('get-blob', hash), family) + comps_docs = {} + for l in lines: + ident, line = l.split(' ') + + if ident not in comps_docs: + comps_docs[ident] = [] + comps_docs[ident].append(int(line)) + + return (idx, family, comps_docs) + + +# Update a single version - collects data from all the stages and saves it in the database +def update_version(db: DB, tag: bytes, pool: Pool, dts_comp_support: bool): + idx_to_hash_and_filename = collect_blobs(db, tag) + + # Collect blobs to process and split list of blobs into chunks + idxes = [(idx, hash, filename) for (idx, (hash, filename, new)) in idx_to_hash_and_filename.items() if new] + chunksize = int(len(idxes) / cpu_count()) + chunksize = min(max(1, chunksize), 100) + + logger.info("collecting blobs done, new blobs: %d", len(idxes)) + + for result in pool.imap_unordered(get_defs, idxes, chunksize): + if result is not None: + add_defs(db, result) + + logger.info("defs done") + + for result in pool.imap_unordered(get_docs, idxes, chunksize): + if result is not None: + add_docs(db, *result) + + logger.info("docs done") + + if dts_comp_support: + comp_idxes = [idx for idx in idxes if getFileFamily(idx[2]) not in (None, 'K', 'M')] + comp_chunksize = int(len(comp_idxes) / cpu_count()) + comp_chunksize = min(max(1, comp_chunksize), 100) + for result in pool.imap_unordered(get_comps, comp_idxes, comp_chunksize): + if result is not None: + add_comps(db, *result) + + logger.info("dts comps 
done") + + for result in pool.imap_unordered(get_comps_docs, idxes, chunksize): + if result is not None: + add_comps_docs(db, *result) + + logger.info("dts comps docs done") + + + #with cProfile.Profile() as pr: + db.defs.close() + db.defs.readonly = True + db.defs.open() + + in_def_cache = Cache(10000) + ref_idxes = [(idx, db.defs.filename) for idx in idxes] + ref_chunksize = int(len(ref_idxes) / cpu_count()) + ref_chunksize = min(max(1, ref_chunksize), 100) + #pr.dump_stats("5refs"+str(int(time.time()))) + + logger.info("ref blobs: %d", len(ref_idxes)) + + for result in pool.imap_unordered(call_get_refs, ref_idxes, ref_chunksize): + if result is not None: + add_refs(db, in_def_cache, idx_to_hash_and_filename, result) + + db.defs.close() + db.defs.readonly = False + db.defs.open() + + logger.info("refs done") + logger.info("update done") + + +sigint_caught = False + +def sigint_handler(signum, _frame): + global sigint_caught + if not sigint_caught: + logger.info("Caught SIGINT... the script will exit after processing this version") + signal.signal(signum, signal.SIG_IGN) + sigint_caught = True + +signal.signal(signal.SIGINT, sigint_handler) + +def ignore_sigint(): + signal.signal(signal.SIGINT, lambda _,__: None) + +if __name__ == "__main__": + + dts_comp_support = bool(int(script('dts-comp'))) + db = DB(getDataDir(), readonly=False, dtscomp=dts_comp_support, shared=False, update_cache=100000) + + set_start_method('spawn') + with Pool(initializer=ignore_sigint) as pool: + for tag in scriptLines('list-tags'): + #if not tag.startswith(b'v6'): + # continue + + if sigint_caught: + break + + if not db.vers.exists(tag): + logger.info("updating tag %s", tag) + update_version(db, tag, pool, dts_comp_support) + + logger.info("generating def caches") + generate_defs_caches(db) + logger.info("def caches generated") + db.close() + logger.info("database closed") + + diff --git a/find_compatible_dts.py b/find_compatible_dts.py index 8aec94d6..a1a356f1 100755 --- a/find_compatible_dts.py +++ b/find_compatible_dts.py @@ -31,6 +31,8 @@ def __init__(self): self.regex_bindings = re.compile("([\w-]+,?[\w-]+)") def parse_c(self, content): + if "compatible" not in content: + return [] return self.regex_c.findall(content) def parse_dts(self, content): diff --git a/script.sh b/script.sh index 3bbff2a7..656a2633 100755 --- a/script.sh +++ b/script.sh @@ -165,7 +165,7 @@ parse_defs_C() git cat-file blob "$opt1" > "$full_path" # Use ctags to parse most of the defs - ctags -x --kinds-c=+p+x --extras='-{anonymous}' "$full_path" | + ctags -u -x --kinds-c=+p+x --extras='-{anonymous}' "$full_path" | grep -avE -e "^operator " -e "^CONFIG_" | awk '{print $1" "$2" "$3}' @@ -182,7 +182,7 @@ parse_defs_K() tmp=`mktemp -d` full_path=$tmp/$opt2 git cat-file blob "$opt1" > "$full_path" - ctags -x --language-force=kconfig --kinds-kconfig=c --extras-kconfig=-{configPrefixed} "$full_path" | + ctags -u -x --language-force=kconfig --kinds-kconfig=c --extras-kconfig=-{configPrefixed} "$full_path" | awk '{print "CONFIG_"$1" "$2" "$3}' rm "$full_path" rmdir $tmp @@ -193,7 +193,7 @@ parse_defs_D() tmp=`mktemp -d` full_path=$tmp/$opt2 git cat-file blob "$opt1" > "$full_path" - ctags -x --language-force=dts "$full_path" | + ctags -u -x --language-force=dts "$full_path" | awk '{print $1" "$2" "$3}' rm "$full_path" rmdir $tmp @@ -204,7 +204,13 @@ parse_docs() tmpfile=`mktemp` git cat-file blob "$opt1" > "$tmpfile" - "$script_dir/find-file-doc-comments.pl" "$tmpfile" || exit "$?" 
+ + # Shortcut: if '/**' isn't present in the file, it cannot contain a doc. + # This avoids calling find-file-doc-comments.pl on most files, which is an + # expensive operation. + if grep -qF '/**' "$tmpfile"; then + "$script_dir/find-file-doc-comments.pl" "$tmpfile" || exit "$?" + fi rm -rf "$tmpfile" } diff --git a/update.py b/update.py deleted file mode 100755 index 9d84ff31..00000000 --- a/update.py +++ /dev/null @@ -1,638 +0,0 @@ -#!/usr/bin/env python3 - -# This file is part of Elixir, a source code cross-referencer. -# -# Copyright (C) 2017--2020 Mikaƫl Bouillot -# Maxime Chretien -# and contributors -# -# Elixir is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Elixir is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with Elixir. If not, see . - -# Throughout, an "idx" is the sequential number associated with a blob. -# This is different from that blob's Git hash. - -from sys import argv -from threading import Thread, Lock, Event, Condition - -import elixir.lib as lib -from elixir.lib import script, scriptLines -import elixir.data as data -from elixir.data import PathList -from find_compatible_dts import FindCompatibleDTS - -verbose = False - -dts_comp_support = int(script('dts-comp')) - -compatibles_parser = FindCompatibleDTS() - -db = data.DB(lib.getDataDir(), readonly=False, shared=True, dtscomp=dts_comp_support) - -# Number of cpu threads (+2 for version indexing) -cpu = 10 -threads_list = [] - -hash_file_lock = Lock() # Lock for db.hash and db.file -blobs_lock = Lock() # Lock for db.blobs -defs_lock = Lock() # Lock for db.defs -refs_lock = Lock() # Lock for db.refs -docs_lock = Lock() # Lock for db.docs -comps_lock = Lock() # Lock for db.comps -comps_docs_lock = Lock() # Lock for db.comps_docs -tag_ready = Condition() # Waiting for new tags - -new_idxes = [] # (new idxes, Event idxes ready, Event defs ready, Event comps ready, Event vers ready) -bindings_idxes = [] # DT bindings documentation files -idx_key_mod = 1000000 -defs_idxes = {} # Idents definitions stored with (idx*idx_key_mod + line) as the key. 
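
For contrast with the new code: the deleted updater tracked every definition in this process-wide defs_idxes dict, keyed by idx * idx_key_mod + line, so that a token sitting on its own definition line would not be indexed as a reference. The new get_refs answers the same question from the DefList itself by bisecting its entries, which populate_entries keeps sorted by blob idx. A sketch of that lookup, assuming the (idx, type, line, family) entry layout used above; note that the key= argument of bisect_left requires Python 3.10 or newer:

import bisect

def definition_exists(entries, blob_idx, line):
    """entries: list of (idx, type, line, family) tuples sorted by idx."""
    start = bisect.bisect_left(entries, blob_idx, key=lambda e: e[0])
    for entry_idx, _type, entry_line, _family in entries[start:]:
        if entry_idx != blob_idx:
            break                       # past the last entry for this blob
        if entry_line == line:
            return True                 # the token on this line is the definition itself
    return False

entries = [(3, b't', 10, b'C'), (3, b't', 42, b'C'), (7, b'd', 5, b'C')]
assert definition_exists(entries, 3, 42)
assert not definition_exists(entries, 7, 6)
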
- -tags_done = False # True if all tags have been added to new_idxes - -# Progress variables [tags, finished threads] -tags_defs = [0, 0] -tags_defs_lock = Lock() -tags_refs = [0, 0] -tags_refs_lock = Lock() -tags_docs = [0, 0] -tags_docs_lock = Lock() -tags_comps = [0, 0] -tags_comps_lock = Lock() -tags_comps_docs = [0, 0] -tags_comps_docs_lock = Lock() - -class UpdateIds(Thread): - def __init__(self, tag_buf): - Thread.__init__(self, name="UpdateIdsElixir") - self.tag_buf = tag_buf - - def run(self): - global new_idxes, tags_done, tag_ready - self.index = 0 - - for tag in self.tag_buf: - - new_idxes.append((self.update_blob_ids(tag), Event(), Event(), Event(), Event())) - - progress('ids: ' + tag.decode() + ': ' + str(len(new_idxes[self.index][0])) + - ' new blobs', self.index+1) - - new_idxes[self.index][1].set() # Tell that the tag is ready - - self.index += 1 - - # Wake up waiting threads - with tag_ready: - tag_ready.notify_all() - - tags_done = True - progress('ids: Thread finished', self.index) - - def update_blob_ids(self, tag): - - global hash_file_lock, blobs_lock - - if db.vars.exists('numBlobs'): - idx = db.vars.get('numBlobs') - else: - idx = 0 - - # Get blob hashes and associated file names (without path) - blobs = scriptLines('list-blobs', '-f', tag) - - new_idxes = [] - for blob in blobs: - hash, filename = blob.split(b' ',maxsplit=1) - with blobs_lock: - blob_exist = db.blob.exists(hash) - if not blob_exist: - db.blob.put(hash, idx) - - if not blob_exist: - with hash_file_lock: - db.hash.put(idx, hash) - db.file.put(idx, filename) - - new_idxes.append(idx) - if verbose: - print(f"New blob #{idx} {hash}:{filename}") - idx += 1 - db.vars.put('numBlobs', idx) - return new_idxes - - -class UpdateVersions(Thread): - def __init__(self, tag_buf): - Thread.__init__(self, name="UpdateVersionsElixir") - self.tag_buf = tag_buf - - def run(self): - global new_idxes, tag_ready - - index = 0 - - while index < len(self.tag_buf): - if index >= len(new_idxes): - # Wait for new tags - with tag_ready: - tag_ready.wait() - continue - - tag = self.tag_buf[index] - - new_idxes[index][1].wait() # Make sure the tag is ready - - self.update_versions(tag) - - new_idxes[index][4].set() # Tell that UpdateVersions processed the tag - - progress('vers: ' + tag.decode() + ' done', index+1) - - index += 1 - - progress('vers: Thread finished', index) - - def update_versions(self, tag): - global blobs_lock - - # Get blob hashes and associated file paths - blobs = scriptLines('list-blobs', '-p', tag) - buf = [] - - for blob in blobs: - hash, path = blob.split(b' ', maxsplit=1) - with blobs_lock: - idx = db.blob.get(hash) - buf.append((idx, path)) - - buf = sorted(buf) - obj = PathList() - for idx, path in buf: - obj.append(idx, path) - - # Store DT bindings documentation files to parse them later - if path[:33] == b'Documentation/devicetree/bindings': - bindings_idxes.append(idx) - - if verbose: - print(f"Tag {tag}: adding #{idx} {path}") - db.vers.put(tag, obj, sync=True) - - -def generate_defs_caches(): - for key in db.defs.get_keys(): - value = db.defs.get(key) - for family in ['C', 'K', 'D', 'M']: - if (lib.compatibleFamily(value.get_families(), family) or - lib.compatibleMacro(value.get_macros(), family)): - db.defs_cache[family].put(key, b'') - - -class UpdateDefs(Thread): - def __init__(self, start, inc): - Thread.__init__(self, name="UpdateDefsElixir") - self.index = start - self.inc = inc # Equivalent to the number of defs threads - - def run(self): - global new_idxes, tags_done, tag_ready, 
tags_defs, tags_defs_lock - - while not (tags_done and self.index >= len(new_idxes)): - if self.index >= len(new_idxes): - # Wait for new tags - with tag_ready: - tag_ready.wait() - continue - - new_idxes[self.index][1].wait() # Make sure the tag is ready - - with tags_defs_lock: - tags_defs[0] += 1 - - self.update_definitions(new_idxes[self.index][0]) - - new_idxes[self.index][2].set() # Tell that UpdateDefs processed the tag - - self.index += self.inc - - with tags_defs_lock: - tags_defs[1] += 1 - progress('defs: Thread ' + str(tags_defs[1]) + '/' + str(self.inc) + ' finished', tags_defs[0]) - - - def update_definitions(self, idxes): - global hash_file_lock, defs_lock, tags_defs - - for idx in idxes: - if idx % 1000 == 0: progress('defs: ' + str(idx), tags_defs[0]) - - with hash_file_lock: - hash = db.hash.get(idx) - filename = db.file.get(idx) - - family = lib.getFileFamily(filename) - if family in [None, 'M']: continue - - lines = scriptLines('parse-defs', hash, filename, family) - - with defs_lock: - for l in lines: - ident, type, line = l.split(b' ') - type = type.decode() - line = int(line.decode()) - - defs_idxes[idx*idx_key_mod + line] = ident - - if db.defs.exists(ident): - obj = db.defs.get(ident) - elif lib.isIdent(ident): - obj = data.DefList() - else: - continue - - obj.append(idx, type, line, family) - if verbose: - print(f"def {type} {ident} in #{idx} @ {line}") - db.defs.put(ident, obj) - - generate_defs_caches() - - -class UpdateRefs(Thread): - def __init__(self, start, inc): - Thread.__init__(self, name="UpdateRefsElixir") - self.index = start - self.inc = inc # Equivalent to the number of refs threads - - def run(self): - global new_idxes, tags_done, tags_refs, tags_refs_lock - - while not (tags_done and self.index >= len(new_idxes)): - if self.index >= len(new_idxes): - # Wait for new tags - with tag_ready: - tag_ready.wait() - continue - - new_idxes[self.index][1].wait() # Make sure the tag is ready - new_idxes[self.index][2].wait() # Make sure UpdateDefs processed the tag - - with tags_refs_lock: - tags_refs[0] += 1 - - self.update_references(new_idxes[self.index][0]) - - self.index += self.inc - - with tags_refs_lock: - tags_refs[1] += 1 - progress('refs: Thread ' + str(tags_refs[1]) + '/' + str(self.inc) + ' finished', tags_refs[0]) - - def update_references(self, idxes): - global hash_file_lock, defs_lock, refs_lock, tags_refs - - for idx in idxes: - if idx % 1000 == 0: progress('refs: ' + str(idx), tags_refs[0]) - - with hash_file_lock: - hash = db.hash.get(idx) - filename = db.file.get(idx) - - family = lib.getFileFamily(filename) - if family == None: continue - - prefix = b'' - # Kconfig values are saved as CONFIG_ - if family == 'K': - prefix = b'CONFIG_' - - tokens = scriptLines('tokenize-file', '-b', hash, family) - even = True - line_num = 1 - idents = {} - with defs_lock: - for tok in tokens: - even = not even - if even: - tok = prefix + tok - - if (db.defs.exists(tok) and - not ( (idx*idx_key_mod + line_num) in defs_idxes and - defs_idxes[idx*idx_key_mod + line_num] == tok ) and - (family != 'M' or tok.startswith(b'CONFIG_'))): - # We only index CONFIG_??? 
in makefiles - if tok in idents: - idents[tok] += ',' + str(line_num) - else: - idents[tok] = str(line_num) - - else: - line_num += tok.count(b'\1') - - with refs_lock: - for ident, lines in idents.items(): - if db.refs.exists(ident): - obj = db.refs.get(ident) - else: - obj = data.RefList() - - obj.append(idx, lines, family) - if verbose: - print(f"ref: {ident} in #{idx} @ {lines}") - db.refs.put(ident, obj) - - -class UpdateDocs(Thread): - def __init__(self, start, inc): - Thread.__init__(self, name="UpdateDocsElixir") - self.index = start - self.inc = inc # Equivalent to the number of docs threads - - def run(self): - global new_idxes, tags_done, tags_docs, tags_docs_lock - - while not (tags_done and self.index >= len(new_idxes)): - if self.index >= len(new_idxes): - # Wait for new tags - with tag_ready: - tag_ready.wait() - continue - - new_idxes[self.index][1].wait() # Make sure the tag is ready - - with tags_docs_lock: - tags_docs[0] += 1 - - self.update_doc_comments(new_idxes[self.index][0]) - - self.index += self.inc - - with tags_docs_lock: - tags_docs[1] += 1 - progress('docs: Thread ' + str(tags_docs[1]) + '/' + str(self.inc) + ' finished', tags_docs[0]) - - def update_doc_comments(self, idxes): - global hash_file_lock, docs_lock, tags_docs - - for idx in idxes: - if idx % 1000 == 0: progress('docs: ' + str(idx), tags_docs[0]) - - with hash_file_lock: - hash = db.hash.get(idx) - filename = db.file.get(idx) - - family = lib.getFileFamily(filename) - if family in [None, 'M']: continue - - lines = scriptLines('parse-docs', hash, filename) - with docs_lock: - for l in lines: - ident, line = l.split(b' ') - line = int(line.decode()) - - if db.docs.exists(ident): - obj = db.docs.get(ident) - else: - obj = data.RefList() - - obj.append(idx, str(line), family) - if verbose: - print(f"doc: {ident} in #{idx} @ {line}") - db.docs.put(ident, obj) - - -class UpdateComps(Thread): - def __init__(self, start, inc): - Thread.__init__(self, name="UpdateCompsElixir") - self.index = start - self.inc = inc # Equivalent to the number of comps threads - - def run(self): - global new_idxes, tags_done, tags_comps, tags_comps_lock - - while not (tags_done and self.index >= len(new_idxes)): - if self.index >= len(new_idxes): - # Wait for new tags - with tag_ready: - tag_ready.wait() - continue - - new_idxes[self.index][1].wait() # Make sure the tag is ready - - with tags_comps_lock: - tags_comps[0] += 1 - - self.update_compatibles(new_idxes[self.index][0]) - - new_idxes[self.index][3].set() # Tell that UpdateComps processed the tag - - self.index += self.inc - - with tags_comps_lock: - tags_comps[1] += 1 - progress('comps: Thread ' + str(tags_comps[1]) + '/' + str(self.inc) + ' finished', tags_comps[0]) - - def update_compatibles(self, idxes): - global hash_file_lock, comps_lock, tags_comps - - for idx in idxes: - if idx % 1000 == 0: progress('comps: ' + str(idx), tags_comps[0]) - - with hash_file_lock: - hash = db.hash.get(idx) - filename = db.file.get(idx) - - family = lib.getFileFamily(filename) - if family in [None, 'K', 'M']: continue - - lines = compatibles_parser.run(scriptLines('get-blob', hash), family) - comps = {} - for l in lines: - ident, line = l.split(' ') - - if ident in comps: - comps[ident] += ',' + str(line) - else: - comps[ident] = str(line) - - with comps_lock: - for ident, lines in comps.items(): - if db.comps.exists(ident): - obj = db.comps.get(ident) - else: - obj = data.RefList() - - obj.append(idx, lines, family) - if verbose: - print(f"comps: {ident} in #{idx} @ {line}") - 
db.comps.put(ident, obj) - - -class UpdateCompsDocs(Thread): - def __init__(self, start, inc): - Thread.__init__(self, name="UpdateCompsDocsElixir") - self.index = start - self.inc = inc # Equivalent to the number of comps_docs threads - - def run(self): - global new_idxes, tags_done, tags_comps_docs, tags_comps_docs_lock - - while not (tags_done and self.index >= len(new_idxes)): - if self.index >= len(new_idxes): - # Wait for new tags - with tag_ready: - tag_ready.wait() - continue - - new_idxes[self.index][1].wait() # Make sure the tag is ready - new_idxes[self.index][3].wait() # Make sure UpdateComps processed the tag - new_idxes[self.index][4].wait() # Make sure UpdateVersions processed the tag - - with tags_comps_docs_lock: - tags_comps_docs[0] += 1 - - self.update_compatibles_bindings(new_idxes[self.index][0]) - - self.index += self.inc - - with tags_comps_docs_lock: - tags_comps_docs[1] += 1 - progress('comps_docs: Thread ' + str(tags_comps_docs[1]) + '/' + str(self.inc) + ' finished', tags_comps_docs[0]) - - def update_compatibles_bindings(self, idxes): - global hash_file_lock, comps_lock, comps_docs_lock, tags_comps_docs, bindings_idxes - - for idx in idxes: - if idx % 1000 == 0: progress('comps_docs: ' + str(idx), tags_comps_docs[0]) - - if not idx in bindings_idxes: # Parse only bindings doc files - continue - - with hash_file_lock: - hash = db.hash.get(idx) - - family = 'B' - lines = compatibles_parser.run(scriptLines('get-blob', hash), family) - comps_docs = {} - with comps_lock: - for l in lines: - ident, line = l.split(' ') - - if db.comps.exists(ident): - if ident in comps_docs: - comps_docs[ident] += ',' + str(line) - else: - comps_docs[ident] = str(line) - - with comps_docs_lock: - for ident, lines in comps_docs.items(): - if db.comps_docs.exists(ident): - obj = db.comps_docs.get(ident) - else: - obj = data.RefList() - - obj.append(idx, lines, family) - if verbose: - print(f"comps_docs: {ident} in #{idx} @ {line}") - db.comps_docs.put(ident, obj) - - -def progress(msg, current): - print('{} - {} ({:.1%})'.format(project, msg, current/num_tags)) - - -# Main - -# Check number of threads arg -if len(argv) >= 2 and argv[1].isdigit() : - cpu = int(argv[1]) - - if cpu < 5 : - cpu = 5 - -# Distribute threads among functions using the following rules : -# There are more (or equal) refs threads than others -# There are more (or equal) defs threads than docs or comps threads -# Example : if cpu=6 : defs=1, refs=2, docs=1, comps=1, comps_docs=1 -# if cpu=7 : defs=2, refs=2, docs=1, comps=1, comps_docs=1 -# if cpu=8 : defs=2, refs=3, docs=1, comps=1, comps_docs=1 -# if cpu=11: defs=2, refs=3, docs=2, comps=2, comps_docs=2 -quo, rem = divmod(cpu, 5) -num_th_refs = quo -num_th_defs = quo -num_th_docs = quo - -# If DT bindings support is enabled, use $quo threads for each of the 2 threads -# Otherwise add them to the remaining threads -if dts_comp_support: - num_th_comps = quo - num_th_comps_docs = quo -else : - num_th_comps = 0 - num_th_comps_docs = 0 - rem += 2*quo - -quo, rem = divmod(rem, 2) -num_th_defs += quo -num_th_refs += quo + rem - -tag_buf = [] -for tag in scriptLines('list-tags'): - if not db.vers.exists(tag): - tag_buf.append(tag) - -num_tags = len(tag_buf) -project = lib.currentProject() - -print(project + ' - found ' + str(num_tags) + ' new tags') - -if not num_tags: - # Backward-compatibility: generate defs caches if they are empty. 
- if db.defs_cache['C'].db.stat()['nkeys'] == 0: - generate_defs_caches() - exit(0) - -threads_list.append(UpdateIds(tag_buf)) -threads_list.append(UpdateVersions(tag_buf)) - -# Define defs threads -for i in range(num_th_defs): - threads_list.append(UpdateDefs(i, num_th_defs)) -# Define refs threads -for i in range(num_th_refs): - threads_list.append(UpdateRefs(i, num_th_refs)) -# Define docs threads -for i in range(num_th_docs): - threads_list.append(UpdateDocs(i, num_th_docs)) -# Define comps threads -for i in range(num_th_comps): - threads_list.append(UpdateComps(i, num_th_comps)) -# Define comps_docs threads -for i in range(num_th_comps_docs): - threads_list.append(UpdateCompsDocs(i, num_th_comps_docs)) - - -# Start to process tags -threads_list[0].start() - -# Wait until the first tag is ready -with tag_ready: - tag_ready.wait() - -# Start remaining threads -for i in range(1, len(threads_list)): - threads_list[i].start() - -# Make sure all threads finished -for i in range(len(threads_list)): - threads_list[i].join() diff --git a/utils/index b/utils/index index 6e84a3e7..61250a22 100755 --- a/utils/index +++ b/utils/index @@ -59,14 +59,10 @@ project_fetch() { # $1 is the project path (parent of data/ and repo/). project_index() { - if test -z "$ELIXIR_THREADS"; then - ELIXIR_THREADS="$(nproc)" - fi - elixir_sources="$(dirname "$(dirname "$0")")" LXR_REPO_DIR=$1/repo LXR_DATA_DIR=$1/data \ - python3 "$elixir_sources/update.py" $ELIXIR_THREADS + python3 -m elixir.update } # $1 is the Elixir root data path.
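
One detail worth spelling out from the top of this patch: the CACHESIZE tuple added to elixir/data.py follows the Berkeley DB convention of (gigabytes, bytes), and BsdDB applies it through set_cachesize() between constructing the DB handle and opening it, because the cache size can only be set before open(). A minimal sketch of that call sequence; the database path is hypothetical:

import berkeleydb

CACHESIZE = (2, 0)          # 2 GB + 0 bytes, as in elixir/data.py

db = berkeleydb.db.DB()
db.set_cachesize(CACHESIZE[0], CACHESIZE[1])        # only valid before open()
db.open('/tmp/example.db', flags=berkeleydb.db.DB_CREATE,
        mode=0o644, dbtype=berkeleydb.db.DB_BTREE)
db.put(b'key', b'value')
assert db.get(b'key') == b'value'
db.close()
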