Skip to content

Commit

Permalink
Speed up analyse with asyncio
Browse files Browse the repository at this point in the history
  • Loading branch information
fkantelberg committed Mar 19, 2024
2 parents d8643d5 + 4f60a34 commit e3bdb20
Show file tree
Hide file tree
Showing 11 changed files with 135 additions and 65 deletions.
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ repos:
args:
- --settings=.

- repo: https://github.com/pre-commit/mirrors-pylint
rev: v2.7.4
- repo: https://github.com/pylint-dev/pylint
rev: v3.1.0
hooks:
- id: pylint
name: pylint
Expand Down
1 change: 1 addition & 0 deletions .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ disable=bad-inline-option,
too-many-public-methods,
too-many-return-statements,
use-symbolic-message-instead,
useless-option-value,
useless-suppression,


Expand Down
2 changes: 1 addition & 1 deletion src/odoo_analyse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@
"geometric_mean",
]

VERSION = "1.6.1"
VERSION = "2.0.0"
8 changes: 8 additions & 0 deletions src/odoo_analyse/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,13 @@ def parser_analyse(parser):
action="store_true",
help="Only analyse the absolute minimum",
)
parser.add_argument(
"-j",
"--jobs",
default=os.cpu_count(),
type=int,
help="Number of modules to analyse in parallel",
)


def parser_filters(parser):
Expand Down Expand Up @@ -365,6 +372,7 @@ def main(): # noqa: C901 # pylint: disable=R0915
"skip_language": args.skip_language or args.skip_all,
"skip_python": args.skip_python or args.skip_all,
"skip_readme": args.skip_readme or args.skip_all,
"jobs": args.jobs,
}

for p in args.path:
Expand Down
4 changes: 2 additions & 2 deletions src/odoo_analyse/js_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def from_json(cls, data):
)

@classmethod
def from_file(cls, path, file):
async def from_file(cls, path, file):
if not os.path.isfile(path):
return None

Expand All @@ -135,7 +135,7 @@ def from_file(cls, path, file):

name = url_to_module_path(file)

complexity = eslint_complexity(path)
complexity = await eslint_complexity(path)

# Old odoo.define format
defines = ODOO_DEFINE_RE.findall(content)
Expand Down
133 changes: 94 additions & 39 deletions src/odoo_analyse/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# License LGPL-3.0 or later (https://www.gnu.org/licenses/lgpl.html)

import ast
import asyncio
import glob
import logging
import os
Expand Down Expand Up @@ -161,8 +162,8 @@ def update(self, **kwargs):
def __repr__(self):
return f"<Module: {self.name}>"

def analyse_language(self):
self.language = analyse_language(self.path)
async def analyse_language(self):
self.language = await analyse_language(self.path)

def analyse_hash(self, files_list):
self.hashsum = hexhash_files(files_list, self.path)
Expand Down Expand Up @@ -285,7 +286,7 @@ def _parse_csv(self, path):
self.status.add("missing-file")
return

def _parse_js(self, path, pattern):
async def _parse_js(self, path, pattern):
"""Parse JavaScript files.
`path` .. directory of the module
`pattern` .. relative path/glob of the JS files"""
Expand All @@ -294,13 +295,13 @@ def _parse_js(self, path, pattern):
if not file.endswith(".js"):
continue

modules = JSModule.from_file(file, pattern)
modules = await JSModule.from_file(file, pattern)
if not modules:
return

self.js_modules.update(modules)

def _parse_assets(self, parent_path):
async def _parse_assets(self, parent_path):
for files in self.manifest.get("assets", {}).values():
for file in files:
# Might be a tuple with include/remove
Expand All @@ -309,9 +310,9 @@ def _parse_assets(self, parent_path):
if not isinstance(file, str):
file = file[-1]

self._parse_js(parent_path, file)
await self._parse_js(parent_path, file)

def _parse_xml(self, path, parent_path=None):
async def _parse_xml(self, path, parent_path=None):
if not os.path.isfile(path):
self.status.add("missing-file")
return
Expand Down Expand Up @@ -363,7 +364,7 @@ def _parse_xml(self, path, parent_path=None):

for script in obj.xpath("//script/@src"):
# this will return the path as a string
self._parse_js(parent_path, script)
await self._parse_js(parent_path, script)

def _parse_text_for_keywords(self, texts):
if not isinstance(texts, list):
Expand Down Expand Up @@ -429,7 +430,7 @@ def from_json(cls, data):
return module

@classmethod
def from_path(cls, path, **config): # noqa: C901
async def from_path(cls, path, **config): # noqa: C901
parent_path = str(Path(path).parent.absolute())
files_list = []
analyse_start = time.time()
Expand Down Expand Up @@ -470,17 +471,17 @@ def from_path(cls, path, **config): # noqa: C901
return None

if not config.get("skip_language"):
module.analyse_language()
await module.analyse_language()

if not config.get("skip_assets"):
module._parse_assets(parent_path)
await module._parse_assets(parent_path)

if not config.get("skip_data"):
for file in module.files:
file_path = os.path.join(path, file)
files_list.append(file_path)
if file.endswith(".xml"):
module._parse_xml(file_path, parent_path)
await module._parse_xml(file_path, parent_path)
elif file.endswith(".csv"):
module._parse_csv(file_path)

Expand All @@ -494,38 +495,92 @@ def from_path(cls, path, **config): # noqa: C901
return module

@classmethod
def find_modules_iter(cls, paths, depth=None, **config):
result = {}
async def _worker_find_modules(
cls, lock, job_queue, result_queue, *, blacklist, max_depth, **config
):
while True:
await lock.acquire()
path, current_depth = await job_queue.get()
lock.release()

try:
path = path.strip()
if max_depth is not None and current_depth > max_depth:
continue

try:
module = await cls.from_path(path, **config)
except Exception as e:
_logger.error(f"Error on {path}")
_logger.exception(e)
continue

if module is not None:
name = module.name
await result_queue.put((name, module))
else:
sub_paths = [
os.path.join(path, p)
for p in os.listdir(path)
if p not in blacklist
]
for p in filter(os.path.isdir, sub_paths):
await job_queue.put((p, current_depth + 1))
finally:
job_queue.task_done()

@classmethod
async def find_modules_iter(cls, paths, *, max_depth=None, **config):
if isinstance(paths, str):
paths = [paths]

paths = [(p, 0) for p in paths]
jobs = config.get("jobs", os.cpu_count())

job_queue = asyncio.Queue()
result_queue = asyncio.Queue()
lock = asyncio.Semaphore(jobs)
blacklist = folder_blacklist()
# Breadth-first search
while paths:
path, d = paths.pop(0)
path = path.strip()
if depth is not None and d > depth:
continue

try:
module = cls.from_path(path, **config)
except Exception as e:
_logger.exception(e)
continue
for p in paths:
await job_queue.put((p, 0))

workers = [
asyncio.create_task(
cls._worker_find_modules(
lock,
job_queue,
result_queue,
blacklist=blacklist,
max_depth=max_depth,
**config,
)
)
for _ in range(jobs)
]

if module is not None:
name = module.name
if name not in result:
yield name, module
else:
sub_paths = [
os.path.join(path, p)
for p in os.listdir(path)
if p not in blacklist
]
paths.extend((p, d + 1) for p in sub_paths if os.path.isdir(p))
# The semaphore should only lock when all workers are waiting
while not lock.locked():
if not result_queue.empty():
yield await result_queue.get()
await asyncio.sleep(0)

# Wait until the job queue is drained, then cancel all workers and wait for them
await job_queue.join()
for worker in workers:
worker.cancel()

await asyncio.gather(*workers, return_exceptions=True)

# Clear the result queue fully
while not result_queue.empty():
yield await result_queue.get()

@classmethod
def find_modules(cls, paths, depth=None, **config):
return dict(cls.find_modules_iter(paths, depth, **config))
async def find_modules(cls, paths, *, max_depth=None, **config):
result = {}
async for key, value in cls.find_modules_iter(
paths, max_depth=max_depth, **config
):
result[key] = value

return result
5 changes: 3 additions & 2 deletions src/odoo_analyse/odoo.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# © 2020 initOS GmbH
# License LGPL-3.0 or later (https://www.gnu.org/licenses/lgpl.html)

import asyncio
import csv
import json
import logging
Expand Down Expand Up @@ -337,11 +338,11 @@ def _analyse_out_json(self, data, file_path): # pylint: disable=R0201
with open(file_path, "w+", encoding="utf-8") as fp:
json.dump(data, fp, indent=2, cls=utils.JSONEncoder)

def load_path(self, paths, depth=None, **config):
def load_path(self, paths, *, max_depth=None, **config):
if isinstance(paths, str):
paths = [paths]

result = Module.find_modules(paths, depth=depth, **config)
result = asyncio.run(Module.find_modules(paths, max_depth=max_depth, **config))

self.full.update(result.copy())
self.modules.update(result.copy())
Expand Down
23 changes: 13 additions & 10 deletions src/odoo_analyse/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# License LGPL-3.0 or later (https://www.gnu.org/licenses/lgpl.html)

import ast
import asyncio
import hashlib
import json
import logging
Expand All @@ -21,16 +22,18 @@ def default(self, o):
return super().default(o)


def call(cmd, cwd=None):
with subprocess.Popen(
cmd,
async def call(cmd, cwd=None):
if isinstance(cmd, str):
cmd = [cmd]

proc = await asyncio.create_subprocess_exec(
*cmd,
stderr=subprocess.PIPE,
stdout=subprocess.PIPE,
cwd=cwd,
universal_newlines=True,
env={**os.environ, "GIT_TERMINAL_PROMPT": "0"},
) as proc:
return [pipe.strip() for pipe in proc.communicate()]
)
return [pipe.decode().strip() for pipe in await proc.communicate()]


def stopwords(words=None):
Expand Down Expand Up @@ -112,13 +115,13 @@ def try_automatic_port(filepath):
return True


def analyse_language(path):
async def analyse_language(path):
"""Analyse the languages of a directory"""
cmd = shutil.which("cloc")
if cmd is None:
return {}

output, error = call([cmd, path, "--json", "--strip-str-comments"])
output, error = await call([cmd, path, "--json", "--strip-str-comments"])
if error:
_logger.warning(error)

Expand Down Expand Up @@ -165,13 +168,13 @@ def get_ast_source_segment(source, node):
return "".join(segment)


def eslint_complexity(js_file):
async def eslint_complexity(js_file):
"""Return the JS complexity using eslintcc"""
cmd = shutil.which("eslintcc")
if not cmd:
return None

output, _ = call([cmd, "-a", "-f=json", js_file])
output, _ = await call([cmd, "-a", "-f=json", js_file])
try:
output = json.loads(output)
except JSONDecodeError:
Expand Down
3 changes: 2 additions & 1 deletion tests/test_module.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
# © 2020 initOS GmbH
# License LGPL-3.0 or later (https://www.gnu.org/licenses/lgpl.html)

import asyncio
import os
from unittest import mock

from odoo_analyse import Model, Module, Record, module


def get_module():
modules = Module.find_modules(os.path.abspath("tests/"))
modules = asyncio.run(Module.find_modules(os.path.abspath("tests/")))
return modules["testing_module"]


Expand Down
4 changes: 2 additions & 2 deletions tests/test_odoo.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ def test_odoo_path(odoo):

def test_odoo_creation():
path = os.path.abspath("tests")
assert Odoo.from_path("%s/testing_module/__manifest__.py" % path) is None
assert Odoo.from_path(f"{path}/testing_module/__manifest__.py") is None

with tempfile.NamedTemporaryFile("w+") as cfg:
cfg.write("[options]\naddons_path=%s\n" % path)
cfg.write(f"[options]\naddons_path={path}\n")
cfg.seek(0)

check_odoo(Odoo.from_config(cfg.name))
Expand Down
Loading

0 comments on commit e3bdb20

Please sign in to comment.