From ffd70bc5af309f2b1fabc7a2a65af5ff299ebeb0 Mon Sep 17 00:00:00 2001 From: Ekaterina Tochilina Date: Tue, 15 Aug 2023 19:43:51 +0300 Subject: [PATCH] Moved here utbot_python_runner --- gradle.properties | 3 + utbot-python-types/.gitignore | 3 + utbot-python-types/build.gradle.kts | 30 + .../org/utbot/python/utils/ProcessUtils.kt | 14 +- .../main/python/utbot_mypy_runner/README.md | 0 .../python/utbot_mypy_runner/pyproject.toml | 16 + .../utbot_mypy_runner/__init__.py | 0 .../utbot_mypy_runner/__main__.py | 54 ++ .../utbot_mypy_runner/expression_traverser.py | 130 +++++ .../utbot_mypy_runner/extract_annotations.py | 100 ++++ .../utbot_mypy_runner/mypy_main.py | 150 +++++ .../utbot_mypy_runner/names.py | 71 +++ .../utbot_mypy_runner/nodes.py | 522 ++++++++++++++++++ .../utbot_mypy_runner/utils.py | 37 ++ .../main/resources/utbot_mypy_runner_version | 1 + utbot-python/.gitignore | 1 + .../utbot/python/utils/RequirementsUtils.kt | 15 +- 17 files changed, 1142 insertions(+), 5 deletions(-) create mode 100644 utbot-python-types/.gitignore create mode 100644 utbot-python-types/src/main/python/utbot_mypy_runner/README.md create mode 100644 utbot-python-types/src/main/python/utbot_mypy_runner/pyproject.toml create mode 100644 utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/__init__.py create mode 100644 utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/__main__.py create mode 100644 utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/expression_traverser.py create mode 100644 utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/extract_annotations.py create mode 100644 utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/mypy_main.py create mode 100644 utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/names.py create mode 100644 utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/nodes.py create mode 100644 
utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/utils.py create mode 100644 utbot-python-types/src/main/resources/utbot_mypy_runner_version diff --git a/gradle.properties b/gradle.properties index d515c05281..7fce7405ac 100644 --- a/gradle.properties +++ b/gradle.properties @@ -92,6 +92,9 @@ commonsIOVersion=2.11.0 javaxVersion=2.2 jakartaVersion=3.1.0 +# Python support +utbotMypyRunnerVersion=0.2.11 + # use latest Java 8 compaitable Spring and Spring Boot versions springVersion=5.3.28 springBootVersion=2.7.13 diff --git a/utbot-python-types/.gitignore b/utbot-python-types/.gitignore new file mode 100644 index 0000000000..cc4c82b6b8 --- /dev/null +++ b/utbot-python-types/.gitignore @@ -0,0 +1,3 @@ +gradle.properties +local_mypy_path +dist/ \ No newline at end of file diff --git a/utbot-python-types/build.gradle.kts b/utbot-python-types/build.gradle.kts index 1379efeff8..5483ff6bb9 100644 --- a/utbot-python-types/build.gradle.kts +++ b/utbot-python-types/build.gradle.kts @@ -5,4 +5,34 @@ dependencies { implementation("com.squareup.moshi:moshi-kotlin:1.11.0") implementation("com.squareup.moshi:moshi-adapters:1.11.0") implementation(group = "io.github.microutils", name = "kotlin-logging", version = kotlinLoggingVersion) +} + +val utbotMypyRunnerVersion = File(project.projectDir, "src/main/resources/utbot_mypy_runner_version").readText() +val pipToken: String? by project +val pythonInterpreter: String? 
by project +val utbotMypyRunnerPath = File(project.projectDir, "src/main/python/utbot_mypy_runner") +val localMypyPath = File(utbotMypyRunnerPath, "dist") +val localMypyPathText = File(project.projectDir, "src/main/resources/local_mypy_path") + + +val setMypyRunnerVersion = tasks.register("setVersion") { + group = "python" + workingDir = utbotMypyRunnerPath + commandLine(pythonInterpreter!!, "-m", "poetry", "version", utbotMypyRunnerVersion) +} + +val buildMypyRunner = tasks.register("buildUtbotMypyRunner") { + dependsOn(setMypyRunnerVersion) + group = "python" + workingDir = utbotMypyRunnerPath + commandLine(pythonInterpreter!!, "-m", "poetry", "build") + localMypyPathText.writeText(localMypyPath.canonicalPath) + localMypyPathText.createNewFile() +} + +tasks.register("publishUtbotMypyRunner") { + dependsOn(buildMypyRunner) + group = "python" + workingDir = utbotMypyRunnerPath + commandLine(pythonInterpreter!!, "-m", "poetry", "publish", "-u", "__token__", "-p", pipToken!!) } \ No newline at end of file diff --git a/utbot-python-types/src/main/kotlin/org/utbot/python/utils/ProcessUtils.kt b/utbot-python-types/src/main/kotlin/org/utbot/python/utils/ProcessUtils.kt index 583e0531df..379cfc0f7d 100644 --- a/utbot-python-types/src/main/kotlin/org/utbot/python/utils/ProcessUtils.kt +++ b/utbot-python-types/src/main/kotlin/org/utbot/python/utils/ProcessUtils.kt @@ -11,7 +11,15 @@ data class CmdResult( val terminatedByTimeout: Boolean = false ) -fun startProcess(command: List): Process = ProcessBuilder(command).start() +fun startProcess( + command: List, + environmentVariables: Map = emptyMap() +): Process { + val pb = ProcessBuilder(command) + val env = pb.environment() + env += environmentVariables + return pb.start() +} fun getResult(process: Process, timeout: Long? = null): CmdResult { if (timeout != null) { @@ -36,7 +44,7 @@ fun getResult(process: Process, timeout: Long? 
= null): CmdResult { return CmdResult(stdout.trimIndent(), stderr, process.exitValue()) } -fun runCommand(command: List, timeout: Long? = null): CmdResult { - val process = startProcess(command) +fun runCommand(command: List, timeout: Long? = null, environmentVariables: Map = emptyMap()): CmdResult { + val process = startProcess(command, environmentVariables) return getResult(process, timeout) } \ No newline at end of file diff --git a/utbot-python-types/src/main/python/utbot_mypy_runner/README.md b/utbot-python-types/src/main/python/utbot_mypy_runner/README.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/utbot-python-types/src/main/python/utbot_mypy_runner/pyproject.toml b/utbot-python-types/src/main/python/utbot_mypy_runner/pyproject.toml new file mode 100644 index 0000000000..fc9d122a0e --- /dev/null +++ b/utbot-python-types/src/main/python/utbot_mypy_runner/pyproject.toml @@ -0,0 +1,16 @@ +[tool.poetry] +name = "utbot_mypy_runner" +version = "0.2.12.dev1" +description = "" +authors = ["Ekaterina Tochilina "] +readme = "README.md" +packages = [{include = "utbot_mypy_runner"}] + +[tool.poetry.dependencies] +python = "^3.8" +mypy = "1.0.0" + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/__init__.py b/utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/__main__.py b/utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/__main__.py new file mode 100644 index 0000000000..e17d19432f --- /dev/null +++ b/utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/__main__.py @@ -0,0 +1,54 @@ +import argparse +import os + +import utbot_mypy_runner.mypy_main as mypy_main +import utbot_mypy_runner.extract_annotations as extraction + + 
+parser = argparse.ArgumentParser() +parser.add_argument('--config', required=True) +parser.add_argument('--sources', required=True, nargs='+') +parser.add_argument('--modules', required=True, nargs='+') +parser.add_argument('--annotations_out') +parser.add_argument('--mypy_stdout') +parser.add_argument('--mypy_stderr') +parser.add_argument('--mypy_exit_status') +parser.add_argument('--module_for_types') +parser.add_argument('--indent', type=int) + +args = parser.parse_args() + +if len(args.sources) != len(args.modules): + print("Sources must correspond to modules") + exit(10) + +mypy_args = ["--config-file", args.config] +for module_name in args.modules: + mypy_args += ["-m", module_name] + +stdout, stderr, exit_status, build_result = mypy_main.run(mypy_args) + +if args.mypy_stdout is not None: + with open(args.mypy_stdout, "w") as file: + file.write(stdout) + print("Wrote mypy stdout to", args.mypy_stdout) + +if args.mypy_stderr is not None: + with open(args.mypy_stderr, "w") as file: + file.write(stderr) + print("Wrote mypy stderr to", args.mypy_stderr) + +if args.mypy_exit_status is not None: + with open(args.mypy_exit_status, "w") as file: + file.write(str(exit_status)) + print("Wrote mypy exit status to", args.mypy_exit_status) + +if args.annotations_out is not None: + if build_result is not None: + with open(args.annotations_out, "w") as file: + sources = [os.path.abspath(x) for x in args.sources] + file.write(extraction.get_result_from_mypy_build(build_result, sources, args.module_for_types, args.indent)) + print("Extracted annotations and wrote to", args.annotations_out) + else: + print("For some reason BuildResult is None") + exit(11) diff --git a/utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/expression_traverser.py b/utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/expression_traverser.py new file mode 100644 index 0000000000..d1db55a4e5 --- /dev/null +++ 
b/utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/expression_traverser.py @@ -0,0 +1,130 @@ +import typing as tp + +from mypy.nodes import * +from mypy.traverser import * +import mypy.types + + +class MyTraverserVisitor(TraverserVisitor): + def __init__(self, types, processor: tp.Callable[[int, int, int, int, mypy.types.Type], None]): + self.types = types + self.processor = processor + + def process_expression(self, o: Expression) -> None: + if o in self.types.keys() and not isinstance(self.types[o], mypy.types.AnyType) \ + and o.end_line is not None and o.end_column is not None and o.line >= 0: + self.processor(o.line, o.column, o.end_line, o.end_column, self.types[o]) + + def visit_name_expr(self, o: NameExpr) -> None: + self.process_expression(o) + super().visit_name_expr(o) + + def visit_member_expr(self, o: MemberExpr) -> None: + self.process_expression(o) + super().visit_member_expr(o) + +""" + def visit_yield_expr(self, o: YieldExpr) -> None: + self.process_expression(o) + super().visit_yield_expr(o) + + def visit_call_expr(self, o: CallExpr) -> None: + self.process_expression(o) + super().visit_call_expr(o) + + def visit_op_expr(self, o: OpExpr) -> None: + self.process_expression(o) + super().visit_op_expr(o) + + def visit_comparison_expr(self, o: ComparisonExpr) -> None: + self.process_expression(o) + super().visit_comparison_expr(o) + + def visit_slice_expr(self, o: SliceExpr) -> None: + self.process_expression(o) + super().visit_slice_expr(o) + + def visit_cast_expr(self, o: CastExpr) -> None: + self.process_expression(o) + super().visit_cast_expr(o) + + def visit_assert_type_expr(self, o: AssertTypeExpr) -> None: + self.process_expression(o) + super().visit_assert_type_expr(o) + + def visit_reveal_expr(self, o: RevealExpr) -> None: + self.process_expression(o) + super().visit_reveal_expr(o) + + def visit_assignment_expr(self, o: AssignmentExpr) -> None: + self.process_expression(o) + super().visit_assignment_expr(o) + + def 
visit_unary_expr(self, o: UnaryExpr) -> None: + self.process_expression(o) + super().visit_unary_expr(o) + + def visit_list_expr(self, o: ListExpr) -> None: + self.process_expression(o) + super().visit_list_expr(o) + + def visit_tuple_expr(self, o: TupleExpr) -> None: + self.process_expression(o) + super().visit_tuple_expr(o) + + def visit_dict_expr(self, o: DictExpr) -> None: + self.process_expression(o) + super().visit_dict_expr(o) + + def visit_set_expr(self, o: SetExpr) -> None: + self.process_expression(o) + super().visit_set_expr(o) + + def visit_index_expr(self, o: IndexExpr) -> None: + self.process_expression(o) + super().visit_index_expr(o) + + def visit_generator_expr(self, o: GeneratorExpr) -> None: + self.process_expression(o) + super().visit_generator_expr(o) + + def visit_dictionary_comprehension(self, o: DictionaryComprehension) -> None: + self.process_expression(o) + super().visit_dictionary_comprehension(o) + + def visit_list_comprehension(self, o: ListComprehension) -> None: + self.process_expression(o) + super().visit_list_comprehension(o) + + def visit_set_comprehension(self, o: SetComprehension) -> None: + self.process_expression(o) + super().visit_set_comprehension(o) + + def visit_conditional_expr(self, o: ConditionalExpr) -> None: + self.process_expression(o) + super().visit_conditional_expr(o) + + def visit_type_application(self, o: TypeApplication) -> None: + self.process_expression(o) + super().visit_type_application(o) + + def visit_lambda_expr(self, o: LambdaExpr) -> None: + self.process_expression(o) + super().visit_lambda_expr(o) + + def visit_star_expr(self, o: StarExpr) -> None: + self.process_expression(o) + super().visit_star_expr(o) + + def visit_backquote_expr(self, o: BackquoteExpr) -> None: + self.process_expression(o) + super().visit_backquote_expr(o) + + def visit_await_expr(self, o: AwaitExpr) -> None: + self.process_expression(o) + super().visit_await_expr(o) + + def visit_super_expr(self, o: SuperExpr) -> None: + 
self.process_expression(o) + super().visit_super_expr(o) +""" diff --git a/utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/extract_annotations.py b/utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/extract_annotations.py new file mode 100644 index 0000000000..80ca3ef33f --- /dev/null +++ b/utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/extract_annotations.py @@ -0,0 +1,100 @@ +import json +import typing as tp +from collections import defaultdict + +import mypy.nodes +import mypy.types + +import utbot_mypy_runner.mypy_main as mypy_main +import utbot_mypy_runner.expression_traverser as expression_traverser +import utbot_mypy_runner.names +from utbot_mypy_runner.utils import get_borders +from utbot_mypy_runner.nodes import * + + +class ExpressionType: + def __init__(self, start_offset: int, end_offset: int, line: int, type_: Annotation): + self.start_offset = start_offset + self.end_offset = end_offset + self.line = line + self.type_ = type_ + + def encode(self): + return { + "startOffset": self.start_offset, + "endOffset": self.end_offset, + "line": self.line, + "type": self.type_.encode() + } + + +def get_output_json(annotations: tp.Dict[str, tp.Dict[str, Definition]], + expression_types: tp.Dict[str, tp.List[ExpressionType]], + names_dict: tp.Dict[str, tp.List[utbot_mypy_runner.names.Name]], + indent: tp.Optional[int]): + node_storage_key = 'nodeStorage' + types_key = 'types' + definitions_key = 'definitions' + names_key = 'names' + + result: tp.Dict[str, tp.Any] = {node_storage_key: {}, types_key: {}} + for key in annotation_node_dict: + result[node_storage_key][str(key)] = annotation_node_dict[key].encode() + + result[definitions_key] = {} + for module in annotations.keys(): + result[definitions_key][module] = {} + for name in annotations[module].keys(): + result[definitions_key][module][name] = annotations[module][name].encode() + + for module in expression_types.keys(): + 
result[types_key][module] = [x.encode() for x in expression_types[module]] + + result[names_key] = {} + for module in names_dict.keys(): + result[names_key][module] = [x.encode() for x in names_dict[module]] + + return json.dumps(result, indent=indent) + + +def skip_node(node: mypy.nodes.SymbolTableNode) -> bool: + + if isinstance(node.node, mypy.nodes.TypeInfo): + x = node.node + return x.is_named_tuple or (x.typeddict_type is not None) or x.is_newtype or x.is_intersection + + return False + + +def get_result_from_mypy_build(build_result: mypy_main.build.BuildResult, source_paths: tp.List[str], + module_for_types: tp.Optional[str], indent=None) -> str: + annotation_dict: tp.Dict[str, tp.Dict[str, Definition]] = defaultdict(dict) + names_dict: tp.Dict[str, tp.List[utbot_mypy_runner.names.Name]] = utbot_mypy_runner.names.get_names(build_result) + for module in build_result.files.keys(): + mypy_file: mypy.nodes.MypyFile = build_result.files[module] + + for name in mypy_file.names.keys(): + symbol_table_node = build_result.files[module].names[name] + + if skip_node(symbol_table_node): + continue + + only_types = mypy_file.path not in source_paths + + definition = get_definition_from_symbol_node(symbol_table_node, Meta(module), only_types) + if definition is not None: + annotation_dict[module][name] = definition + + expression_types: tp.Dict[str, tp.List[ExpressionType]] = defaultdict(list) + if module_for_types is not None: + mypy_file = build_result.files[module_for_types] + with open(mypy_file.path, "r") as file: + content = file.readlines() + processor = lambda line, col, end_line, end_col, type_: \ + expression_types[module_for_types].append( # TODO: proper Meta + ExpressionType(*get_borders(line, col, end_line, end_col, content), line, get_annotation(type_, Meta(module_for_types))) + ) + traverser = expression_traverser.MyTraverserVisitor(build_result.types, processor) + traverser.visit_mypy_file(build_result.files[module_for_types]) + + return 
get_output_json(annotation_dict, expression_types, names_dict, indent) diff --git a/utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/mypy_main.py b/utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/mypy_main.py new file mode 100644 index 0000000000..886f3969f4 --- /dev/null +++ b/utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/mypy_main.py @@ -0,0 +1,150 @@ +from mypy.main import * + +import sys + +from io import StringIO +from typing import List, Tuple, TextIO, Callable, Optional, cast + +""" +Copy with some changes of function 'main' from here: +https://github.com/python/mypy/blob/v1.0.0/mypy/main.py +""" +def new_main( + stdout: TextIO, + stderr: TextIO, + args: Optional[List[str]] = None, + clean_exit: bool = False +) -> Optional[build.BuildResult]: + """Main entry point to the type checker. + Args: + args: Custom command-line arguments. If not given, sys.argv[1:] will + be used. + clean_exit: Don't hard kill the process on exit. This allows catching + SystemExit. + """ + util.check_python_version("mypy") + t0 = time.time() + # To log stat() calls: os.stat = stat_proxy + sys.setrecursionlimit(2**14) + if args is None: + args = sys.argv[1:] + + fscache = FileSystemCache() + sources, options = process_options(args, stdout=stdout, stderr=stderr, fscache=fscache) + + # CHANGE: export types of AST nodes + options.preserve_asts = True + options.export_types = True + + if clean_exit: + options.fast_exit = False + + formatter = util.FancyFormatter(stdout, stderr, options.hide_error_codes) + + if options.install_types and (stdout is not sys.stdout or stderr is not sys.stderr): + # Since --install-types performs user input, we want regular stdout and stderr. 
+ fail("error: --install-types not supported in this mode of running mypy", stderr, options) + + if options.non_interactive and not options.install_types: + fail("error: --non-interactive is only supported with --install-types", stderr, options) + + if options.install_types and not options.incremental: + fail( + "error: --install-types not supported with incremental mode disabled", stderr, options + ) + + if options.install_types and options.python_executable is None: + fail( + "error: --install-types not supported without python executable or site packages", + stderr, + options, + ) + + res, messages, blockers = run_build(sources, options, fscache, t0, stdout, stderr) + + if options.non_interactive: + missing_pkgs = read_types_packages_to_install(options.cache_dir, after_run=True) + if missing_pkgs: + # Install missing type packages and rerun build. + install_types(formatter, options, after_run=True, non_interactive=True) + fscache.flush() + print() + res, messages, blockers = run_build(sources, options, fscache, t0, stdout, stderr) + show_messages(messages, stderr, formatter, options) + + if MEM_PROFILE: + from mypy.memprofile import print_memory_profile + + print_memory_profile() + + code = 0 + n_errors, n_notes, n_files = util.count_stats(messages) + if messages and n_notes < len(messages): + code = 2 if blockers else 1 + if options.error_summary: + if n_errors: + summary = formatter.format_error( + n_errors, n_files, len(sources), blockers=blockers, use_color=options.color_output + ) + stdout.write(summary + "\n") + # Only notes should also output success + elif not messages or n_notes == len(messages): + stdout.write(formatter.format_success(len(sources), options.color_output) + "\n") + stdout.flush() + + if options.install_types and not options.non_interactive: + result = install_types(formatter, options, after_run=True, non_interactive=False) + if result: + print() + print("note: Run mypy again for up-to-date results with installed types") + code = 2 + + 
return res + + +""" +Copy with some changes of mypy api functions from here: +https://github.com/python/mypy/blob/v0.971/mypy/api.py +""" +def _run( + main_wrapper: Callable[[TextIO, TextIO], Optional[build.BuildResult]] +) -> Tuple[str, str, int, Optional[build.BuildResult]]: + + stdout = StringIO() + stderr = StringIO() + + res = None + try: + res = main_wrapper(stdout, stderr) + exit_status = 0 + except SystemExit as system_exit: + exit_status = cast(int, system_exit.code) + + return stdout.getvalue(), stderr.getvalue(), exit_status, res + + +def run(args: List[str]) -> Tuple[str, str, int, Optional[build.BuildResult]]: + args.append("--no-incremental") + return _run(lambda stdout, stderr: new_main(args=args, stdout=stdout, stderr=stderr, clean_exit=True)) + + +if __name__ == "__main__": + import time + start = time.time() + stdout, stderr, exit_status, build_result = run(sys.argv[1:]) + print(f"Seconds passed: {time.time() - start}") + print(stdout, stderr, exit_status, sep='\n') + if build_result is None: + print("BuildResult is None") + else: + print(build_result.files['utbot_mypy_runner.nodes'].names['CompositeAnnotationNode'].node.names["module_key"].node.is_initialized_in_class) + #print(build_result.files['builtins'].names['str'].node.names["count"].node.arguments[2].initializer) + #print(build_result.files['dijkstra'].names['Dijkstra'].node.names['Node'].node.module_name) + #print(build_result.files['numpy'].names['ndarray'].module_public) + #print(build_result.files['_pytest.runner'].names[''].node.is_intersection) + #print(build_result.files['subtypes'].names['P'].node.names['f'].fullname) + #print([build_result.files['subtypes'].names[x].fullname for x in build_result.files['subtypes'].names]) + #print(build_result.files['collections'].names['deque'].node.mro) + #print(build_result.files['collections'].names['Counter'].node._promote) + #for x in build_result.files['builtins'].names['set'].node.defn.type_vars: + # print(type(x)) diff --git 
a/utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/names.py b/utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/names.py new file mode 100644 index 0000000000..e2e4ee0468 --- /dev/null +++ b/utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/names.py @@ -0,0 +1,71 @@ +import typing as tp +import mypy.nodes +import utbot_mypy_runner.mypy_main as mypy_main + + +class Name: + def __init__(self, name, type_='Other'): + self.name = name + self.type_ = type_ + + def encode(self): + return {'name': self.name, 'kind': self.type_} + + +class ModuleName(Name): + def __init__(self, name, fullname): + super().__init__(name, 'Module') + self.fullname = fullname + + def encode(self): + superclass_dict = super().encode() + subclass_dict = {'fullname': self.fullname} + return dict(superclass_dict, **subclass_dict) + + +class LocalTypeName(Name): + def __init__(self, name): + super().__init__(name, 'LocalType') + + +class ImportedTypeName(Name): + def __init__(self, name, fullname): + super().__init__(name, 'ImportedType') + self.fullname = fullname + + def encode(self): + superclass_dict = super().encode() + subclass_dict = {'fullname': self.fullname} + return dict(superclass_dict, **subclass_dict) + + +def get_names_from_module(module_name: str, table: mypy.nodes.SymbolTable) -> tp.List[Name]: + result: tp.List[Name] = [] + for name in table.keys(): + # TODO: remove synthetic names + + node = table[name].node + + if isinstance(node, mypy.nodes.TypeInfo): + if node.is_intersection: + continue + + if node._fullname.startswith(module_name): + result.append(LocalTypeName(name)) + else: + result.append(ImportedTypeName(name, node._fullname)) + + elif isinstance(node, mypy.nodes.MypyFile): + result.append(ModuleName(name, node._fullname)) + + else: + result.append(Name(name)) + + return result + + +def get_names(build_result: mypy_main.build.BuildResult) -> tp.Dict[str, tp.List[Name]]: + names_dict: tp.Dict[str, 
tp.List[Name]] = {} + for module in build_result.files.keys(): + names_dict[module] = get_names_from_module(module, build_result.files[module].names) + return names_dict \ No newline at end of file diff --git a/utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/nodes.py b/utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/nodes.py new file mode 100644 index 0000000000..caf238765b --- /dev/null +++ b/utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/nodes.py @@ -0,0 +1,522 @@ +import typing as tp +from collections import defaultdict +import copy +import sys + +import mypy.nodes +import mypy.types + + +annotation_node_dict: tp.Dict[str, "AnnotationNode"] = {} +type_vars_of_node: tp.Dict[str, tp.List[str]] = defaultdict(list) +any_type_instance = mypy.types.AnyType(mypy.types.TypeOfAny.unannotated) + + +if sys.version_info >= (3, 10): + EncodedInfo: tp.TypeAlias = tp.Union[str, bool, tp.Dict[str, 'EncodedInfo'], tp.List['EncodedInfo']] +else: + EncodedInfo = tp.Any + + +class Annotation: + node_id_key = "nodeId" + args_key = "args" # optional + + def __init__(self, node_id, args: tp.Optional[tp.List['Annotation']] = None): + self.node_id = node_id + self.args = args + + def encode(self) -> tp.Dict[str, EncodedInfo]: + result: tp.Dict[str, EncodedInfo] = {self.node_id_key: str(self.node_id)} + if self.args is not None: + result[self.args_key] = [x.encode() for x in self.args] + return result + + +def encode_extension(super_encode: tp.Callable[[tp.Any], tp.Dict[str, EncodedInfo]]): + + def decorator(func: tp.Callable[[tp.Any], tp.Dict[str, EncodedInfo]]): + def wrapper(self) -> tp.Dict[str, EncodedInfo]: + superclass_dict = super_encode(self) + subclass_dict = func(self) + return dict(superclass_dict, **subclass_dict) + + return wrapper + + return decorator + + +class AnnotationNode: + type_key = 'type' + + def __init__(self, annotation_type, id_, meta: 'Meta'): + self.type = annotation_type + self.id_ = id_ + 
annotation_node_dict[id_] = self + self.meta = copy.deepcopy(meta) + + def encode(self) -> tp.Dict[str, EncodedInfo]: + return {self.type_key: self.type} + + def __eq__(self, other): + return self.id_ == other.id_ + + def __hash__(self): + return hash(self.id_) + + +class Definition: + kind_key = 'kind' + + def __init__(self, kind: str, meta: 'Meta'): + self.kind = kind + self.meta = copy.deepcopy(meta) + + def encode(self) -> tp.Dict[str, EncodedInfo]: + return {self.kind_key: self.kind} + + +class Variable(Definition): + kind = 'Variable' + + name_key = 'name' + is_property_key = 'isProperty' + is_self_key = 'isSelf' + type_key = 'type' + is_initialized_in_class_key = 'isInitializedInClass' + + def __init__(self, var: mypy.nodes.Var, meta: 'Meta'): + super().__init__(self.kind, meta) + self.name: str = var.name + self.is_property: bool = var.is_property + self.is_self: bool = var.is_self + self.is_initialized_in_class: bool = var.is_initialized_in_class + self.type: Annotation + if var.type is None or self.meta.is_arg: + self.type = get_annotation(any_type_instance, self.meta) + else: + self.type = get_annotation(var.type, self.meta) + + @encode_extension(Definition.encode) + def encode(self) -> tp.Dict[str, EncodedInfo]: + return { + self.name_key: self.name, + self.is_property_key: self.is_property, + self.is_self_key: self.is_self, + self.type_key: self.type.encode(), + self.is_initialized_in_class_key: self.is_initialized_in_class + } + + +class ClassDef(Definition): + kind = 'ClassDef' + + type_key = 'type' + + def __init__(self, type_info: mypy.nodes.TypeInfo, meta: 'Meta'): + super().__init__(self.kind, meta) + self.type: Annotation = get_annotation(mypy.types.Instance(type_info, []), self.meta) + + @encode_extension(Definition.encode) + def encode(self): + return {self.type_key: self.type.encode()} + + +class FuncDef(Definition): + kind = 'FuncDef' + + type_key = 'type' + args_key = 'args' + name_key = 'name' + + def __init__(self, func_def: 
mypy.nodes.FuncDef, meta: 'Meta'): + super().__init__(self.kind, meta) + self.type: Annotation + + if func_def.type is None: + node = FunctionNode(str(id(func_def)), self.meta, func_def) + self.type = Annotation(node.id_) + else: + self.type = get_annotation(func_def.type, self.meta) + + self.args: tp.List[Definition] = [] + self.name: str = func_def.name + self.meta.is_arg = True + for x in func_def.arguments: + defn = get_definition_from_node(x.variable, self.meta) + assert defn is not None + self.args.append(defn) + self.meta.is_arg = False + + @encode_extension(Definition.encode) + def encode(self): + return { + self.args_key: [x.encode() for x in self.args], + self.type_key: self.type.encode(), + self.name_key: self.name + } + + +class OverloadedFuncDef(Definition): + kind = 'OverloadedFuncDef' + + type_key = 'type' + items_key = 'items' + name_key = 'name' + + def __init__(self, func_def: mypy.nodes.OverloadedFuncDef, meta: 'Meta'): + super().__init__(self.kind, meta) + self.type: Annotation + if func_def.type is None: + self.type = get_annotation(any_type_instance, self.meta) + else: + self.type = get_annotation(func_def.type, self.meta) + + self.items: tp.List[Definition] = [] + for x in func_def.items: + cur = get_definition_from_node(x, self.meta) + assert cur is not None + self.items.append(cur) + + self.name: str = func_def.name + + @encode_extension(Definition.encode) + def encode(self): + return { + self.type_key: self.type.encode(), + self.items_key: [x.encode() for x in self.items], + self.name_key: self.name + } + + +class TypeVarNode(AnnotationNode): + annotation_type = 'TypeVar' + + var_name_key = 'varName' + values_key = 'values' + upper_bound_key = 'upperBound' + def_key = 'def' + variance_key = 'variance' + + # variance values + covariant = "COVARIANT" + contravariant = "CONTRAVARIANT" + invariant = "INVARIANT" + + def __init__(self, type_var: mypy.types.TypeVarType, id_: str, meta: 'Meta'): + super().__init__(self.annotation_type, id_, meta) 
+ self.name: str = type_var.name + self.values: tp.List[Annotation] = [ + get_annotation(x, self.meta) + for x in type_var.values + ] + self.def_id: str = self.meta.fullname_to_node_id[type_var.id.namespace] + type_vars_of_node[self.def_id].append(id_) + self.upper_bound: Annotation = get_annotation(type_var.upper_bound, self.meta) + self.variance: str + if type_var.variance == mypy.nodes.COVARIANT: + self.variance = self.covariant + elif type_var.variance == mypy.nodes.CONTRAVARIANT: + self.variance = self.contravariant + else: + self.variance = self.invariant + + @encode_extension(AnnotationNode.encode) + def encode(self): + return { + self.var_name_key: self.name, + self.values_key: [x.encode() for x in self.values], + self.upper_bound_key: self.upper_bound.encode(), + self.def_key: self.def_id, + self.variance_key: self.variance + } + + +class FunctionNode(AnnotationNode): + annotation_type = 'Function' + + type_vars_key = 'typeVars' + arg_types_key = 'argTypes' + return_type_key = 'returnType' + arg_kinds_key = 'argKinds' + arg_names_key = 'argNames' + + # argKinds values + arg_pos = "ARG_POS" + arg_opt = "ARG_OPT" + arg_star = "ARG_STAR" + arg_star_2 = "ARG_STAR_2" + arg_named = "ARG_NAMED" + arg_named_opt = "ARG_NAMED_OPT" # TODO: is it needed? 
+ + def __init__(self, id_: str, meta: 'Meta', type: tp.Union[mypy.types.CallableType, mypy.nodes.FuncItem]): + super().__init__(self.annotation_type, id_, meta) + self.type_vars: tp.List[str] + self.arg_types: tp.List[Annotation] + self.return_type: Annotation + self.arg_kinds: tp.List[str] + self.arg_names: tp.List[tp.Optional[str]] + + self.meta.fullname_to_node_id[''] = id_ + + if isinstance(type, mypy.types.CallableType): + self.arg_types = [get_annotation(x, meta=self.meta) for x in type.arg_types] + self.return_type = get_annotation(type.ret_type, self.meta) + self.arg_kinds = [self._get_arg_kind(x) for x in type.arg_kinds] + self.arg_names = type.arg_names + self.type_vars = type_vars_of_node[id_] + elif isinstance(type, mypy.nodes.FuncItem): + self.type_vars = [] + first_arg = [] + if len(type.arguments) and type.arguments[0].variable.is_self: + first_arg = [Annotation(self.meta.containing_class)] + elif len(type.arguments): + first_arg = [get_annotation(any_type_instance, meta=self.meta)] + + self.arg_types = first_arg + [get_annotation(any_type_instance, meta=self.meta) for _ in type.arguments[1:]] + self.return_type = get_annotation(any_type_instance, meta=self.meta) + self.arg_kinds = [self._get_arg_kind(x) for x in type.arg_kinds] + self.arg_names = type.arg_names + else: + assert False, "Not reachable" + + def _get_arg_kind(self, kind): + if kind == mypy.nodes.ARG_POS: + return self.arg_pos + elif kind == mypy.nodes.ARG_OPT: + return self.arg_opt + elif kind == mypy.nodes.ARG_STAR: + return self.arg_star + elif kind == mypy.nodes.ARG_STAR2: + return self.arg_star_2 + elif kind == mypy.nodes.ARG_NAMED_OPT: + return self.arg_named_opt + elif kind == mypy.nodes.ARG_NAMED: + return self.arg_named + else: + assert False, "Not reachable" + + @encode_extension(AnnotationNode.encode) + def encode(self): + return { + self.type_vars_key: self.type_vars, + self.arg_types_key: [x.encode() for x in self.arg_types], + self.return_type_key: 
self.return_type.encode(), + self.arg_kinds_key: self.arg_kinds, + self.arg_names_key: self.arg_names + } + + +class CompositeAnnotationNode(AnnotationNode): + module_key = 'module' + simple_name_key = 'simpleName' + members_key = 'members' + type_vars_key = 'typeVars' + bases_key = 'bases' + + def __init__(self, annotation_type: str, symbol_node: mypy.nodes.TypeInfo, id_, meta: 'Meta'): + super().__init__(annotation_type, id_, meta) + self.meta.fullname_to_node_id[symbol_node._fullname] = id_ + self.module: str = symbol_node.module_name + self.simple_name: str = symbol_node._fullname[len(self.module)+1:] + + self.meta.containing_class = id_ + self.members: tp.List[Definition] = [] + for name in symbol_node.names.keys(): + inner_node = symbol_node.names[name].node + if inner_node is None: + continue + definition = get_definition_from_node(inner_node, self.meta) + if definition is not None: + self.members.append(definition) + + self.meta.containing_class = None + + self.raw_type_vars: tp.Sequence[mypy.types.Type] = symbol_node.defn.type_vars + self.type_vars: tp.List[Annotation] = [ + get_annotation(x, self.meta) for x in self.raw_type_vars + ] + self.bases: tp.List[Annotation] = [get_annotation(x, self.meta) for x in symbol_node.bases] + + @encode_extension(AnnotationNode.encode) + def encode(self): + return { + self.module_key: self.module, + self.simple_name_key: self.simple_name, + self.members_key: [x.encode() for x in self.members], + self.type_vars_key: [x.encode() for x in self.type_vars], + self.bases_key: [x.encode() for x in self.bases] + } + + +class ConcreteAnnotationNode(CompositeAnnotationNode): + annotation_type = 'Concrete' + + is_abstract_key = 'isAbstract' + + def __init__(self, symbol_node: mypy.nodes.TypeInfo, id_, meta: 'Meta'): + assert not symbol_node.is_protocol + super().__init__(self.annotation_type, symbol_node, id_, meta) + self.is_abstract: bool = symbol_node.is_abstract + + @encode_extension(CompositeAnnotationNode.encode) + def 
encode(self): + return {self.is_abstract_key: self.is_abstract} + + +class ProtocolAnnotationNode(CompositeAnnotationNode): + annotation_type = 'Protocol' + + member_names_key = 'protocolMembers' + + def __init__(self, symbol_node: mypy.nodes.TypeInfo, id_, meta: 'Meta'): + assert symbol_node.is_protocol + super().__init__(self.annotation_type, symbol_node, id_, meta) + self.member_names: tp.List[str] = symbol_node.protocol_members + + @encode_extension(CompositeAnnotationNode.encode) + def encode(self): + return {self.member_names_key: self.member_names} + + +class AnnotationNodeWithItems(AnnotationNode): + items_key = 'items' + + def __init__(self, annotation_type: str, mypy_type, id_, namespace: 'Meta'): + super().__init__(annotation_type, id_, namespace) + self.items: tp.List[Annotation] = [ + get_annotation(x, self.meta) for x in mypy_type.items + ] + + @encode_extension(AnnotationNode.encode) + def encode(self): + return {self.items_key: [x.encode() for x in self.items]} + + +class TypeAliasNode(AnnotationNode): + annotation_type = 'TypeAlias' + + target_key = 'target' + + def __init__(self, alias: mypy.nodes.TypeAlias, id_: str, meta: 'Meta'): + super().__init__(self.annotation_type, id_, meta) + self.target: Annotation = get_annotation(alias.target, meta) + + @encode_extension(AnnotationNode.encode) + def encode(self): + return {self.target_key: self.target.encode()} + + +class Meta: + def __init__(self, module_name: str, is_arg: bool = False): + self.fullname_to_node_id: tp.Dict[str, str] = {} + self.module_name = module_name + self.is_arg = is_arg + self.containing_class = None + + +def get_annotation_node(mypy_type: mypy.types.Type, meta: Meta) -> AnnotationNode: + + if isinstance(mypy_type, mypy.types.Instance): + id_ = str(id(mypy_type.type)) + elif isinstance(mypy_type, mypy.types.TypeVarType): + if mypy_type.id.namespace not in meta.fullname_to_node_id.keys(): + id_ = '0' + mypy_type = mypy.types.Type() + else: + node = 
meta.fullname_to_node_id[mypy_type.id.namespace] + id_ = '.' + str(mypy_type.id.raw_id) + '.' + node + elif isinstance(mypy_type, mypy.types.AnyType): + id_ = 'A' + elif isinstance(mypy_type, mypy.types.NoneType): + id_ = 'N' + else: + id_ = str(id(mypy_type)) + + if id_ in annotation_node_dict.keys(): + return annotation_node_dict[id_] + + result: AnnotationNode + + if isinstance(mypy_type, mypy.types.Instance): + if mypy_type.type.is_protocol: + result = ProtocolAnnotationNode(mypy_type.type, id_, meta) + else: + result = ConcreteAnnotationNode(mypy_type.type, id_, meta) + elif isinstance(mypy_type, mypy.types.CallableType): + result = FunctionNode(id_, meta, mypy_type) + + elif isinstance(mypy_type, mypy.types.Overloaded): # several signatures for one function + result = AnnotationNodeWithItems("Overloaded", mypy_type, id_, meta) + + elif isinstance(mypy_type, mypy.types.TypeVarType): + result = TypeVarNode(mypy_type, id_, meta) + + elif isinstance(mypy_type, mypy.types.AnyType): + result = AnnotationNode("Any", id_, meta) + + elif isinstance(mypy_type, mypy.types.TupleType): + result = AnnotationNodeWithItems("Tuple", mypy_type, id_, meta) + + elif isinstance(mypy_type, mypy.types.UnionType): + result = AnnotationNodeWithItems("Union", mypy_type, id_, meta) + + elif isinstance(mypy_type, mypy.types.NoneType): + result = AnnotationNode("NoneType", id_, meta) + + elif isinstance(mypy_type, mypy.types.TypeAliasType) and \ + mypy_type.alias is not None and len(mypy_type.args) == 0: + result = TypeAliasNode(mypy_type.alias, id_, meta) + + else: + id_ = '0' + result = AnnotationNode("Unknown", id_, meta) + + annotation_node_dict[id_] = result + return result + + +def get_annotation(mypy_type: mypy.types.Type, meta: Meta) -> Annotation: + cur_node = get_annotation_node(mypy_type, meta) + + if isinstance(mypy_type, mypy.types.Instance): + children = [] + for arg in mypy_type.args: + children.append(get_annotation(arg, meta)) + + if len(children) == 0: + return 
Annotation(cur_node.id_) + else: + return Annotation(cur_node.id_, children) + + # TODO: consider LiteralType + + else: + return Annotation(cur_node.id_) + + +def get_definition_from_node(node: mypy.nodes.Node, meta: Meta, only_types: bool = False) -> tp.Optional[Definition]: + if isinstance(node, mypy.nodes.TypeInfo): + return ClassDef(node, meta) + elif not only_types and isinstance(node, mypy.nodes.FuncDef): + return FuncDef(node, meta) + elif not only_types and isinstance(node, mypy.nodes.OverloadedFuncDef): + return OverloadedFuncDef(node, meta) + elif not only_types and isinstance(node, mypy.nodes.Var): + return Variable(node, meta) + elif not only_types and isinstance(node, mypy.nodes.Decorator): + return Variable(node.var, meta) + else: + return None + + +def get_definition_from_symbol_node( + table_node: mypy.nodes.SymbolTableNode, + meta: Meta, + only_types: bool = False +)-> tp.Optional[Definition]: + if table_node.node is None or not (table_node.node.fullname.startswith(meta.module_name)) \ + or not isinstance(table_node.node, mypy.nodes.Node): # this check is only for mypy + return None + + return get_definition_from_node(table_node.node, meta, only_types) \ No newline at end of file diff --git a/utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/utils.py b/utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/utils.py new file mode 100644 index 0000000000..6d3fb12d27 --- /dev/null +++ b/utbot-python-types/src/main/python/utbot_mypy_runner/utbot_mypy_runner/utils.py @@ -0,0 +1,37 @@ +import typing as tp + + +def bad_symbol(c: str) -> bool: + return c.isspace() or c == '[' or c == '(' + + +def get_borders(line: int, column: int, end_line: int, end_column: int, + file_content: tp.List[str]) -> tp.Tuple[int, int]: + while bad_symbol(file_content[line - 1][column - 1]): + line, column = inc_position(line, column, file_content) + + while bad_symbol(file_content[end_line - 1][end_column - 1]): + end_line, end_column = 
dec_position(end_line, end_column, file_content) + + return get_offset(line, column, file_content), get_offset(end_line, end_column, file_content) + + +def get_offset(line: int, column: int, file_content: tp.List[str]) -> int: + return sum([len(x) for x in file_content[:line-1]]) + column - 1 + + +def inc_position(line: int, column: int, file_content: tp.List[str]) -> tp.Tuple[int, int]: + if column == len(file_content[line - 1]): + line += 1 + column = 1 + else: + column += 1 + return line, column + + +def dec_position(line: int, column: int, file_content: tp.List[str]) -> tp.Tuple[int, int]: + column -= 1 + if column == 0: + line -= 1 + column = len(file_content[line - 1]) + return line, column diff --git a/utbot-python-types/src/main/resources/utbot_mypy_runner_version b/utbot-python-types/src/main/resources/utbot_mypy_runner_version new file mode 100644 index 0000000000..25cd22b69c --- /dev/null +++ b/utbot-python-types/src/main/resources/utbot_mypy_runner_version @@ -0,0 +1 @@ +0.2.12 \ No newline at end of file diff --git a/utbot-python/.gitignore b/utbot-python/.gitignore index 1377554ebe..09283c3ceb 100644 --- a/utbot-python/.gitignore +++ b/utbot-python/.gitignore @@ -1 +1,2 @@ *.swp +use_local_python_packages \ No newline at end of file diff --git a/utbot-python/src/main/kotlin/org/utbot/python/utils/RequirementsUtils.kt b/utbot-python/src/main/kotlin/org/utbot/python/utils/RequirementsUtils.kt index e9f4720172..dafea3c166 100644 --- a/utbot-python/src/main/kotlin/org/utbot/python/utils/RequirementsUtils.kt +++ b/utbot-python/src/main/kotlin/org/utbot/python/utils/RequirementsUtils.kt @@ -1,10 +1,20 @@ package org.utbot.python.utils +import org.utbot.python.newtyping.mypy.MypyInfoBuild + object RequirementsUtils { + private val utbotMypyRunnerVersion = + MypyInfoBuild::class.java.getResource("/utbot_mypy_runner_version")!!.readText() + private val useLocalPythonPackages = // "true" must be set only for debugging + 
this::class.java.getResource("/use_local_python_packages")?.readText()?.toBoolean() ?: false + private val localMypyRunnerPath = + MypyInfoBuild::class.java.getResource("/local_mypy_path")?.readText() + private val findLinks: List<String> = // for pip + if (useLocalPythonPackages) listOf(localMypyRunnerPath!!) else emptyList() val requirements: List<String> = listOf( "mypy==1.0.0", "utbot-executor==1.4.36", - "utbot-mypy-runner==0.2.11", + "utbot-mypy-runner==$utbotMypyRunnerVersion", ) private val requirementsScriptContent: String = @@ -40,7 +50,8 @@ object RequirementsUtils { "-m", "pip", "install" - ) + moduleNames + ) + moduleNames, + environmentVariables = mapOf("PIP_FIND_LINKS" to findLinks.joinToString(" ")) ) } }