From db5b9d7564ccb48ffdc7fd42b290f5491064f644 Mon Sep 17 00:00:00 2001 From: Abhi <85984486+AbhiTheModder@users.noreply.github.com> Date: Thu, 2 Jan 2025 09:37:59 +0530 Subject: [PATCH] Major Change (#4) * Bump Min. python version to `3.10` * Add New Instructions * Implement tests * Refactor The code now follows a more modular structure, with improved function definitions and better separation of concerns. Fixed several linter (flake8, pylint) warnings Shift to `argparse` for better command-line arguments parsing Improved function for fetching the path to the `grammar.yaml` file. --- .github/workflows/python-package.yml | 40 ++++++++ pyproject.toml | 16 +-- smalig/__init__.py | 6 +- smalig/__main__.py | 59 +---------- smalig/cli/app.py | 147 +++++++++++++-------------- smalig/grammar.yaml | 48 +++++++++ smalig/utils/__init__.py | 56 ++++++++-- tests/__init__.py | 0 tests/test_smalig.py | 32 ++++++ 9 files changed, 245 insertions(+), 159 deletions(-) create mode 100644 .github/workflows/python-package.yml create mode 100644 tests/__init__.py create mode 100644 tests/test_smalig.py diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml new file mode 100644 index 0000000..7b0bc2e --- /dev/null +++ b/.github/workflows/python-package.yml @@ -0,0 +1,40 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Python package + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11", "3.12"] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip 
install --upgrade pip + python -m pip install flake8 pytest + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=180 --statistics + - name: Test with pytest + run: | + pytest \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 3c0e293..6a12e63 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,14 +4,12 @@ build-backend = "setuptools.build_meta" [project] name = "smalig" -version = "0.1.6" +version = "0.1.7" description = "Smali ByteCode info (grammar) fetch tool written in Python" -authors = [ - { name = "AbhiTheModder", email = "allinoneallinone00@gmail.com" }, -] -license = {file = "LICENSE"} +authors = [{ name = "AbhiTheModder", email = "allinoneallinone00@gmail.com" }] +license = { file = "LICENSE" } readme = "README.md" -requires-python = ">=3.9" +requires-python = ">=3.10" keywords = ["smali", "grammar", "parser", "android", "reverse-engineering"] classifiers = [ "Development Status :: 4 - Beta", @@ -19,7 +17,6 @@ classifiers = [ "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", @@ -29,10 +26,7 @@ classifiers = [ "Operating System :: OS Independent", ] -dependencies = [ - "PyYAML", - "jsbeautifier", -] +dependencies = ["PyYAML", "jsbeautifier"] [project.urls] homepage = "https://github.com/RevEngiSquad/smalig" diff --git a/smalig/__init__.py b/smalig/__init__.py index 66e4c48..f28359d 100644 --- a/smalig/__init__.py +++ b/smalig/__init__.py @@ -1,4 +1,4 @@ -from .utils import YamlReader, 
InstructionFetch, cls -from .cli.app import main, app, help +from .utils import YamlReader, InstructionFetch, cls, grammar_yaml +from .cli.app import main -__all__ = ["YamlReader", "InstructionFetch", "main", "app", "help", "cls"] +__all__ = ["YamlReader", "InstructionFetch", "main", "cls", "grammar_yaml"] diff --git a/smalig/__main__.py b/smalig/__main__.py index 0add6a8..62baddd 100644 --- a/smalig/__main__.py +++ b/smalig/__main__.py @@ -1,59 +1,4 @@ -import sys -from smalig import app, help, cls +from smalig import main if __name__ == "__main__": - args = sys.argv[1:] - - if "-h" in args or "--help" in args: - help() - sys.exit(0) - - if "-m" in args: - exact_match = False - else: - exact_match = True - - if "-o" in args: - try: - output_file = args[args.index("-o") + 1] - except IndexError: - output_file = None - else: - output_file = None - - try: - if "-f" in args: - try: - file_path = args[args.index("-f") + 1] - except IndexError: - file_path = None - else: - file_path = input(f"Enter the path to the file: ") - - if "-t" in args: - try: - target = args[args.index("-t") + 1] - except IndexError: - target = "" - else: - target = input(f"Enter Query: ") - cls() - except KeyboardInterrupt: - print("\nExiting...") - sys.exit(0) - - if "-j" in args: - json = True - else: - if output_file and output_file.endswith(".json"): - json = True - else: - json = False - - if target == "": - raise Exception("Query is empty") - - if file_path == "": - raise Exception("File path is empty") - - app(file_path=file_path, target=target, json=json, out=output_file, exact_match=exact_match) + main() diff --git a/smalig/cli/app.py b/smalig/cli/app.py index 57761af..eff6b65 100644 --- a/smalig/cli/app.py +++ b/smalig/cli/app.py @@ -1,36 +1,22 @@ -import sys +import argparse import json as js import jsbeautifier -import importlib.resources -import textwrap -from smalig import YamlReader, InstructionFetch, cls +from smalig import YamlReader, InstructionFetch, cls, grammar_yaml -def 
help() -> None: - help_message = """ - smalig: Smali ByteCode info (grammar) fetch tool - - Usage: smalig [-t TARGET [-j] [-o OUTPUT_FILE] - - Options: - -t TARGET Specify the Smali instruction to fetch. If omitted, - prompts the user for input. - -j Output the result as JSON. If -o is also specified and the - OUTPUT_FILE ends in '.json', this flag is automatically set. - -o OUTPUT_FILE Write the output to the specified file. If omitted, prints to console. - - Examples: - smalig -t "const-string" # Fetch information for the 'const-string' instruction. - smalig -t "invoke-virtual" -j -o output.json # Fetch and save as JSON - smalig -o my_output.txt # Prompts for instruction then saves to my_output.txt - - - If no target is specified using -t, the tool will prompt for input. - - If no -o flag is used, the output goes to stdout. If a file is specified without a .json extension, plain text output is generated. - """ - print(textwrap.dedent(help_message)) +EXAMPLES = """ +examples: + smalig # Prompts for instruction then fetch it's information. + smalig -m # Prompts for instruction then fetch it's information with fuzzy match. + smalig -t "move" # Fetch information for the 'move' instruction. 
+ smalig -t "move" -j # Output as JSON + smalig -t "invoke-virtual" -j -o output.json # Fetch and save as JSON + smalig -o my_output.txt # Prompts for instruction then saves to my_output.txt + smalig -t "move" -m # Fuzzy match + smalig -t "move" -o my_output.json # Save as JSON + smalig -t "move" -o my_output.txt # Save as plain text +""" def app(file_path, target, json, out, exact_match) -> None: @@ -74,60 +60,65 @@ def app(file_path, target, json, out, exact_match) -> None: return -def main() -> None: - """ - Main function - """ - args = sys.argv[1:] - - if "-h" in args or "--help" in args: - help() - return - - if "-m" in args: - exact_match = False - else: - exact_match = True - - if "-o" in args: - try: - output_file = args[args.index("-o") + 1] - except IndexError: - output_file = None +def parse_args(): + parser = argparse.ArgumentParser( + prog="smalig", + description="Smali ByteCode info (grammar) fetch tool", + epilog=EXAMPLES, + formatter_class=argparse.RawTextHelpFormatter, + ) + parser.add_argument("-m", action="store_true", help="Enable fuzzy match") + parser.add_argument( + "-o", + metavar="OUTPUT_FILE", + help="Specify output file. If omitted, prints to console.", + ) + parser.add_argument( + "-t", + metavar="TARGET", + help="Specify the Smali instruction to fetch. If omitted, prompts the user for input.", + ) + parser.add_argument( + "-j", + action="store_true", + help="Enable JSON output. 
If omitted and OUTPUT_FILE ends in '.json', this flag is automatically set.", + ) + return parser.parse_args() + + +def get_target(args): + if args.t: + return args.t else: - output_file = None + target = input("Search instruction: ") + cls() + return target - with importlib.resources.path("smalig", "grammar.yaml") as file_path: - file_path = str(file_path) - if "-t" in args: - try: - target = args[args.index("-t") + 1] - except IndexError: - target = "" - else: - try: - target = input(f"Enter Query: ") - cls() - except KeyboardInterrupt: - print("\nExiting...") - return - - if "-j" in args: - json = True +def get_json(args, output_file): + if args.j: + return True + elif output_file and output_file.endswith(".json"): + return True else: - if output_file and output_file.endswith(".json"): - json = True - else: - json = False - - if target == "": - raise Exception("Query is empty") - - if file_path == "": - raise Exception("File path is empty") - - app(file_path=file_path, target=target, json=json, out=output_file, exact_match=exact_match) + return False + + +def main(): + args = parse_args() + file_path = grammar_yaml() + target = get_target(args) + if not target: + exit("Query is empty!") + json_output = get_json(args, args.o) + + app( + file_path=file_path, + target=target, + json=json_output, + out=args.o, + exact_match=not args.m, + ) if __name__ == "__main__": diff --git a/smalig/grammar.yaml b/smalig/grammar.yaml index feb80b4..2c4e96e 100644 --- a/smalig/grammar.yaml +++ b/smalig/grammar.yaml @@ -283,6 +283,54 @@ example: "1500 2041 - const/high16 v0, 0x41200000 (#float 10.0)" example_disc: "Moves the floating literal value 0x41200000(10.0) into v0. The 16 bit literal in the instruction carries the top 16 bits of the floating point number." 
+- opcode: "16" + name: "const-wide/16" + format: "AA|op BBBB" + format_id: "21s" + syntax: "const-wide/16 vAA, #+BBBB" + args_info: "A: destination register (8 bits), B: signed int (16 bits)" + short_desc: "Move the given literal value (sign-extended to 64 bits) into the specified register-pair." + long_desc: "Moves the literal value BBBB into register-pair (vAA, vAA+1), expanding the integer constant into a long constant. BBBB is sign-extended to 64 bits. The value of BBBB is in the range -32768 to 32767 (-0x8000 to 0x7FFF)." + note: "" + example: "1600 0A00 - const-wide/16 v0, 0xa (#long 10)" + example_disc: "Moves the long literal value 0xa(10) into (v0,v1) register-pair." + +- opcode: "17" + name: "const-wide/32" + format: "AA|op BBBBlo BBBBhi" + format_id: "31i" + syntax: "const-wide/32 vAA, #+BBBBBBBB" + args_info: "A: destination register (8 bits), B: signed int (32 bits)" + short_desc: "Move the given literal value (sign-extended to 64 bits) into the specified register-pair." + long_desc: "Moves the literal value BBBBBBBB into register-pair (vAA, vAA+1), expanding the integer constant into a long constant. BBBBBBBB is sign-extended to 64 bits. The value of BBBBBBBB is in the range -2147483648 to 2147483647 (-0x80000000 to 0x7FFFFFFF)." + note: "" + example: "1702 4E61 BC00 - const-wide/32 v2, 0x00bc614e" + example_disc: "Moves the long literal value 0x00bc614e into (v2,v3) register-pair." + +- opcode: "18" + name: "const-wide" + format: "AA|op BBBBlo BBBB BBBB BBBBhi" + format_id: "51l" + syntax: "const-wide vAA, #+BBBBBBBBBBBBBBBB" + args_info: "A: destination register (8 bits), B: signed int (64 bits)" + short_desc: "Move the given literal value into the specified register-pair." + long_desc: "Moves the literal value constant BBBBBBBBBBBBBBBB into register-pair (vAA, vAA+1). The value of BBBBBBBBBBBBBBBB is in the range -9223372036854775808 to 9223372036854775807 (-0x8000000000000000 to 0x7FFFFFFFFFFFFFFF)." 
+ note: "" + example: "1802 874b 6b5d 54dc 2b00 - const-wide v2, 0x002bdc545d6b4b87 (#long 12345678901234567)" + example_disc: "Moves the long literal value 0x002bdc545d6b4b87 into (v2,v3) register-pair." + +- opcode: "19" + name: "const-wide/high16" + format: "AA|op BBBB" + format_id: "21h" + syntax: "const-wide/high16 vAA, #+BBBB000000000000" + args_info: "A: destination register (8 bits), B: signed int (16 bits)" + short_desc: "Move the given literal value (right-zero-extended to 64 bits) into the specified register-pair." + long_desc: "Moves the literal value BBBB into register-pair (vAA, vAA+1). BBBB is right-zero-extended to 64 bits. The value of BBBB is in the range -32768 to 32767 (-0x8000 to 0x7FFF)." + note: "Generally used to initialise double values." + example: "1900 2440 - const-wide/high16 v0, 0x4024000000000000 (#double 10.0)" + example_disc: "Moves the double literal value 10.0 into (v0,v1) register-pair." + - opcode: "28" name: "goto" format: "AA|op" diff --git a/smalig/utils/__init__.py b/smalig/utils/__init__.py index 5a91dd2..afa9290 100644 --- a/smalig/utils/__init__.py +++ b/smalig/utils/__init__.py @@ -3,7 +3,13 @@ from io import StringIO -cls = lambda: print("\033c", end="") + +def cls(): + print("\033c", end="") + + +def grammar_yaml() -> str: + return os.path.join(os.path.dirname(os.path.dirname(__file__)), "grammar.yaml") class YamlReader: @@ -27,7 +33,9 @@ class InstructionFetch: def __init__(self, instructions: list[dict], target: str, exact_match: bool = True): self.instructions = instructions self.target: str = target - self.result: dict | list[dict] = self.fetch() if exact_match else self.fetch_fuzzy() + self.result: dict | list[dict] = ( + self.fetch() if exact_match else self.fetch_fuzzy() + ) def __str__(self): if isinstance(self.result, dict): @@ -39,9 +47,19 @@ def __str__(self): results += f"Args: {self.result['args_info']}\n" results += f"Short Info: {self.result['short_desc']}\n" results += f"Detailed Info: 
{self.result['long_desc']}\n" - results += f"Note: {self.result['note']}\n" if self.result.get("note") else "" - results += f"Example: {self.result['example']}\n" if self.result.get("example") else "" - results += f" Desc: {self.result['example_desc']}" if self.result.get("example_desc") else "" + results += ( + f"Note: {self.result['note']}\n" if self.result.get("note") else "" + ) + results += ( + f"Example: {self.result['example']}\n" + if self.result.get("example") + else "" + ) + results += ( + f" Desc: {self.result['example_desc']}" + if self.result.get("example_desc") + else "" + ) elif isinstance(self.result, list): results = "" for ith, instruction in enumerate(self.result): @@ -54,15 +72,33 @@ def __str__(self): results += f"Args: {instruction['args_info']}\n" results += f"Short Info: {instruction['short_desc']}\n" results += f"Detailed Info: {instruction['long_desc']}\n" - results += f"Note: {instruction['note']}\n" if instruction['note'] else "" - results += f"Example: {instruction['example']}\n" if instruction['example'] else "" - results += f" Desc: {instruction['example_desc']}\n\n" if instruction['example_desc'] else "" + results += ( + f"Note: {instruction['note']}\n" if instruction["note"] else "" + ) + results += ( + f"Example: {instruction['example']}\n" + if instruction["example"] + else "" + ) + results += ( + f" Desc: {instruction['example_desc']}\n\n" + if instruction["example_desc"] + else "" + ) else: results = "No matching instructions found." 
return results def __repr__(self): - return f"InstructionFetch(instructions={self.instructions}, target={self.target}, name={self.name}, opcode={self.opcode}, format={self.format}, format_id={self.format_id}, syntax={self.syntax}, args_info={self.args_info}, short_desc={self.short_desc}, long_desc={self.long_desc}, note={self.note}, example={self.example}, example_desc={self.example_desc})" + return ( + f"InstructionFetch(instructions={self.instructions}, " + f"target={self.target}, name={self.name}, " + f"opcode={self.opcode}, format={self.format}, " + f"format_id={self.format_id}, syntax={self.syntax}, " + f"args_info={self.args_info}, short_desc={self.short_desc}, " + f"long_desc={self.long_desc}, note={self.note}, " + f"example={self.example}, example_desc={self.example_desc})" + ) def fetch(self) -> dict: for instruction in self.instructions: @@ -72,7 +108,7 @@ def fetch(self) -> dict: if instruction["name"] == self.target: return instruction return {} - + def fetch_fuzzy(self) -> list[dict]: results = [] for instruction in self.instructions: diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_smalig.py b/tests/test_smalig.py new file mode 100644 index 0000000..8f7b369 --- /dev/null +++ b/tests/test_smalig.py @@ -0,0 +1,32 @@ +# Tests for smalig +from smalig.utils import YamlReader, InstructionFetch, grammar_yaml + + +def test_fetch(): + reader = YamlReader(grammar_yaml()) + fetcher = InstructionFetch(reader.data, "move") + result = fetcher.fetch() + assert result["opcode"] == "01" + assert result["name"] == "move" + + +def test_fetch_fuzzy(): + reader = YamlReader(grammar_yaml()) + fetcher = InstructionFetch(reader.data, "move", exact_match=False) + result = fetcher.fetch_fuzzy() + assert len(result) > 0 + assert all("move" in instruction["name"] for instruction in result) + + +def test_fetch_nonexistent(): + reader = YamlReader(grammar_yaml()) + fetcher = InstructionFetch(reader.data, 
"nonexistent") + result = fetcher.fetch() + assert result == {} + + +def test_fetch_fuzzy_nonexistent(): + reader = YamlReader(grammar_yaml()) + fetcher = InstructionFetch(reader.data, "nonexistent", exact_match=False) + result = fetcher.fetch_fuzzy() + assert result == []