Skip to content

Commit

Permalink
qs: optimize code string lookup
Browse files: browse the repository at this point in the history
  • Loading branch information
williballenthin committed Jun 22, 2023
1 parent c4f6821 commit 5f19c6f
Showing 1 changed file with 15 additions and 5 deletions.
20 changes: 15 additions & 5 deletions floss/qs/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,11 +391,11 @@ def check_is_code(code_offsets: Set[int], string: ExtractedString):
if addr in code_offsets:
return ("#code",)

# supplement code analysis with a database of junk code strings
junk_db = StringGlobalPrevalenceDatabase.from_file(
pathlib.Path(floss.qs.db.__file__).parent / "data" / "gp" / "junk-code.jsonl.gz"
)
if query_global_prevalence_database(junk_db, string.string):
return ()


def query_code_string_database(db: StringGlobalPrevalenceDatabase, string: str):
    """Tag a string that is present in the given code-string database.

    Args:
        db: database object queried via ``db.query(string)``.
        string: the string to look up.

    Returns:
        ``("#code-junk",)`` when ``db.query(string)`` yields a truthy result,
        otherwise an empty tuple.
    """
    # Only the truthiness of the query result matters; its content is not used.
    return ("#code-junk",) if db.query(string) else ()
Expand Down Expand Up @@ -466,6 +466,16 @@ def make_tagger(db, queryfn) -> Sequence[Tag]:
else:
raise ValueError(f"unexpected database type: {type(db)}")

# supplement code analysis with a database of junk code strings
junk_db = StringGlobalPrevalenceDatabase.from_file(
pathlib.Path(floss.qs.db.__file__).parent / "data" / "gp" / "junk-code.jsonl.gz"
)
ret.append(make_tagger(junk_db, query_code_string_database))

return ()



return ret


Expand Down

0 comments on commit 5f19c6f

Please sign in to comment.