Skip to content

Commit

Permalink
Merge pull request #2201 from Ana06/ida_apis
Browse files Browse the repository at this point in the history
ida extractor: extract APIs from renamed globals
  • Loading branch information
mr-tz authored Aug 13, 2024
2 parents 40c7714 + be84992 commit 8d5fcdf
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 10 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

- webui: explore capa analysis results in a web-based UI online and offline #2224 @s-ff
- support analyzing DRAKVUF traces #2143 @yelhamer
- IDA extractor: extract names from dynamically resolved APIs stored in renamed global variables #2201 @Ana06


### Breaking Changes
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ Please learn to write rules and contribute new entries as you find interesting t
# IDA Pro plugin: capa explorer
If you use IDA Pro, then you can use the [capa explorer](https://github.com/mandiant/capa/tree/master/capa/ida/plugin) plugin.
capa explorer helps you identify interesting areas of a program and build new capa rules using features extracted directly from your IDA Pro database.
It also uses your local changes to the .idb to extract better features, such as when you rename a global variable that contains a dynamically resolved API address.
![capa + IDA Pro integration](https://github.com/mandiant/capa/blob/master/doc/img/explorer_expanded.png)
Expand Down
44 changes: 34 additions & 10 deletions capa/features/extractors/ida/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
from typing import Any, Dict, Tuple, Iterator
import re
from typing import Any, Dict, Tuple, Iterator, Optional

import idc
import ida_ua
import idaapi
import idautils

Expand Down Expand Up @@ -35,9 +37,9 @@ def get_externs(ctx: Dict[str, Any]) -> Dict[int, Any]:
return ctx["externs_cache"]


def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Iterator[Any]:
def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Optional[Tuple[str, str]]:
"""check instruction for API call"""
info = ()
info = None
ref = insn.ea

# attempt to resolve API calls by following chained thunks to a reasonable depth
Expand All @@ -52,16 +54,15 @@ def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Iterator[A
except IndexError:
break

info = funcs.get(ref, ())
info = funcs.get(ref)
if info:
break

f = idaapi.get_func(ref)
if not f or not (f.flags & idaapi.FUNC_THUNK):
break

if info:
yield info
return info


def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
Expand All @@ -76,16 +77,39 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
if insn.get_canon_mnem() not in ("call", "jmp"):
return

# check calls to imported functions
for api in check_for_api_call(insn, get_imports(fh.ctx)):
# check call to imported functions
api = check_for_api_call(insn, get_imports(fh.ctx))
if api:
# tuple (<module>, <function>, <ordinal>)
for name in capa.features.extractors.helpers.generate_symbols(api[0], api[1]):
yield API(name), ih.address
# a call instruction should only call one function, stop if a call to an import is extracted
return

# check calls to extern functions
for api in check_for_api_call(insn, get_externs(fh.ctx)):
# check call to extern functions
api = check_for_api_call(insn, get_externs(fh.ctx))
if api:
# tuple (<module>, <function>, <ordinal>)
yield API(api[1]), ih.address
# a call instruction should only call one function, stop if a call to an extern is extracted
return

# extract dynamically resolved APIs stored in renamed globals (renamed for example using `renimp.idc`)
# examples: `CreateProcessA`, `HttpSendRequestA`
if insn.Op1.type == ida_ua.o_mem:
op_addr = insn.Op1.addr
op_name = idaapi.get_name(op_addr)
# when renaming a global using an API name, IDA assigns it the function type
# ensure we do not extract something wrong by checking that the address has a name and a type
# we could check that the type is a function definition, but that complicates the code
if (not op_name.startswith("off_")) and idc.get_type(op_addr):
# Remove suffix used in repeated names, for example _0 in VirtualFree_0
match = re.match(r"(.+)_\d+", op_name)
if match:
op_name = match.group(1)
# the global name does not include the DLL name, so we can't extract it
for name in capa.features.extractors.helpers.generate_symbols("", op_name):
yield API(name), ih.address

# extract IDA/FLIRT recognized API functions
targets = tuple(idautils.CodeRefsFrom(insn.ea, False))
Expand Down
1 change: 1 addition & 0 deletions capa/ida/plugin/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ can update using the `Settings` button.
* Double-click the `Address` column to navigate your Disassembly view to the address of the associated feature
* Double-click a result in the `Rule Information` column to expand its children
* Select a checkbox in the `Rule Information` column to highlight the address of the associated feature in your Disassembly view
* Reanalyze if you renamed global variables that store dynamically resolved APIs. capa will use these to improve its analysis.

#### Tips for Rule Generator

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ known_first_party = [
"ida_loader",
"ida_nalt",
"ida_segment",
"ida_ua",
"idaapi",
"idautils",
"idc",
Expand Down

0 comments on commit 8d5fcdf

Please sign in to comment.