Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 6bf2b21

Browse files
committed Jul 3, 2023
feat: Recover string slices from readonly data
1 parent 30b63a9 commit 6bf2b21

File tree

1 file changed

+92
-0
lines changed

1 file changed

+92
-0
lines changed
 

‎__init__.py

+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
from binaryninja.binaryview import BinaryView, DataVariable
2+
from binaryninja.log import Logger
3+
from binaryninja.types import IntegerType, PointerType
4+
5+
from typing import List
6+
7+
# Module-level logger shared by this file; messages are emitted under the
# logger name "rust_string_slicer".
# NOTE(review): session_id=0 is presumably the global (non-session) log — confirm.
logger = Logger(session_id=0, logger_name="rust_string_slicer")
8+
9+
10+
def recover_string_slices_from_readonly_data(bv: BinaryView):
    """Log candidate string slices referenced by pointers into read-only data.

    A candidate string slice is a pointer-typed data variable whose value
    lies inside a readable, non-writable, non-executable segment, followed
    immediately in memory by a non-zero unsigned integer that is treated as
    the slice length. The bytes at the pointed-to address are read and, if
    they decode as UTF-8, the recovered string is logged at info level.

    Nothing is returned and the binary view is not modified; all results
    are reported through the module-level ``logger``.

    Args:
        bv: The binary view to scan. If its architecture is unknown, an
            error is logged and the function returns early.
    """
    if bv.arch is None:
        logger.log_error("Could not get architecture of current binary view, exiting")
        return

    readonly_segments = [
        segment
        for segment in bv.segments
        if segment.readable and not segment.writable and not segment.executable
    ]

    # Obtain all data vars which are pointers to data in readonly data segments.
    # `any` ensures each pointer is collected at most once, even if more than
    # one read-only segment happens to contain the pointed-to address.
    data_vars_to_ro_segment_data: List[DataVariable] = []
    for candidate_string_slice_data_ptr in bv.data_vars.values():
        if not isinstance(candidate_string_slice_data_ptr.type, PointerType):
            continue
        if not any(
            candidate_string_slice_data_ptr.value in readonly_segment
            for readonly_segment in readonly_segments
        ):
            continue
        data_vars_to_ro_segment_data.append(candidate_string_slice_data_ptr)
        logger.log_debug(
            f"Found pointer var at {candidate_string_slice_data_ptr.address:#x} ({candidate_string_slice_data_ptr}) pointing to {candidate_string_slice_data_ptr.value:#x} "
        )

    # Try to read an integer following the data var,
    # and treat it as a candidate for a string slice length.
    for candidate_string_slice_data_ptr in data_vars_to_ro_segment_data:
        candidate_string_slice_len_addr = (
            candidate_string_slice_data_ptr.address
            + candidate_string_slice_data_ptr.type.width
        )

        # Filter out anything at the candidate address
        # that's already defined as any data var type which is not an integer.
        existing_data_var_at_candidate_string_slice_len_addr = bv.get_data_var_at(
            candidate_string_slice_len_addr
        )
        if (
            existing_data_var_at_candidate_string_slice_len_addr is not None
            and not isinstance(
                existing_data_var_at_candidate_string_slice_len_addr.type, IntegerType
            )
        ):
            continue

        # NOTE(review): the length is read with the architecture's default
        # integer size, which may be narrower than the pointer width — confirm
        # this is the intended size for the length field.
        candidate_string_slice_len = bv.read_int(
            address=candidate_string_slice_len_addr,
            size=bv.arch.default_int_size,
            sign=False,
            endian=bv.arch.endianness,
        )

        logger.log_debug(
            f"Pointer var at {candidate_string_slice_data_ptr.address:#x} is followed by integer with value {candidate_string_slice_len:#x}"
        )

        # Filter out any potential string slice which has length 0
        if candidate_string_slice_len == 0:
            continue

        # Attempt to read out the pointed to value as a string slice, with the length obtained above.
        candidate_string_slice = bv.read(
            addr=candidate_string_slice_data_ptr.value,
            length=candidate_string_slice_len,
        )

        logger.log_debug(
            f"Obtained candidate string slice with addr {candidate_string_slice_data_ptr.value:#x}, len {candidate_string_slice_len:#x}: {candidate_string_slice}"
        )

        # Sanity check whether the recovered string is valid UTF-8
        try:
            candidate_utf8_string = candidate_string_slice.decode("utf-8")
        except UnicodeDecodeError as err:
            # This message must be an f-string so the offending bytes and the
            # decode error are actually interpolated into the log line.
            logger.log_warn(
                f"Candidate string slice {candidate_string_slice} does not decode to a valid UTF-8 string; excluding from final results: {err}"
            )
        else:
            logger.log_info(
                f'Recovered string at addr {candidate_string_slice_data_ptr.value:#x}, len {candidate_string_slice_len:#x}: "{candidate_utf8_string}"'
            )
90+
91+
92+
# Run the recovery as soon as this file is executed.
# NOTE(review): `bv` is not defined anywhere in this file — presumably it is
# supplied by the Binary Ninja scripting environment; confirm, otherwise this
# line raises NameError.
recover_string_slices_from_readonly_data(bv)

0 commit comments

Comments
 (0)
Failed to load comments.