Add initial setup for function_extractor, which extracts functions from binary files (for use in graph networking later on)
This commit is contained in:
parent
ce2384cb72
commit
3680b94d17
24
.vscode/launch.json
vendored
Normal file
24
.vscode/launch.json
vendored
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "FF_FunctionExtractor",
|
||||||
|
"type": "debugpy",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "herrewebpy/firmware_forensics/function_extractor.py",
|
||||||
|
"args": [
|
||||||
|
"--binary",
|
||||||
|
"sample_data/firmwares/S7_BL31.bin",
|
||||||
|
"--architecture",
|
||||||
|
"ARM_AARCH64",
|
||||||
|
"--endian",
|
||||||
|
"little",
|
||||||
|
],
|
||||||
|
"console": "integratedTerminal",
|
||||||
|
"justMyCode": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
7
debug.py
7
debug.py
@ -1,2 +1,5 @@
|
|||||||
from herrewebpy.bioinformatics import sequence_alignment
|
#from herrewebpy.bioinformatics import sequence_alignment
|
||||||
sequence_alignment.SequenceAlignment(['aa', 'bb', 'cc'],['bb','aa','cc'], ['1','2','3'], ['1','2','3'])
|
#sequence_alignment.SequenceAlignment(['aa', 'bb', 'cc'],['bb','aa','cc'], ['1','2','3'], ['1','2','3'])
|
||||||
|
|
||||||
|
from herrewebpy.firmware_forensics import function_extractor
|
||||||
|
function_extractor.FunctionExtractor('', 'ARM_AARCH64')
|
1
herrewebpy/firmware_forensics/__init__.py
Normal file
1
herrewebpy/firmware_forensics/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
from . import *
|
160
herrewebpy/firmware_forensics/function_extractor.py
Normal file
160
herrewebpy/firmware_forensics/function_extractor.py
Normal file
@ -0,0 +1,160 @@
|
|||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
from capstone import *
|
||||||
|
from capstone.arm64 import ARM64_OP_IMM
|
||||||
|
from tqdm import tqdm
|
||||||
|
import pandas as pd
|
||||||
|
from herrewebpy import logger
|
||||||
|
|
||||||
|
class ARM_AARCH32_Parser:
    """Placeholder parser for 32-bit ARM (AArch32) binaries.

    Only the raw image is stored; function extraction is not implemented
    yet for this architecture.
    """

    def __init__(self, binary_bytes):
        """Keep the raw image and start with an empty function list.

        Args:
            binary_bytes: Raw contents of the binary to analyse.
        """
        self.functions = []
        self.binary_bytes = binary_bytes

    def extract_functions(self):
        """Return the ``NotImplemented`` sentinel.

        Extraction for ARM AARCH32 binaries has not been written yet, so
        callers receive ``NotImplemented`` instead of a result.
        """
        return NotImplemented
|
||||||
|
|
||||||
|
|
||||||
|
class ARM_AARCH64_Parser:
    """Heuristic function scanner for raw AArch64 (ARM64) binary images.

    The image is decoded with Capstone. A function is assumed to start at an
    ``stp``/``sub`` instruction that touches ``sp`` and to end at a return
    instruction. Direct ``bl`` calls are recorded as references.

    Attributes:
        binary_bytes: Raw contents of the binary.
        start_address: Address at which the first byte of the image is
            assumed to be loaded (0 by default, i.e. addresses are offsets).
        functions: Initialised to an empty list; not filled by this class.
    """

    # Mnemonics that open a function when one of their operands mentions sp.
    _PROLOGUE_MNEMONICS = ('stp', 'sub')
    # Capstone renders a plain AArch64 `ret` with an EMPTY operand string
    # (x30 is implied), so returns are recognised by mnemonic alone.
    _RETURN_MNEMONICS = ('ret', 'retaa', 'retab')
    _CALL_MNEMONICS = ('bl', 'blx')
    # AArch64 instructions are fixed-width.
    _WORD_SIZE = 4

    def __init__(self, binary_bytes, start_address=0x0):
        self.binary_bytes = binary_bytes
        self.start_address = start_address
        self.functions = []

    @classmethod
    def _is_prologue(cls, instruction):
        """Return True if the instruction looks like a function prologue."""
        return (
            instruction.mnemonic in cls._PROLOGUE_MNEMONICS
            and 'sp' in instruction.op_str
        )

    @classmethod
    def _is_return(cls, instruction):
        """Return True if the instruction ends a function."""
        if instruction.mnemonic in cls._RETURN_MNEMONICS:
            return True
        # Kept from the original check; `bx lr` is the 32-bit ARM spelling.
        return instruction.mnemonic == 'bx' and instruction.op_str == 'lr'

    @classmethod
    def _call_target(cls, instruction):
        """Return the immediate target of a direct call, or None."""
        if instruction.mnemonic not in cls._CALL_MNEMONICS:
            return None
        operands = instruction.operands
        if len(operands) > 0 and operands[0].type == ARM64_OP_IMM:
            return operands[0].imm
        return None

    def extract_functions(self, df, endian):
        """Scan the whole image and append one row per detected function.

        Args:
            df: DataFrame with ``FUNC_ADDRESS``, ``LOCATION`` and
                ``REFERENCES`` columns that the new rows are appended to.
            endian: ``'big'`` selects big-endian decoding; any other value
                decodes as little-endian.

        Returns:
            ``df`` itself when nothing was found, otherwise a new DataFrame
            made of ``df`` followed by the detected functions.
        """
        mode = CS_MODE_ARM
        if endian == 'big':
            mode |= CS_MODE_BIG_ENDIAN
        md = Cs(CS_ARCH_ARM64, mode)
        md.detail = True  # operand details are needed to read call targets

        image = self.binary_bytes
        word = self._WORD_SIZE
        # Only whole instruction words can be decoded; a short tail is ignored.
        usable = len(image) - len(image) % word

        new_rows = []
        # REFERENCES list of the most recent row; calls are credited to it.
        latest_refs = None
        inside_function = False

        # Each word is decoded on its own so that data or undecodable words
        # in the image do not stop the scan.
        for offset in tqdm(range(0, usable, word), desc="Scanning binary for functions"):
            try:
                instruction = next(
                    md.disasm(image[offset:offset + word], self.start_address + offset),
                    None,
                )
            except Exception:
                # Best effort: a word Capstone cannot handle is skipped.
                continue
            if instruction is None:
                continue

            if not inside_function and self._is_prologue(instruction):
                inside_function = True
                location = instruction.address
                latest_refs = []
                new_rows.append({
                    'FUNC_ADDRESS': f"func_{location:x}",
                    'LOCATION': location,
                    'REFERENCES': latest_refs,
                })

            if self._is_return(instruction):
                inside_function = False

            target = self._call_target(instruction)
            if target is not None:
                if latest_refs is None and not df.empty:
                    # No function found yet in this scan: credit the call to
                    # the last row that was already present in `df`.
                    latest_refs = df.at[df.index[-1], 'REFERENCES']
                if latest_refs is not None:
                    latest_refs.append(f"func_{target:x}")

        if not new_rows:
            return df
        # Rows are concatenated once; doing it per row is quadratic.
        return pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)

    def process_function(self, start_address, df, md, seen_functions, call_stack):
        """Disassemble one function linearly and append it to ``df``.

        Args:
            start_address: Address of the function's first instruction, in
                the same address space as ``self.start_address``.
            df: DataFrame the function row is appended to.
            md: Configured Capstone ``Cs`` instance (with ``detail`` enabled).
            seen_functions: Set of addresses already processed; updated
                in place.
            call_stack: List that receives call targets not seen so far;
                updated in place.

        Returns:
            A DataFrame including the new row when a return instruction was
            reached, otherwise ``df`` unchanged (already seen, address
            outside the image, or no return found).
        """
        if start_address in seen_functions:
            return df
        seen_functions.add(start_address)

        # Translate the address into an offset inside the image.
        offset = start_address - self.start_address
        if not 0 <= offset < len(self.binary_bytes):
            return df

        references = []
        for instruction in md.disasm(self.binary_bytes[offset:], start_address):
            if self._is_return(instruction):
                row = pd.DataFrame([{
                    'FUNC_ADDRESS': f"func_{start_address:x}",
                    'LOCATION': start_address,
                    'REFERENCES': references,
                }])
                return pd.concat([df, row], ignore_index=True)

            target = self._call_target(instruction)
            if target is not None:
                references.append(f"func_{target:x}")
                if target not in seen_functions:
                    call_stack.append(target)

        return df
|
||||||
|
|
||||||
|
class FunctionExtractor:
    """Read a binary file and extract its functions into a DataFrame.

    Attributes:
        binary: Path of the binary file.
        architecture: Upper-cased architecture name
            (``'ARM_AARCH64'`` or ``'ARM_AARCH32'``).
        endian: ``'little'`` or ``'big'``.
        parser: Architecture-specific parser, or None when the architecture
            is not supported.
        functions: Result returned by the AArch32 parser; an empty list
            otherwise.
        dataframe: DataFrame with ``FUNC_ADDRESS``, ``LOCATION`` and
            ``REFERENCES`` columns.
    """

    def __init__(self, binary, architecture, endian='little'):
        """Store the settings and run the extraction immediately.

        Args:
            binary: Path of the binary file to analyse.
            architecture: Architecture name, case-insensitive.
            endian: Byte order of the binary. Defaults to ``'little'`` so
                that two-argument calls keep working.
        """
        self.binary = binary
        self.architecture = architecture.upper()
        self.endian = endian
        # Defined up front so they exist whichever branch extraction takes.
        self.parser = None
        self.functions = []
        self.dataframe = pd.DataFrame(columns=['FUNC_ADDRESS', 'LOCATION', 'REFERENCES'])
        self.extract_functions()

    def binary_bytes(self):
        """Return the full contents of the binary file as bytes."""
        with open(self.binary, 'rb') as f:
            return f.read()

    def extract_functions(self):
        """Run the parser matching ``self.architecture``.

        The AArch64 result replaces ``self.dataframe``; the AArch32 result is
        stored in ``self.functions``. An unsupported architecture is logged
        and leaves the instance unchanged.
        """
        binary_bytes = self.binary_bytes()

        if self.architecture == 'ARM_AARCH32':
            self.parser = ARM_AARCH32_Parser(binary_bytes)
            self.functions = self.parser.extract_functions()
            if self.functions is NotImplemented:
                # Make the missing implementation visible instead of
                # silently producing an empty result.
                logger.error('Function extraction is not implemented yet for: ' + self.architecture)
        elif self.architecture == 'ARM_AARCH64':
            self.parser = ARM_AARCH64_Parser(binary_bytes)
            self.dataframe = self.parser.extract_functions(self.dataframe, self.endian)
        else:
            logger.error('Architecture not supported: ' + self.architecture)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: extract the functions of one binary and
    # write them to a CSV file.
    parser = argparse.ArgumentParser(description='Extract functions from firmware binaries.')
    parser.add_argument('--binary', help='The binary file to extract functions from.', required=True, type=str)
    parser.add_argument('--output', help='The output file to write the functions to.', required=False, type=str)
    parser.add_argument(
        '--architecture',
        help='The architecture of the binary. Use: ARM_AARCH64, ARM_AARCH32, etc.',
        required=True,
        type=str,
        choices=['ARM_AARCH64', 'ARM_AARCH32']
    )
    parser.add_argument(
        '--endian',
        help='The endianness of the binary. Use: little, big.',
        required=True,
        type=str,
        choices=['little', 'big']
    )
    args = parser.parse_args()

    # Logged as ONE message; the second part used to be passed as a separate
    # positional argument.
    logger.info(
        'Extracting functions from binary: ' + args.binary
        + ' using architecture: ' + args.architecture.upper()
    )

    extractor = FunctionExtractor(args.binary, args.architecture, args.endian)

    # Default output: next to the input, with a ".functions" suffix.
    if not args.output:
        args.output = args.binary + '.functions'

    # newline='' lets the csv writer control line endings (otherwise rows
    # end in "\r\r\n" on Windows).
    with open(args.output, 'w', newline='') as f:
        extractor.dataframe.to_csv(f, index=False)
|
||||||
|
|
||||||
|
sys.path.append('.')
|
@ -1,6 +1,10 @@
|
|||||||
pandas
|
pandas
|
||||||
numpy
|
numpy
|
||||||
tensorflow
|
tensorflow
|
||||||
sklearn
|
scikit-learn
|
||||||
seaborn
|
seaborn
|
||||||
scikit-learn
|
scikit-learn
|
||||||
|
capstone
|
||||||
|
keystone-engine
|
||||||
|
plotly
|
||||||
|
BioPython
|
BIN
sample_data/firmwares/S7_BL31.bin
Normal file
BIN
sample_data/firmwares/S7_BL31.bin
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user