Files
fastcopy/test/export_useful_fragments.py
2025-05-20 16:26:58 +08:00

140 lines
3.9 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

def extract_data_run_fragments(data_run):
    """Split a raw NTFS data-run byte list into one list per run.

    Every run starts with a header byte whose two nibbles give the byte
    sizes of the two fields that follow it, so a run occupies
    ``1 + low_nibble + high_nibble`` entries of the input.

    Parsing stops (returning the runs collected so far) at the ``'00'``
    terminator, at a header with a zero-size field, at a run that extends
    past the end of the input, or at a header that is not valid hex.

    Args:
        data_run (list): Hex strings, one per byte (e.g. ``['31', '7a']``).

    Returns:
        list: One list of hex strings per complete run, in input order.
    """
    fragments = []
    total = len(data_run)
    pos = 0
    while pos < total:
        current_byte = data_run[pos]
        if current_byte == '00':
            # An all-zero header marks the end of the run list.
            break

        try:
            header = int(current_byte, 16)
        except (TypeError, ValueError) as e:
            print(f"❌ 解析失败,位置 {pos}: {e}")
            break

        # NTFS header layout: the low nibble sizes the length field, the
        # high nibble sizes the offset field.
        length_size = header & 0x0F
        offset_size = (header >> 4) & 0x0F
        if length_size == 0 or offset_size == 0:
            # NOTE: this also stops at sparse runs, which NTFS encodes with
            # a zero-size offset field.
            print(f"⚠️ 无效的字段长度,跳过位置 {pos}")
            break

        run_size = 1 + length_size + offset_size
        if pos + run_size > total:
            # Slicing would silently hand back a short fragment; drop the
            # incomplete tail instead of returning a run that cannot be
            # decoded.
            print(f"⚠️ Data Run 数据不完整,停止于位置 {pos}")
            break

        fragments.append(data_run[pos:pos + run_size])
        pos += run_size
    return fragments
def hex_list_to_int(lst, byteorder='little'):
    """Convert a list of per-byte hex strings into one unsigned integer.

    Args:
        lst (list): Hex strings, one per byte (e.g. ``['ee', '0b']``).
        byteorder (str): ``'little'`` means the first entry is the least
            significant byte; any other value is treated as big-endian.

    Returns:
        int: The unsigned value of the byte sequence.

    Raises:
        ValueError: If ``lst`` is empty, or an entry is not valid hex or
            does not fit in a single byte (0x00-0xFF).
    """
    if not lst:
        raise ValueError("hex_list_to_int() needs at least one byte")
    # bytes() rejects values outside 0..255, so an oversized entry such as
    # '100' raises instead of being glued into the result as extra digits.
    raw = bytes(int(b, 16) for b in lst)
    return int.from_bytes(raw, 'little' if byteorder == 'little' else 'big')
def parse_data_run(data_run, previous_cluster=0):
    """Decode one NTFS data run into its starting and ending cluster.

    A run is a header byte, followed by the run length, followed by the
    cluster offset. The header's low nibble is the byte size of the length
    field and its high nibble is the byte size of the offset field. Both
    fields are little-endian. The offset is a signed (two's-complement)
    delta from the previous run's starting cluster, so a later run may lie
    before an earlier one on disk.

    Args:
        data_run (list): Hex strings, one per byte, for a single run.
        previous_cluster (int): Starting cluster of the preceding run, which
            the offset is relative to. With the default of 0 the offset is
            taken as absolute, which is what the first run needs.

    Returns:
        dict | None: ``None`` for an empty list or a ``'00'`` terminator.
        Otherwise a dict with ``starting_cluster``, ``ending_cluster``
        (inclusive) and ``run_length`` (cluster count).

    Raises:
        ValueError: If the header declares a zero-size field (sparse runs,
            which have a zero-size offset, are not supported), if the
            fragment is shorter than the header declares, or if an entry is
            not a valid single-byte hex string.
    """
    if not data_run or data_run[0] == '00':
        return None

    header = int(data_run[0], 16)
    length_size = header & 0x0F
    offset_size = (header >> 4) & 0x0F
    if length_size == 0 or offset_size == 0:
        raise ValueError(
            f"unsupported data run header {data_run[0]!r}: "
            "zero-size length or offset field"
        )

    # The length field comes first, directly after the header.
    length_end = 1 + length_size
    offset_end = length_end + offset_size
    if len(data_run) < offset_end:
        raise ValueError(
            f"truncated data run: header declares {offset_end} bytes, "
            f"got {len(data_run)}"
        )

    length_field = bytes(int(b, 16) for b in data_run[1:length_end])
    offset_field = bytes(int(b, 16) for b in data_run[length_end:offset_end])

    run_length = int.from_bytes(length_field, 'little')
    # Decoded as signed so that a backwards jump yields a negative delta.
    offset = int.from_bytes(offset_field, 'little', signed=True)

    starting_cluster = previous_cluster + offset
    ending_cluster = starting_cluster + run_length - 1
    return {
        "starting_cluster": starting_cluster,
        "ending_cluster": ending_cluster,
        "run_length": run_length
    }
def parse_multiple_data_runs(fragments):
    """Decode a sequence of data-run fragments, chaining relative offsets.

    Each fragment is handed to ``parse_data_run`` together with the
    ``starting_cluster`` of the most recent successfully decoded fragment
    (0 before any has been decoded). Fragments for which ``parse_data_run``
    returns a falsy value are skipped and do not change that reference.

    Args:
        fragments (list): Data-run fragments, each a list of hex strings,
            for example::

                [
                    ['31', '7a', '00', 'ee', '0b'],
                    ['22', '29', '06', 'bb', '00'],
                    ...
                ]

    Returns:
        list: The dict returned by ``parse_data_run`` for every fragment
        that was decoded, in input order.
    """
    decoded_runs = []
    reference_cluster = 0
    for run_bytes in fragments:
        decoded = parse_data_run(run_bytes, reference_cluster)
        if not decoded:
            continue
        decoded_runs.append(decoded)
        # The next fragment is resolved against this run's start.
        reference_cluster = decoded["starting_cluster"]
    return decoded_runs
# Sample input: three runs, the '00' terminator, then bytes that follow the
# terminator.
data_run = (
    "31 7a 00 ee 0b "
    "22 29 06 bb 00 "
    "32 7a 02 ee 00 00 "
    "00 a0 f8 ff ff ff ff ff"
).split()

# Step 1: split the raw byte list into individual runs and print them.
fragments = extract_data_run_fragments(data_run)
print("提取到的片段:")
for number, fragment in enumerate(fragments, start=1):
    print(f"片段{number}: {fragment}")

# Step 2: decode every extracted run and print the results.
results = parse_multiple_data_runs(fragments)
print("\n解析结果:")
for number, decoded in enumerate(results, start=1):
    print(f"片段{number}: {decoded}")