# Source listing metadata: 106 lines, 3.4 KiB, Python
def ParseDataRuns(data_bytes: list, cluster_size=512):
    """Parse the data runs of an NTFS $DATA (0x80) attribute.

    Each data run starts with a header byte: its low nibble is the size in
    bytes of the run-length field and its high nibble is the size in bytes
    of the run-offset field. The length field (unsigned, little-endian,
    in clusters) comes first, followed by the offset field (signed,
    little-endian, in clusters). The offset of every run is relative to the
    starting cluster of the previous run; the first run is relative to 0.
    A header byte of 0x00 terminates the list.

    Args:
        data_bytes (list): The complete attribute as a list of two-character
            hexadecimal strings (one string per byte), starting at the
            attribute type byte ``'80'``.
        cluster_size (int): Cluster size in bytes used to convert cluster
            numbers/counts into byte values (default 512).

    Returns:
        dict: ``{"is_resident": bool, "data_runs": {...}}``.

        For a resident attribute ``data_runs`` holds the single placeholder
        entry ``"常驻文件"``. For a non-resident attribute it holds one
        entry per on-disk fragment, keyed ``"片段1"``, ``"片段2"``, ...:

            {"起始字节数": start_cluster * cluster_size,
             "字节长度": cluster_count * cluster_size - 1}

        Sparse runs (offset field size 0) occupy no disk space and are
        therefore not listed, but parsing continues past them.

    Raises:
        ValueError: If the data is not a $80 attribute, is too short to hold
            the fields being read, the data-run offset lies outside the
            data, or a data run is truncated.
    """

    def hex_list_to_int(lst, length, byteorder='little', signed=False):
        """Convert the first ``length`` hex-string bytes of ``lst`` to an int."""
        bytes_data = bytes([int(x, 16) for x in lst[:length]])
        return int.from_bytes(bytes_data, byteorder=byteorder, signed=signed)

    # The attribute type is checked first so non-$80 input keeps its message.
    if not data_bytes or data_bytes[0] != '80':
        raise ValueError("不是 $80 属性")
    if len(data_bytes) <= 8:
        raise ValueError("属性数据长度不足")

    # Non-resident flag lives at offset 0x08; 0x00 means resident.
    is_resident = data_bytes[8] == '00'
    result = {
        "is_resident": is_resident,
        "data_runs": {}
    }

    if is_resident:
        result["data_runs"]["常驻文件"] = {
            "起始字节数": 0,
            "字节长度": "该文件为常驻,无分片"
        }
        return result

    # The data-run offset is a 2-byte WORD at 0x20. The following WORD at
    # 0x22 is the compression unit size and must not be read as part of it.
    if len(data_bytes) < 0x22:
        raise ValueError("属性数据长度不足")
    data_run_offset = hex_list_to_int(data_bytes[0x20:0x22], 2)
    if data_run_offset >= len(data_bytes):
        raise ValueError("数据运行偏移超出范围")

    run_bytes = data_bytes[data_run_offset:]
    pos = 0
    fragment_index = 1
    current_lcn = 0  # starting cluster of the previous run; offsets are deltas

    while pos < len(run_bytes):
        header_byte = int(run_bytes[pos], 16)
        if header_byte == 0x00:
            break  # end-of-runs marker

        length_size = header_byte & 0x0F          # low nibble: length field size
        offset_size = (header_byte >> 4) & 0x0F   # high nibble: offset field size

        if length_size == 0:
            break  # a run without a length is malformed; stop parsing

        pos += 1
        if pos + length_size + offset_size > len(run_bytes):
            raise ValueError("数据运行被截断")

        # The length field precedes the offset field.
        cluster_count = hex_list_to_int(
            run_bytes[pos:pos + length_size], length_size)
        pos += length_size

        if offset_size == 0:
            # Sparse run: no clusters allocated on disk, nothing to report,
            # and the running cluster position is left unchanged.
            continue

        lcn_delta = hex_list_to_int(
            run_bytes[pos:pos + offset_size], offset_size, signed=True)
        pos += offset_size
        current_lcn += lcn_delta

        result["data_runs"][f"片段{fragment_index}"] = {
            "起始字节数": current_lcn * cluster_size,
            # The "- 1" follows the convention shown in the original
            # documentation example ("499712 - 1").
            "字节长度": cluster_count * cluster_size - 1
        }
        fragment_index += 1

    return result
|
||
|
||
|
||
# Sample $80 attribute, one two-character hex string per byte.
# Splitting the whitespace-separated dump yields the list of hex strings
# that ParseDataRuns takes as its first argument.
input_data = """
80 00 00 00 48 00 00 00
01 00 00 00 00 00 01 00
00 00 00 00 00 00 00 00
79 00 00 00 00 00 00 00
40 00 00 00 00 00 00 00
00 a0 07 00 00 00 00 00
0b 93 07 00 00 00 00 00
0b 93 07 00 00 00 00 00
31 7a 00 ee 0b 00 00 00
""".split()

# Parse the sample with the default cluster size and show the result.
result = ParseDataRuns(input_data)
print(result)
|