Files
fastcopy/test/parse_80_attribution.py
2025-05-20 16:26:58 +08:00

106 lines
3.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

def ParseDataRuns(data_bytes: list, cluster_size=512):
    """Parse the data runs of an NTFS $80 (DATA) attribute.

    Args:
        data_bytes (list): Two-digit hex strings (e.g. ['80', '00', ...])
            covering the complete $80 attribute, header first.
        cluster_size (int): Bytes per cluster (default 512).

    Returns:
        dict: {
            "is_resident": bool,
            "data_runs": {
                "片段1": {"起始字节数": int, "字节长度": int},
                ...
            }
        }
        For a resident attribute a single "常驻文件" placeholder entry is
        returned instead of fragments.

    Raises:
        ValueError: If the attribute type is not 0x80, or the data-run
            offset points outside the supplied buffer.
    """

    def _read_uint(lst, length):
        """Little-endian unsigned int from the first *length* hex strings."""
        return int.from_bytes(bytes(int(x, 16) for x in lst[:length]), 'little')

    def _read_sint(lst, length):
        """Little-endian SIGNED int — data-run offsets are two's complement."""
        return int.from_bytes(bytes(int(x, 16) for x in lst[:length]),
                              'little', signed=True)

    result = {
        "is_resident": True,
        "data_runs": {}
    }

    # Attribute type must be $80 (DATA).
    if data_bytes[0] != '80':
        raise ValueError("不是 $80 属性")

    # Non-resident flag at offset 0x08: 0x00 = resident, 0x01 = non-resident.
    is_resident = data_bytes[8] == '00'
    result["is_resident"] = is_resident
    if is_resident:
        result["data_runs"]["常驻文件"] = {
            "起始字节数": 0,
            "字节长度": "该文件为常驻,无分片"
        }
        return result

    # Data-run list offset: a 2-byte WORD at 0x20.  (The original read a
    # 4-byte DWORD, which breaks when the compression-unit WORD at 0x22 is
    # non-zero; for uncompressed attributes both reads agree.)
    data_run_offset = _read_uint(data_bytes[0x20:0x20 + 2], 2)
    if data_run_offset >= len(data_bytes):
        raise ValueError("数据运行偏移超出范围")

    runs = data_bytes[data_run_offset:]
    pos = 0
    fragment_index = 1
    current_lcn = 0  # run offsets are deltas from the previous run's LCN

    while pos < len(runs):
        header = int(runs[pos], 16)
        if header == 0x00:
            # 0x00 terminates the run list.
            break
        # Header nibbles: LOW nibble = size of the length field, HIGH nibble
        # = size of the offset field.  The length field comes FIRST in the
        # stream, followed by the offset field.  (The original code had both
        # the nibbles and the field order swapped, which exchanged the
        # reported start and length values.)
        length_size = header & 0x0F
        offset_size = (header >> 4) & 0x0F
        if length_size == 0 or offset_size == 0:
            # length_size == 0 is malformed; offset_size == 0 marks a sparse
            # run, which this parser does not map to disk bytes — stop here,
            # matching the original's conservative bail-out.
            break
        pos += 1
        # Run length in clusters (unsigned, little-endian).
        length = _read_uint(runs[pos:pos + length_size], length_size)
        # Run offset is SIGNED and relative to the previous run's cluster
        # number; accumulate to get the absolute LCN.
        current_lcn += _read_sint(
            runs[pos + length_size:pos + length_size + offset_size],
            offset_size)
        result["data_runs"][f"片段{fragment_index}"] = {
            "起始字节数": current_lcn * cluster_size,
            # Kept the original "last byte index" convention: len * cs - 1,
            # per the docstring example (499712 - 1).
            "字节长度": length * cluster_size - 1
        }
        pos += length_size + offset_size
        fragment_index += 1

    return result
# Smoke test: a non-resident $80 attribute captured from disk, expressed as
# two-digit hex strings — one per byte, attribute header first.
input_data = (
    "80 00 00 00 48 00 00 00 "
    "01 00 00 00 00 00 01 00 "
    "00 00 00 00 00 00 00 00 "
    "79 00 00 00 00 00 00 00 "
    "40 00 00 00 00 00 00 00 "
    "00 a0 07 00 00 00 00 00 "
    "0b 93 07 00 00 00 00 00 "
    "0b 93 07 00 00 00 00 00 "
    "31 7a 00 ee 0b 00 00 00"
).split()
result = ParseDataRuns(input_data)
print(result)