Files
fastcopy/test/get_extent_counts.py
2025-05-20 16:26:58 +08:00

93 lines
2.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

def analyze_ntfs_data_attribute(data):
    """Parse an NTFS $DATA (type 0x80) attribute and return its fragment count.

    Args:
        data (list): List of dicts; each dict may carry a 'sequence' key whose
            value is a list of space-separated hex-byte strings — the raw
            attribute bytes, in order.

    Returns:
        int: 1 for a resident attribute; for a non-resident attribute, the
            number of data runs (fragments) found in the run list.

    Raises:
        ValueError: If the hex tokens are malformed, the data is too short to
            parse, the attribute type is not 0x80, or the run-list offset
            points past the end of the attribute.
    """
    # Step 1: collect hex byte tokens from every entry's 'sequence'.
    hex_tokens = []
    for entry in data:
        if 'sequence' in entry:
            for hex_str in entry['sequence']:
                hex_tokens.extend(hex_str.split())
    # Convert the hex tokens into a list of byte values.
    try:
        attribute_data = [int(tok, 16) for tok in hex_tokens]
    except ValueError:
        raise ValueError("无效的十六进制数据")
    # Step 2: parse the attribute header (minimum resident header size).
    if len(attribute_data) < 24:
        raise ValueError("属性数据过短,无法解析头部信息")
    # Attribute type at offset 0x00 must be 0x80 ($DATA).
    if attribute_data[0] != 0x80:
        raise ValueError("不是80属性($DATA属性)")
    # Non-resident flag at offset 0x08 (0 = resident).
    is_resident = attribute_data[8] == 0
    if is_resident:
        # Resident data is stored inline in the MFT record: one piece.
        return 1
    # Non-resident: the run-list offset is a 16-bit little-endian value at
    # 0x20.  BUGFIX: the original guard only required 24 bytes, so reading
    # indices 0x20/0x21 could raise IndexError for 24..33-byte inputs.
    if len(attribute_data) < 0x22:
        raise ValueError("属性数据过短,无法解析头部信息")
    data_run_offset = attribute_data[0x20] | (attribute_data[0x21] << 8)
    if data_run_offset >= len(attribute_data):
        raise ValueError("数据运行偏移超出属性长度")
    data_runs = attribute_data[data_run_offset:]
    fragment_count = 0
    pos = 0
    while pos < len(data_runs):
        header_byte = data_runs[pos]
        if header_byte == 0x00:
            # A zero header byte terminates the run list.
            break
        # The two nibbles give the byte sizes of the run's two fields
        # (per NTFS, low nibble = length-field size, high nibble =
        # offset-field size; only their sum matters for counting).
        hi_nibble = (header_byte >> 4) & 0x0F
        lo_nibble = header_byte & 0x0F
        if hi_nibble == 0 or lo_nibble == 0:
            # Treat a zero-sized field as end of parseable runs (note:
            # this also stops at sparse runs, which have a zero offset
            # field — preserved from the original behavior).
            break
        pos += 1 + hi_nibble + lo_nibble
        fragment_count += 1
    return fragment_count
# Sample $DATA attribute (non-resident, one data run) captured from an
# NTFS volume, used to exercise analyze_ntfs_data_attribute.
input_data = [
    {
        'start_byte': 3221267456,
        'offset': 264,
        'sequence': [
            '80 00 00 00 48 00 00 00',
            '01 00 00 00 00 00 01 00',
            '00 00 00 00 00 00 00 00',
            '79 00 00 00 00 00 00 00',
            '40 00 00 00 00 00 00 00',
            '00 a0 07 00 00 00 00 00',
            '0b 93 07 00 00 00 00 00',
            '0b 93 07 00 00 00 00 00',
            '31 7a 00 ee 0b 00 00 00'
        ],
        'is_resident': False,
        'total_groups': 9,
        'attribute_length': 72
    }
]

# Guard the driver so importing this module no longer runs the analysis
# as a side effect (original printed unconditionally at import time).
if __name__ == "__main__":
    print(analyze_ntfs_data_attribute(input_data))  # fragment count