finish all table analysis
This commit is contained in:
139
test/export_useful_fragments.py
Normal file
139
test/export_useful_fragments.py
Normal file
@@ -0,0 +1,139 @@
|
||||
def extract_data_run_fragments(data_run):
    """Split a raw NTFS data-run byte stream into one list per run.

    Args:
        data_run (list): Hex-string bytes of the run list, e.g. ['31', '7a', ...].

    Returns:
        list: One sub-list per data run; parsing stops at the 0x00
        terminator or at the first malformed header.
    """
    result = []
    pos = 0

    while pos < len(data_run):
        current_byte = data_run[pos]

        if current_byte == '00':
            # 0x00 terminates the run list.
            break

        try:
            header = int(current_byte, 16)
            # Per the NTFS on-disk format the LOW nibble is the size of the
            # length field and the HIGH nibble the size of the offset field
            # (the original names had them swapped).  Only their sum matters
            # for slicing a fragment, so the output is unchanged.
            offset_field_size = (header >> 4) & 0x0F
            length_field_size = header & 0x0F

            if offset_field_size == 0 or length_field_size == 0:
                # NOTE(review): a zero offset nibble is legal for sparse
                # runs; this parser deliberately stops there.
                print(f"⚠️ 无效的字段长度,跳过位置 {pos}")
                break

            # Header byte plus both variable-size fields.
            run_length = 1 + length_field_size + offset_field_size

            # Slice out the current data run.
            fragment = data_run[pos: pos + run_length]
            result.append(fragment)

            # Advance past this run.
            pos += run_length

        except (ValueError, TypeError) as e:
            # Narrowed from bare `Exception`: only int() parsing can fail here.
            print(f"❌ 解析失败,位置 {pos}:{e}")
            break

    return result
|
||||
|
||||
|
||||
def hex_list_to_int(lst, byteorder='little'):
    """Convert a list of hex-string bytes into a single integer.

    With byteorder='little' the list is interpreted least-significant
    byte first; any other value treats it as big-endian.
    """
    ordered = lst[::-1] if byteorder == 'little' else lst
    digits = ''.join('{:02x}'.format(int(token, 16)) for token in ordered)
    return int(digits, 16)
|
||||
|
||||
|
||||
def parse_data_run(data_run, previous_cluster=0):
    """Parse a single NTFS data run and return its cluster range.

    Bug fix: the NTFS run header stores the size of the LENGTH field in
    the low nibble and the size of the OFFSET field in the high nibble,
    and the length field comes first in the byte stream; the previous
    code had both swapped.  Offsets are also signed, because relative
    runs may point backwards on disk.

    Args:
        data_run (list): Hex-string bytes of one data run.
        previous_cluster (int): Starting LCN of the previous run; the
            offset field is relative to it (use 0 for the first run).

    Returns:
        dict | None: {"starting_cluster", "ending_cluster", "run_length"},
        or None for an empty list / 0x00 terminator.
    """
    if not data_run or data_run[0] == '00':
        return None

    header = int(data_run[0], 16)
    length_size = header & 0x0F          # low nibble: run-length field bytes
    offset_size = (header >> 4) & 0x0F   # high nibble: offset field bytes

    # Length field first, then offset field (both little-endian).
    length_field = data_run[1:1 + length_size]
    offset_field = data_run[1 + length_size:1 + length_size + offset_size]

    run_length = int.from_bytes(bytes(int(b, 16) for b in length_field), 'little')
    # Signed: the offset is relative to the previous run's starting LCN.
    offset = int.from_bytes(bytes(int(b, 16) for b in offset_field),
                            'little', signed=True)

    starting_cluster = previous_cluster + offset
    ending_cluster = starting_cluster + run_length - 1

    return {
        "starting_cluster": starting_cluster,
        "ending_cluster": ending_cluster,
        "run_length": run_length
    }
|
||||
|
||||
|
||||
def parse_multiple_data_runs(fragments):
    """Parse a sequence of data-run fragments, threading the relative offset.

    Each fragment's offset is interpreted relative to the starting
    cluster of the fragment before it (0 for the first fragment).

    Args:
        fragments (list): One list of hex strings per data run, e.g.
            [['31', '7a', '00', 'ee', '0b'],
             ['22', '29', '06', 'bb', '00'],
             ...]

    Returns:
        list: One result dict per successfully parsed fragment.
    """
    parsed = []
    last_start = 0

    for run in fragments:
        info = parse_data_run(run, last_start)
        if info is None:
            continue
        parsed.append(info)
        # The next run's offset is relative to this run's start.
        last_start = info["starting_cluster"]

    return parsed
|
||||
|
||||
|
||||
# Sample $80-attribute run list: three runs followed by the 0x00 terminator
# (the trailing 'a0 f8 ff ...' bytes are padding after the terminator).
data_run = [
    '31', '7a', '00', 'ee', '0b',
    '22', '29', '06', 'bb', '00',
    '32', '7a', '02', 'ee', '00', '00',
    '00', 'a0', 'f8', 'ff', 'ff', 'ff', 'ff', 'ff'
]

# Step 1: extract every valid fragment
fragments = extract_data_run_fragments(data_run)
print("提取到的片段:")
for i, frag in enumerate(fragments):
    print(f"片段{i + 1}: {frag}")

# Step 2: parse the extracted fragments as a batch
results = parse_multiple_data_runs(fragments)
print("\n解析结果:")
for i, res in enumerate(results):
    print(f"片段{i + 1}: {res}")
|
92
test/get_extent_counts.py
Normal file
92
test/get_extent_counts.py
Normal file
@@ -0,0 +1,92 @@
|
||||
def analyze_ntfs_data_attribute(data):
    """Return the fragment (extent) count of an NTFS $DATA (0x80) attribute.

    Args:
        data (list): List of dicts; each dict may carry a 'sequence' key
            holding space-separated hex-byte strings of the raw attribute.

    Returns:
        int: 1 for a resident attribute, otherwise the number of data
        runs in the run list.

    Raises:
        ValueError: If the hex data is malformed, too short, or not a
            0x80 attribute.
    """
    # Step 1: flatten every 'sequence' entry into individual hex tokens.
    hex_bytes = []
    for entry in data:
        if 'sequence' in entry:
            for hex_str in entry['sequence']:
                hex_bytes.extend(hex_str.split())

    print(hex_bytes)  # NOTE(review): debug output kept from the original

    # Convert the hex strings to a list of integer byte values.
    try:
        attribute_data = [int(x, 16) for x in hex_bytes]
    except ValueError:
        raise ValueError("无效的十六进制数据") from None

    # Step 2: inspect the attribute header.
    if len(attribute_data) < 24:
        raise ValueError("属性数据过短,无法解析头部信息")

    # Attribute type must be 0x80 ($DATA).
    if attribute_data[0] != 0x80:
        raise ValueError("不是80属性($DATA属性)")

    # Non-resident flag at offset 0x08 (0 = resident).
    if attribute_data[8] == 0:
        # Resident data is stored inline: exactly one "fragment".
        return 1

    # Bug fix: reading the run-list offset touches bytes 0x20-0x21, which
    # the 24-byte minimum above did not guarantee (it raised IndexError).
    if len(attribute_data) < 0x22:
        raise ValueError("属性数据过短,无法解析头部信息")

    # Run-list offset: little-endian WORD at 0x20.
    data_run_offset = attribute_data[0x20] | (attribute_data[0x21] << 8)

    if data_run_offset >= len(attribute_data):
        raise ValueError("数据运行偏移超出属性长度")

    data_runs = attribute_data[data_run_offset:]
    fragment_count = 0
    pos = 0

    while pos < len(data_runs):
        header_byte = data_runs[pos]
        if header_byte == 0x00:
            # 0x00 terminates the run list.
            break

        # Low nibble: length-field size; high nibble: offset-field size.
        # (Only their sum matters for counting, so the original's swapped
        # names did not affect the result.)
        offset_field_size = (header_byte >> 4) & 0x0F
        length_field_size = header_byte & 0x0F

        if length_field_size == 0 or offset_field_size == 0:
            break

        pos += 1 + length_field_size + offset_field_size
        fragment_count += 1

    return fragment_count
|
||||
|
||||
|
||||
# Sample $DATA attribute capture: non-resident, 72 bytes long, run list
# at offset 0x40 holding a single run ('31 7a 00 ee 0b').
input_data = [
    {
        'start_byte': 3221267456,
        'offset': 264,
        'sequence': [
            '80 00 00 00 48 00 00 00',
            '01 00 00 00 00 00 01 00',
            '00 00 00 00 00 00 00 00',
            '79 00 00 00 00 00 00 00',
            '40 00 00 00 00 00 00 00',
            '00 a0 07 00 00 00 00 00',
            '0b 93 07 00 00 00 00 00',
            '0b 93 07 00 00 00 00 00',
            '31 7a 00 ee 0b 00 00 00'
        ],
        'is_resident': False,
        'total_groups': 9,
        'attribute_length': 72
    }
]

print(analyze_ntfs_data_attribute(input_data))  # print the fragment count
|
105
test/parse_80_attribution.py
Normal file
105
test/parse_80_attribution.py
Normal file
@@ -0,0 +1,105 @@
|
||||
def ParseDataRuns(data_bytes: list, cluster_size=512):
    """Parse the data runs of an NTFS $80 ($DATA) attribute.

    Returns the starting byte and byte length of every fragment.

    Bug fixes relative to the original:
      * the header's LOW nibble is the length-field size and the HIGH
        nibble the offset-field size, with the length field stored first
        in the stream (the original docstring's own expected value
        3202351104 = 781824 * 4096 only holds with this reading);
      * the run-list offset at 0x20 is a 2-byte WORD, not a DWORD
        (0x22 holds the compression unit);
      * run offsets are signed and relative to the previous run's
        starting LCN, so they must be accumulated.

    Args:
        data_bytes (list): Hex-string bytes of the whole $80 attribute.
        cluster_size (int): Bytes per cluster (default 512).

    Returns:
        dict: {
            "is_resident": bool,
            "data_runs": {"片段1": {"起始字节数": ..., "字节长度": ...}, ...}
        }

    Raises:
        ValueError: If the data is not a $80 attribute or the run-list
            offset is out of range.
    """

    def hex_list_to_int(lst, length, byteorder='little', signed=False):
        """Convert the first *length* hex strings to an integer."""
        bytes_data = bytes([int(x, 16) for x in lst[:length]])
        return int.from_bytes(bytes_data, byteorder=byteorder, signed=signed)

    result = {
        "is_resident": True,
        "data_runs": {}
    }

    # Must be a $80 attribute.
    if data_bytes[0] != '80':
        raise ValueError("不是 $80 属性")

    # Non-resident flag at offset 0x08 (0x00 = resident).
    is_resident = data_bytes[8] == '00'
    result["is_resident"] = is_resident

    if is_resident:
        result["data_runs"]["常驻文件"] = {
            "起始字节数": 0,
            "字节长度": "该文件为常驻,无分片"
        }
        return result

    # Non-resident: run-list offset is a little-endian WORD at 0x20.
    data_run_offset = hex_list_to_int(data_bytes[0x20:0x20 + 2], 2)
    if data_run_offset >= len(data_bytes):
        raise ValueError("数据运行偏移超出范围")

    # Walk the run list.
    data_run_bytes = data_bytes[data_run_offset:]
    pos = 0
    fragment_index = 1
    current_lcn = 0  # running LCN; each offset is relative to the last run

    while pos < len(data_run_bytes):
        header_byte = int(data_run_bytes[pos], 16)
        if header_byte == 0x00:
            # 0x00 terminates the run list.
            break

        # Low nibble: length-field bytes; high nibble: offset-field bytes.
        length_size = header_byte & 0x0F
        offset_size = (header_byte >> 4) & 0x0F

        if length_size == 0 or offset_size == 0:
            break

        pos += 1

        # Length field first: run length in clusters (little-endian).
        length = hex_list_to_int(data_run_bytes[pos:pos + length_size],
                                 length_size)

        # Offset field second: signed, relative to the previous run's LCN.
        offset = hex_list_to_int(
            data_run_bytes[pos + length_size:pos + length_size + offset_size],
            offset_size, signed=True)
        current_lcn += offset

        # Translate clusters to bytes.
        start_byte = current_lcn * cluster_size
        byte_length = length * cluster_size - 1

        result["data_runs"][f"片段{fragment_index}"] = {
            "起始字节数": start_byte,
            "字节长度": byte_length
        }

        pos += length_size + offset_size
        fragment_index += 1

    return result
|
||||
|
||||
|
||||
# Raw $80 attribute bytes: non-resident, run list begins at offset 0x40.
input_data = [
    '80', '00', '00', '00', '48', '00', '00', '00',
    '01', '00', '00', '00', '00', '00', '01', '00',
    '00', '00', '00', '00', '00', '00', '00', '00',
    '79', '00', '00', '00', '00', '00', '00', '00',
    '40', '00', '00', '00', '00', '00', '00', '00',
    '00', 'a0', '07', '00', '00', '00', '00', '00',
    '0b', '93', '07', '00', '00', '00', '00', '00',
    '0b', '93', '07', '00', '00', '00', '00', '00',
    '31', '7a', '00', 'ee', '0b', '00', '00', '00'
]

# Parse the attribute and print the fragment map.
result = ParseDataRuns(input_data)
print(result)
|
Reference in New Issue
Block a user