xx
This commit is contained in:
@@ -25,5 +25,5 @@ def ClearTableRecordsWithReset(db_path, table_name):
|
||||
if __name__ == '__main__':
|
||||
# ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_path')
|
||||
# ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_device')
|
||||
ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_config')
|
||||
# ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_node')
|
||||
# ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_config')
|
||||
ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_node')
|
||||
|
@@ -1,6 +1,8 @@
|
||||
import hashlib
|
||||
import os
|
||||
import random
|
||||
import sqlite3
|
||||
from datetime import datetime
|
||||
|
||||
from mft_analyze import GetFile80hPattern
|
||||
|
||||
@@ -24,12 +26,9 @@ def GetExtendNameId(name: str, cursor: sqlite3.Cursor) -> int:
|
||||
|
||||
# 获取 DirLayer(路径层级)
|
||||
def GetDirLayer(path: str) -> int:
    r"""Return the directory depth of a backslash-separated path.

    The depth is the number of backslashes in the stripped path minus one
    (the separator that follows the drive root), for example:

        Z:\demo.jpg           -> 0  (file in the root directory)
        Z:\pictures\RHCE.jpg  -> 1  (file one directory down)

    Args:
        path: Path using backslash separators; surrounding whitespace is
            ignored.

    Returns:
        A non-negative depth. Empty input, a path that is a single
        backslash, and a path containing no backslash at all yield 0.
    """
    path = path.strip()
    if not path or path == "\\":
        return 0
    # Clamp so that a bare name without any separator gives 0 instead of -1.
    return max(path.count("\\") - 1, 0)
|
||||
|
||||
|
||||
@@ -47,6 +46,62 @@ def GetFirstUserId(cursor: sqlite3.Cursor) -> int:
|
||||
return result[0] if result else 0
|
||||
|
||||
|
||||
def GetFilesTime(file_path):
    """Return the timestamps of a file as formatted strings.

    The values come from ``os.stat``:

    * ``FileCreateTime``  - ``st_birthtime`` when the platform exposes it,
      otherwise ``st_ctime`` (which is the creation time on Windows).
    * ``FileModifyTime``  - ``st_mtime``, last content modification.
    * ``FileAccessTime``  - ``st_atime``, last access.
    * ``FileAuthTime``    - ``st_ctime``, which is the metadata/permission
      change time on POSIX systems and the creation time on Windows.

    Args:
        file_path (str): Path of the file to inspect.

    Returns:
        dict: The four keys above mapped to ``'%Y-%m-%d %H:%M:%S'`` strings
        in local time. Every value is the string ``"default"`` when the path
        does not exist or its timestamps cannot be read.
    """
    defaults = {
        "FileCreateTime": "default",
        "FileModifyTime": "default",
        "FileAccessTime": "default",
        "FileAuthTime": "default",
    }

    if not os.path.exists(file_path):
        return defaults

    def ts_to_str(timestamp):
        # Render a POSIX timestamp as a local-time "YYYY-MM-DD HH:MM:SS" string.
        return datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S')

    try:
        stat_info = os.stat(file_path)
        # st_birthtime is the real creation time where available; st_ctime is
        # only a creation time on Windows, so it is the fallback, not the rule.
        birth_time = getattr(stat_info, 'st_birthtime', stat_info.st_ctime)
        return {
            "FileCreateTime": ts_to_str(birth_time),
            "FileModifyTime": ts_to_str(stat_info.st_mtime),
            "FileAccessTime": ts_to_str(stat_info.st_atime),
            "FileAuthTime": ts_to_str(stat_info.st_ctime),
        }
    except (OSError, OverflowError, ValueError) as e:
        # The file may vanish or be unreadable between the check and the stat,
        # or carry a timestamp that datetime cannot represent.
        print(f"❌ 获取文件时间失败: {e}")
        return defaults
|
||||
|
||||
|
||||
# 获取设备ID(db_device第一条记录)
|
||||
def GetDeviceId(cursor: sqlite3.Cursor) -> int:
|
||||
cursor.execute("SELECT ID FROM db_device ORDER BY ID LIMIT 1")
|
||||
@@ -65,24 +120,72 @@ def GetFileHash(full_path: str) -> str:
|
||||
|
||||
|
||||
# Count the extents (data runs) of a file's 0x80 ($DATA) attribute
|
||||
def GetExtentCount(data):
    """Count the extents (data runs) of an NTFS 0x80 ($DATA) attribute.

    Args:
        data (list): Iterable of dicts. Each dict that has a 'sequence' key
            contributes its value: a list of strings of whitespace-separated
            hex bytes such as '80 00 00 00 48 00 00 00'. The bytes of all
            entries are concatenated in order and parsed as one attribute.

    Returns:
        int: 1 for a resident attribute; otherwise the number of data runs
        counted in the run list (0 if the list starts with its terminator).

    Raises:
        ValueError: If a token is not valid hex, the data is too short to
            hold the header fields that are read, the first byte is not
            0x80, or the run-list offset points past the end of the data.
    """
    # Step 1: flatten every 'sequence' into one list of hex tokens.
    hex_bytes = []
    for entry in data:
        if 'sequence' in entry:
            for hex_str in entry['sequence']:
                hex_bytes.extend(hex_str.split())

    try:
        attribute_data = [int(x, 16) for x in hex_bytes]
    except ValueError:
        raise ValueError("无效的十六进制数据") from None

    # Step 2: validate the attribute header.
    if len(attribute_data) < 24:
        raise ValueError("属性数据过短,无法解析头部信息")

    if attribute_data[0] != 0x80:
        raise ValueError("不是80属性($DATA属性)")

    # Byte 0x08 is the non-resident flag; 0 means the content is resident.
    if attribute_data[8] == 0:
        return 1

    # The run-list offset is a 2-byte little-endian field at 0x20..0x21, so a
    # non-resident header must be at least 0x22 bytes long to be readable.
    if len(attribute_data) < 0x22:
        raise ValueError("属性数据过短,无法解析头部信息")

    data_run_offset = attribute_data[0x20] | (attribute_data[0x21] << 8)
    if data_run_offset >= len(attribute_data):
        raise ValueError("数据运行偏移超出属性长度")

    data_runs = attribute_data[data_run_offset:]
    fragment_count = 0
    pos = 0

    while pos < len(data_runs):
        header_byte = data_runs[pos]
        if header_byte == 0x00:
            # A zero header byte terminates the run list.
            break

        # Low nibble: byte size of the run-length field.
        # High nibble: byte size of the run-offset field.
        length_size = header_byte & 0x0F
        offset_size = (header_byte >> 4) & 0x0F

        # NOTE(review): a zero offset size denotes a sparse run in NTFS; the
        # scan stops there rather than skipping it - confirm that is intended.
        if length_size == 0 or offset_size == 0:
            break

        pos += 1 + length_size + offset_size
        fragment_count += 1

    return fragment_count
|
||||
|
||||
|
||||
# 获取随机位置
|
||||
@@ -95,53 +198,6 @@ def GetRandomLength() -> int:
|
||||
return random.randint(1000, 9999)
|
||||
|
||||
|
||||
def GetFileLocation(full_path: str) -> int:
    """Return a location value for the file's first 0x80 attribute.

    Looks the attribute up with ``GetFile80hPattern``. For a resident
    attribute the result is ``start_byte + offset`` plus the little-endian
    integer formed by hex bytes 4..7 of the third 'sequence' string; for a
    non-resident attribute it is simply ``start_byte``.

    Falls back to ``GetRandomLocation()`` when no attribute is returned or
    when anything in the lookup raises.
    """
    try:
        attrs = GetFile80hPattern(full_path)
        if not attrs:
            return GetRandomLocation()  # nothing found: fall back to a random value

        first = attrs[0]
        if not first['is_resident']:
            # Non-resident: the run list is not parsed, start_byte is returned.
            return first['start_byte']

        # Resident: bytes 4..7 of the third sequence group, little-endian.
        hex_fields = first['sequence'][2].split()
        raw = bytes.fromhex(''.join(hex_fields[4:8]))
        content_offset = int.from_bytes(raw, byteorder='little')
        return first['start_byte'] + first['offset'] + content_offset
    except Exception as err:
        print(f"❌ 获取Location出错: {err}, 使用随机值")
        return GetRandomLocation()  # any failure: fall back to a random value
|
||||
|
||||
|
||||
def GetFileLength(full_path: str) -> int:
    """Return a length value for the file's first 0x80 attribute.

    Looks the attribute up with ``GetFile80hPattern``. For a resident
    attribute the result is the little-endian integer formed by the first
    four hex bytes of the third 'sequence' string; for a non-resident
    attribute it is the entry's ``attribute_length``.

    Falls back to ``GetRandomLength()`` when no attribute is returned or
    when anything in the lookup raises.
    """
    try:
        attrs = GetFile80hPattern(full_path)
        if not attrs:
            return GetRandomLength()  # nothing found: fall back to a random value

        first = attrs[0]
        if not first['is_resident']:
            # Non-resident: simplified, the header's attribute_length is used.
            return first['attribute_length']

        # Resident: first four bytes of the third sequence group, little-endian.
        hex_fields = first['sequence'][2].split()
        raw = bytes.fromhex(''.join(hex_fields[0:4]))
        return int.from_bytes(raw, byteorder='little')
    except Exception as err:
        print(f"❌ 获取Length出错: {err}, 使用随机值")
        return GetRandomLength()  # any failure: fall back to a random value
|
||||
|
||||
|
||||
# 主函数:将 db_path 数据导入 db_node
|
||||
def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
|
||||
conn = sqlite3.connect(db_path)
|
||||
@@ -154,8 +210,6 @@ def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
|
||||
cursor.execute("SELECT ID, Path, Name, ParentID FROM db_path")
|
||||
rows = cursor.fetchall()
|
||||
|
||||
inserted_count = 0 # 新增:记录实际插入的条目数
|
||||
|
||||
for row in rows:
|
||||
path_id, full_path, name, parent_id = row
|
||||
|
||||
@@ -172,17 +226,40 @@ def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
|
||||
extend_name_id = GetExtendNameId(name, cursor)
|
||||
file_size = GetFileSize(full_path)
|
||||
file_hash = GetFileHash(full_path)
|
||||
extent_count = GetExtentCount(full_path)
|
||||
|
||||
# 构建插入语句字段和参数
|
||||
# 获取文件的时间属性
|
||||
file_times = GetFilesTime(full_path)
|
||||
create_time = file_times["FileCreateTime"]
|
||||
modify_time = file_times["FileModifyTime"]
|
||||
access_time = file_times["FileAccessTime"]
|
||||
auth_time = file_times["FileAuthTime"]
|
||||
|
||||
# 新增:根据 $80 属性获取更精确的 ExtentCount
|
||||
try:
|
||||
attribute_80_data = GetFile80hPattern(full_path)
|
||||
|
||||
if not attribute_80_data or not isinstance(attribute_80_data, list):
|
||||
raise ValueError("无效的 80h 属性数据")
|
||||
|
||||
extent_count = GetExtentCount(attribute_80_data)
|
||||
|
||||
print(f"✅ 分片数量为: {extent_count}")
|
||||
|
||||
except Exception as e:
|
||||
print(f"⚠️ 获取 ExtentCount 失败,使用默认值 0: {e}")
|
||||
extent_count = 0
|
||||
|
||||
# 构建插入语句字段和参数(保持原样)
|
||||
fields = [
|
||||
'PathID', 'ParentID', 'NameHash', 'PathHash',
|
||||
'ExtendNameID', 'DirLayer', 'GroupID', 'UserID',
|
||||
'FileCreateTime', 'FileModifyTime', 'FileAccessTime', 'FileAuthTime',
|
||||
'FileSize', 'FileMode', 'FileHash', 'ExtentCount'
|
||||
]
|
||||
values = [
|
||||
path_id, parent_id, name_hash, '', # PathHash 待填
|
||||
extend_name_id, dir_layer, group_id, user_id,
|
||||
create_time, modify_time, access_time, auth_time,
|
||||
file_size, 'default', file_hash, extent_count
|
||||
]
|
||||
|
||||
@@ -218,16 +295,10 @@ def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
|
||||
|
||||
# 执行插入
|
||||
cursor.execute(insert_sql, values)
|
||||
inserted_count += 1 # 新增:成功插入后计数器加1
|
||||
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
# 新增:根据插入结果输出不同信息
|
||||
if inserted_count > 0:
|
||||
print(f"✅ 成功插入 {inserted_count} 条数据到 {table_name} 表")
|
||||
else:
|
||||
print("ℹ️ 没有新的数据被插入数据库(可能所有条目已存在或没有可处理的数据)")
|
||||
print(f"✅ 数据已成功插入到 {table_name} 表")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@@ -60,7 +60,7 @@ def GetFileMftEntry(file_path):
|
||||
drive_letter = os.path.splitdrive(file_path)[0][0]
|
||||
device = f"\\\\.\\{drive_letter}:"
|
||||
|
||||
print(f"Opening device: {device}")
|
||||
# print(f"Opening device: {device}")
|
||||
|
||||
try:
|
||||
img = pytsk3.Img_Info(device)
|
||||
@@ -73,10 +73,10 @@ def GetFileMftEntry(file_path):
|
||||
root_path = f"{drive_letter}:\\"
|
||||
rel_path = os.path.relpath(abs_path, root_path).replace("/", "\\")
|
||||
|
||||
print(f"Looking up MFT entry for: {rel_path}")
|
||||
# print(f"Looking up MFT entry for: {rel_path}")
|
||||
|
||||
mft_entry = find_file_mft_entry(fs, rel_path)
|
||||
print(f"MFT Entry: {mft_entry}")
|
||||
# print(f"MFT Entry: {mft_entry}")
|
||||
if mft_entry is None:
|
||||
raise RuntimeError("Could not find MFT entry for the specified file.")
|
||||
|
||||
@@ -105,7 +105,7 @@ def CalculateFileMftStartSector(mft_entry, volume_letter="Z"):
|
||||
start_sector = config_data["MftPosition"] * 8 + mft_entry * 2
|
||||
if start_sector < 0:
|
||||
raise ValueError("起始扇区号不能为负数")
|
||||
print(f"文件 MFT Entry 的起始扇区号: {start_sector}")
|
||||
# print(f"文件 MFT Entry 的起始扇区号: {start_sector}")
|
||||
return start_sector
|
||||
|
||||
|
||||
@@ -217,11 +217,106 @@ def GetFile80hPattern(file_path):
|
||||
try:
|
||||
mft_entry_value = GetFileMftEntry(file_path)
|
||||
StartSector = CalculateFileMftStartSector(mft_entry_value, volume_letter)
|
||||
print(f"\n文件的相关信息以及80属性内容:")
|
||||
print(Get80hPattern(StartSector, volume_letter))
|
||||
# print(f"文件的相关信息以及80属性内容:")
|
||||
# print(Get80hPattern(StartSector, volume_letter))
|
||||
file80h_pattern = Get80hPattern(StartSector, volume_letter)
|
||||
return file80h_pattern
|
||||
except Exception as e:
|
||||
print(f"❌ Error: {e}")
|
||||
return None
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
GetFile80hPattern(r"Z:\hello.txt")
|
||||
# if __name__ == '__main__':
|
||||
# GetFile80hPattern(r"Z:\demo.jpg")
|
||||
|
||||
|
||||
def analyze_ntfs_data_attribute(data):
    """Count the extents (data runs) of an NTFS 0x80 ($DATA) attribute.

    Args:
        data (list): Iterable of dicts. Each dict that has a 'sequence' key
            contributes its value: a list of strings of whitespace-separated
            hex bytes such as '80 00 00 00 48 00 00 00'. The bytes of all
            entries are concatenated in order and parsed as one attribute.

    Returns:
        int: 1 for a resident attribute; otherwise the number of data runs
        counted in the run list (0 if the list starts with its terminator).

    Raises:
        ValueError: If a token is not valid hex, the data is too short to
            hold the header fields that are read, the first byte is not
            0x80, or the run-list offset points past the end of the data.
    """
    # Step 1: flatten every 'sequence' into one list of hex tokens.
    hex_bytes = []
    for entry in data:
        if 'sequence' in entry:
            for hex_str in entry['sequence']:
                hex_bytes.extend(hex_str.split())

    try:
        attribute_data = [int(x, 16) for x in hex_bytes]
    except ValueError:
        raise ValueError("无效的十六进制数据") from None

    # Step 2: validate the attribute header.
    if len(attribute_data) < 24:
        raise ValueError("属性数据过短,无法解析头部信息")

    if attribute_data[0] != 0x80:
        raise ValueError("不是80属性($DATA属性)")

    # Byte 0x08 is the non-resident flag; 0 means the content is resident.
    if attribute_data[8] == 0:
        return 1

    # The run-list offset is a 2-byte little-endian field at 0x20..0x21, so a
    # non-resident header must be at least 0x22 bytes long to be readable.
    if len(attribute_data) < 0x22:
        raise ValueError("属性数据过短,无法解析头部信息")

    data_run_offset = attribute_data[0x20] | (attribute_data[0x21] << 8)
    if data_run_offset >= len(attribute_data):
        raise ValueError("数据运行偏移超出属性长度")

    data_runs = attribute_data[data_run_offset:]
    fragment_count = 0
    pos = 0

    while pos < len(data_runs):
        header_byte = data_runs[pos]
        if header_byte == 0x00:
            # A zero header byte terminates the run list.
            break

        # Low nibble: byte size of the run-length field.
        # High nibble: byte size of the run-offset field.
        length_size = header_byte & 0x0F
        offset_size = (header_byte >> 4) & 0x0F

        # NOTE(review): a zero offset size denotes a sparse run in NTFS; the
        # scan stops there rather than skipping it - confirm that is intended.
        if length_size == 0 or offset_size == 0:
            break

        pos += 1 + length_size + offset_size
        fragment_count += 1

    return fragment_count
|
||||
|
||||
|
||||
# Sample non-resident 0x80 attribute, in the list-of-dicts form that
# analyze_ntfs_data_attribute iterates over (only 'sequence' is read by it).
input_data = [
    dict(
        start_byte=3221267456,
        offset=264,
        sequence=[
            '80 00 00 00 48 00 00 00',
            '01 00 00 00 00 00 01 00',
            '00 00 00 00 00 00 00 00',
            '79 00 00 00 00 00 00 00',
            '40 00 00 00 00 00 00 00',
            '00 a0 07 00 00 00 00 00',
            '0b 93 07 00 00 00 00 00',
            '0b 93 07 00 00 00 00 00',
            '31 7a 00 ee 0b 00 00 00',
        ],
        is_resident=False,
        total_groups=9,
        attribute_length=72,
    ),
]

print(analyze_ntfs_data_attribute(input_data))  # prints the fragment count
|
||||
|
Binary file not shown.
Reference in New Issue
Block a user