\db_path style

This commit is contained in:
Burgess Leo
2025-05-22 17:21:44 +08:00
parent 3347abe02f
commit d2a3a7b5b5
9 changed files with 301 additions and 111 deletions

View File

@@ -3,8 +3,7 @@ import os
import sqlite3
from datetime import datetime
# 导入你的模块函数
from mft_analyze import GetFile80hPattern, GetFragmentData, ExtractSequenceHexValues, hex_list_to_int
from ntfs_utils.mft_analyze import GetFile80hPattern, GetFragmentData, ExtractSequenceHexValues, hex_list_to_int
# 工具函数:获取文件扩展名
@@ -132,11 +131,20 @@ def GetFragmentLength(fragment):
# 主函数:将 db_path 数据导入 db_node
# 主函数:将 db_path 数据导入 db_node
def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
def InsertNodeDataToDB(volume_letter: str, db_path='../src/db_ntfs_info.db', table_name='db_node', batch_size=20):
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
volume_letter = volume_letter.upper().strip()
if len(volume_letter) == 1:
volume_root = f"{volume_letter}:\\"
elif volume_letter.endswith(':'):
volume_root = f"{volume_letter}\\"
else:
volume_root = f"{volume_letter}:\\" # 支持 "Y" 或 "Y:" 输入
print(f"🔍 当前处理磁盘根目录:{volume_root}")
group_id = GetFirstGroupId(cursor)
user_id = GetFirstUserId(cursor)
device_id = GetDeviceId(cursor)
@@ -144,8 +152,26 @@ def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
cursor.execute("SELECT ID, Path, Name, ParentID FROM db_path")
rows = cursor.fetchall()
insert_fields = [
'PathID', 'ParentID', 'NameHash', 'PathHash',
'ExtendNameID', 'DirLayer', 'GroupID', 'UserID',
'FileCreateTime', 'FileModifyTime', 'FileAccessTime', 'FileAuthTime',
'FileSize', 'FileMode', 'FileHash', 'ExtentCount',
# extent 字段
"extent1_DeviceID", "extent1_Location", "extent1_Length",
"extent2_DeviceID", "extent2_Location", "extent2_Length",
"extent3_DeviceID", "extent3_Location", "extent3_Length",
"extent4_DeviceID", "extent4_Location", "extent4_Length"
]
insert_placeholders = ', '.join('?' * len(insert_fields))
insert_sql = f"INSERT INTO {table_name} ({', '.join(insert_fields)}) VALUES ({insert_placeholders})"
batch = []
for row in rows:
path_id, full_path, name, parent_id = row
path_id, relative_path, name, parent_id = row
full_path = os.path.join(volume_root, relative_path)
# 检查是否已存在相同 PathID
cursor.execute("SELECT COUNT(*) FROM db_node WHERE PathID = ?", (path_id,))
@@ -154,13 +180,10 @@ def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
print(f"⚠️ PathID {path_id} 已存在,跳过插入")
continue
# 获取文件的80h属性数据
try:
file80h_pattern = GetFile80hPattern(full_path)
fragments = GetFragmentData(file80h_pattern)
extent_count = min(len(fragments), 4) # 最多支持4个fragment
print(f"✅ 分片数量为: {extent_count}")
extent_count = min(len(fragments), 4)
except Exception as e:
print(f"⚠️ 获取 ExtentCount 失败,使用默认值 0: {e}")
fragments = []
@@ -168,10 +191,9 @@ def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
# 计算字段
name_hash = hashlib.sha256(name.encode()).hexdigest()
dir_layer = GetDirLayer(full_path)
dir_layer = GetDirLayer(relative_path)
extend_name_id = GetExtendNameId(name, cursor)
# ✅ 现在可以安全调用 GetFileSize(file80h_pattern)
try:
file_size = GetFileSize(file80h_pattern)
except Exception as e:
@@ -180,7 +202,7 @@ def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
file_hash = GetFileHash(full_path)
# 获取文件的时间属性
# 获取时间信息
file_times = GetFilesTime(full_path)
create_time = file_times["FileCreateTime"]
modify_time = file_times["FileModifyTime"]
@@ -192,21 +214,7 @@ def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
path_hash_result = cursor.fetchone()
path_hash = path_hash_result[0] if path_hash_result else ""
# 构建插入语句字段和参数(保持原样)
fields = [
'PathID', 'ParentID', 'NameHash', 'PathHash',
'ExtendNameID', 'DirLayer', 'GroupID', 'UserID',
'FileCreateTime', 'FileModifyTime', 'FileAccessTime', 'FileAuthTime',
'FileSize', 'FileMode', 'FileHash', 'ExtentCount'
]
values = [
path_id, parent_id, name_hash, path_hash,
extend_name_id, dir_layer, group_id, user_id,
create_time, modify_time, access_time, auth_time,
file_size, 'default', file_hash, extent_count
]
# 处理 Extent 片段字段
# 构建 extent 字段
extent_data = []
for i in range(4): # 最多4个 extent
if i < len(fragments):
@@ -217,27 +225,34 @@ def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
else:
extent_data.extend([None, None, None])
# 拼接字段和值
extent_fields = [
"extent1_DeviceID", "extent1_Location", "extent1_Length",
"extent2_DeviceID", "extent2_Location", "extent2_Length",
"extent3_DeviceID", "extent3_Location", "extent3_Length",
"extent4_DeviceID", "extent4_Location", "extent4_Length"
# 构建插入数据
values = [
path_id, parent_id, name_hash, path_hash,
extend_name_id, dir_layer, group_id, user_id,
create_time, modify_time, access_time, auth_time,
file_size, 'default', file_hash, extent_count,
*extent_data
]
fields += extent_fields
values += extent_data
# 构建 SQL 插入语句
placeholders = ', '.join('?' * len(values))
insert_sql = f"INSERT INTO {table_name} ({', '.join(fields)}) VALUES ({placeholders})"
batch.append(values)
# 执行插入
cursor.execute(insert_sql, values)
# 批量插入
if len(batch) >= batch_size:
cursor.executemany(insert_sql, batch)
conn.commit()
print(f"✅ 提交一批 {len(batch)} 条记录到 {table_name}")
batch.clear()
# 插入剩余不足一批的数据
if batch:
cursor.executemany(insert_sql, batch)
conn.commit()
print(f"✅ 提交最后一批 {len(batch)} 条记录到 {table_name}")
conn.commit()
conn.close()
print(f"✅ 数据已成功插入到 {table_name}")
if __name__ == '__main__':
InsertNodeDataToDB()
if __name__ == "__main__":
volume_letter_test = "Y"
InsertNodeDataToDB(volume_letter=volume_letter_test)

View File

@@ -27,23 +27,23 @@ def ScanVolume(volume_letter: str):
"""
完整扫描指定磁盘的所有文件和目录,忽略 NTFS 元文件和系统文件夹,
并为每个节点分配 ParentID。
返回:
list of dict：包含文件/目录信息的字典列表
"""
root_path = f"{volume_letter.upper()}:\\"
if not os.path.exists(root_path):
raise ValueError(f"磁盘 {root_path} 不存在")
result = []
path_to_id = {} # 用于记录路径到数据库 ID 映射
counter = 1 # 模拟数据库自增 ID
path_to_id = {} # 路径 -> ID 映射
counter = 1
for root, dirs, files in os.walk(root_path, topdown=True, onerror=None, followlinks=False):
# 过滤掉需要跳过的目录
dirs[:] = [d for d in dirs if not ShouldSkipPath(os.path.join(root, d))]
for entry in files + dirs:
entries = files + dirs
for entry in entries:
full_path = os.path.join(root, entry)
if ShouldSkipPath(full_path):
@@ -61,21 +61,35 @@ def ScanVolume(volume_letter: str):
name = entry
# ✅ 修正点:对 Path 字段进行哈希
path_hash = GenerateHash(full_path)
# 分离盘符并去除开头的 \
_, relative_path = os.path.splitdrive(full_path)
relative_path = relative_path.lstrip("\\")
# 如果是文件夹，Path 字段结尾加 '\\'
if is_dir and not relative_path.endswith("\\"):
relative_path += "\\"
# ✅ 关键修改点:将所有 \ 替换为 \\
relative_path = relative_path.replace("\\", "\\\\")
path_hash = GenerateHash(relative_path)
# 计算 ContentSizeKB，小文件至少显示为 1 KB
content_size = bytes_size // 1024
if content_size == 0 and bytes_size > 0:
content_size = 1
# 获取父目录路径
parent_path = os.path.dirname(full_path)
parent_id = path_to_id.get(parent_path, 0) # 默认为 0根目录可能未录入
_, parent_relative_path = os.path.splitdrive(parent_path)
parent_relative_path = parent_relative_path.lstrip("\\").rstrip("\\") # 去除首尾 \
if os.path.isdir(parent_path) and not parent_relative_path.endswith("\\"): # 如果是目录,补 \
parent_relative_path += "\\"
parent_relative_path = parent_relative_path.replace("\\", "\\\\") # 转换为双反斜杠 \\
parent_id = path_to_id.get(parent_relative_path, 0)
item = {
"ID": counter,
"Path": full_path,
"Path": relative_path,
"Name": name,
"PathHash": path_hash,
"IsDir": is_dir,
@@ -84,7 +98,7 @@ def ScanVolume(volume_letter: str):
}
result.append(item)
path_to_id[full_path] = counter
path_to_id[relative_path] = counter
counter += 1
except Exception as e:
@@ -163,7 +177,7 @@ def InsertPathDataToDB(data, db_path='../src/db_ntfs_info.db', table_name='db_pa
# 示例主函数
def main():
volume_letter = "Z"
volume_letter = "Y"
print(f"🔍 开始全盘扫描磁盘 {volume_letter}:\\ ...")
scanned_data = ScanVolume(volume_letter)

View File

@@ -1,15 +1,15 @@
from db_config import GetNTFSBootInfo, InsertInfoToDBConfig
from db_device import ScanSpecialVolumes, InsertVolumesToDB
from db_extend_name import InsertExtensionsToDB
from db_group import InsertGroupToDB
from db_path import GenerateHash, ShouldSkipPath, ScanVolume, InsertPathDataToDB
from db_user import InsertUserToDB
from db_node import InsertNodeDataToDB
from ntfs_utils.db_config import GetNTFSBootInfo, InsertInfoToDBConfig
from ntfs_utils.db_device import ScanSpecialVolumes, InsertVolumesToDB
from ntfs_utils.db_extend_name import InsertExtensionsToDB
from ntfs_utils.db_group import InsertGroupToDB
from ntfs_utils.db_node import InsertNodeDataToDB
from ntfs_utils.db_path import ScanVolume, InsertPathDataToDB
from ntfs_utils.db_user import InsertUserToDB
volume_letter = 'Y'
def main():
volume_letter = 'Y'
# 初始化 db_config 表
config_data = GetNTFSBootInfo(volume_letter)
InsertInfoToDBConfig(config_data)
@@ -42,7 +42,7 @@ def main():
print(f"共插入 {count} 个新扩展名。")
# 初始化 db_node 表
InsertNodeDataToDB()
InsertNodeDataToDB(volume_letter)
if __name__ == '__main__':

View File

@@ -2,7 +2,7 @@ import os
import pytsk3
from db_config import GetNTFSBootInfo
from ntfs_utils.db_config import GetNTFSBootInfo
def find_file_mft_entry(fs, target_path):