\db_path style

This commit is contained in:
Burgess Leo
2025-05-22 17:21:44 +08:00
parent 3347abe02f
commit d2a3a7b5b5
9 changed files with 301 additions and 111 deletions

View File

@@ -3,8 +3,7 @@ import os
import sqlite3
from datetime import datetime
# 导入你的模块函数
from mft_analyze import GetFile80hPattern, GetFragmentData, ExtractSequenceHexValues, hex_list_to_int
from ntfs_utils.mft_analyze import GetFile80hPattern, GetFragmentData, ExtractSequenceHexValues, hex_list_to_int
# 工具函数:获取文件扩展名
@@ -132,11 +131,20 @@ def GetFragmentLength(fragment):
# 主函数:将 db_path 数据导入 db_node
# 主函数:将 db_path 数据导入 db_node
def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
def InsertNodeDataToDB(volume_letter: str, db_path='../src/db_ntfs_info.db', table_name='db_node', batch_size=20):
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
volume_letter = volume_letter.upper().strip()
if len(volume_letter) == 1:
volume_root = f"{volume_letter}:\\"
elif volume_letter.endswith(':'):
volume_root = f"{volume_letter}\\"
else:
volume_root = f"{volume_letter}:\\" # 支持 "Y" 或 "Y:" 输入
print(f"🔍 当前处理磁盘根目录:{volume_root}")
group_id = GetFirstGroupId(cursor)
user_id = GetFirstUserId(cursor)
device_id = GetDeviceId(cursor)
@@ -144,8 +152,26 @@ def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
cursor.execute("SELECT ID, Path, Name, ParentID FROM db_path")
rows = cursor.fetchall()
insert_fields = [
'PathID', 'ParentID', 'NameHash', 'PathHash',
'ExtendNameID', 'DirLayer', 'GroupID', 'UserID',
'FileCreateTime', 'FileModifyTime', 'FileAccessTime', 'FileAuthTime',
'FileSize', 'FileMode', 'FileHash', 'ExtentCount',
# extent 字段
"extent1_DeviceID", "extent1_Location", "extent1_Length",
"extent2_DeviceID", "extent2_Location", "extent2_Length",
"extent3_DeviceID", "extent3_Location", "extent3_Length",
"extent4_DeviceID", "extent4_Location", "extent4_Length"
]
insert_placeholders = ', '.join('?' * len(insert_fields))
insert_sql = f"INSERT INTO {table_name} ({', '.join(insert_fields)}) VALUES ({insert_placeholders})"
batch = []
for row in rows:
path_id, full_path, name, parent_id = row
path_id, relative_path, name, parent_id = row
full_path = os.path.join(volume_root, relative_path)
# 检查是否已存在相同 PathID
cursor.execute("SELECT COUNT(*) FROM db_node WHERE PathID = ?", (path_id,))
@@ -154,13 +180,10 @@ def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
print(f"⚠️ PathID {path_id} 已存在,跳过插入")
continue
# 获取文件的80h属性数据
try:
file80h_pattern = GetFile80hPattern(full_path)
fragments = GetFragmentData(file80h_pattern)
extent_count = min(len(fragments), 4) # 最多支持4个fragment
print(f"✅ 分片数量为: {extent_count}")
extent_count = min(len(fragments), 4)
except Exception as e:
print(f"⚠️ 获取 ExtentCount 失败,使用默认值 0: {e}")
fragments = []
@@ -168,10 +191,9 @@ def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
# 计算字段
name_hash = hashlib.sha256(name.encode()).hexdigest()
dir_layer = GetDirLayer(full_path)
dir_layer = GetDirLayer(relative_path)
extend_name_id = GetExtendNameId(name, cursor)
# ✅ 现在可以安全调用 GetFileSize(file80h_pattern)
try:
file_size = GetFileSize(file80h_pattern)
except Exception as e:
@@ -180,7 +202,7 @@ def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
file_hash = GetFileHash(full_path)
# 获取文件的时间属性
# 获取时间信息
file_times = GetFilesTime(full_path)
create_time = file_times["FileCreateTime"]
modify_time = file_times["FileModifyTime"]
@@ -192,21 +214,7 @@ def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
path_hash_result = cursor.fetchone()
path_hash = path_hash_result[0] if path_hash_result else ""
# 构建插入语句字段和参数(保持原样)
fields = [
'PathID', 'ParentID', 'NameHash', 'PathHash',
'ExtendNameID', 'DirLayer', 'GroupID', 'UserID',
'FileCreateTime', 'FileModifyTime', 'FileAccessTime', 'FileAuthTime',
'FileSize', 'FileMode', 'FileHash', 'ExtentCount'
]
values = [
path_id, parent_id, name_hash, path_hash,
extend_name_id, dir_layer, group_id, user_id,
create_time, modify_time, access_time, auth_time,
file_size, 'default', file_hash, extent_count
]
# 处理 Extent 片段字段
# 构建 extent 字段
extent_data = []
for i in range(4): # 最多4个 extent
if i < len(fragments):
@@ -217,27 +225,34 @@ def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
else:
extent_data.extend([None, None, None])
# 拼接字段和值
extent_fields = [
"extent1_DeviceID", "extent1_Location", "extent1_Length",
"extent2_DeviceID", "extent2_Location", "extent2_Length",
"extent3_DeviceID", "extent3_Location", "extent3_Length",
"extent4_DeviceID", "extent4_Location", "extent4_Length"
# 构建插入数据
values = [
path_id, parent_id, name_hash, path_hash,
extend_name_id, dir_layer, group_id, user_id,
create_time, modify_time, access_time, auth_time,
file_size, 'default', file_hash, extent_count,
*extent_data
]
fields += extent_fields
values += extent_data
# 构建 SQL 插入语句
placeholders = ', '.join('?' * len(values))
insert_sql = f"INSERT INTO {table_name} ({', '.join(fields)}) VALUES ({placeholders})"
batch.append(values)
# 执行插入
cursor.execute(insert_sql, values)
# 批量插入
if len(batch) >= batch_size:
cursor.executemany(insert_sql, batch)
conn.commit()
print(f"✅ 提交一批 {len(batch)} 条记录到 {table_name}")
batch.clear()
# 插入剩余不足一批的数据
if batch:
cursor.executemany(insert_sql, batch)
conn.commit()
print(f"✅ 提交最后一批 {len(batch)} 条记录到 {table_name}")
conn.commit()
conn.close()
print(f"✅ 数据已成功插入到 {table_name}")
if __name__ == '__main__':
InsertNodeDataToDB()
if __name__ == "__main__":
volume_letter_test = "Y"
InsertNodeDataToDB(volume_letter=volume_letter_test)