\db_path style
This commit is contained in:
@@ -3,8 +3,7 @@ import os
|
||||
import sqlite3
|
||||
from datetime import datetime
|
||||
|
||||
# 导入你的模块函数
|
||||
from mft_analyze import GetFile80hPattern, GetFragmentData, ExtractSequenceHexValues, hex_list_to_int
|
||||
from ntfs_utils.mft_analyze import GetFile80hPattern, GetFragmentData, ExtractSequenceHexValues, hex_list_to_int
|
||||
|
||||
|
||||
# 工具函数:获取文件扩展名
|
||||
@@ -132,11 +131,20 @@ def GetFragmentLength(fragment):
|
||||
|
||||
|
||||
# 主函数:将 db_path 数据导入 db_node
|
||||
# 主函数:将 db_path 数据导入 db_node
|
||||
def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
|
||||
def InsertNodeDataToDB(volume_letter: str, db_path='../src/db_ntfs_info.db', table_name='db_node', batch_size=20):
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.cursor()
|
||||
|
||||
volume_letter = volume_letter.upper().strip()
|
||||
if len(volume_letter) == 1:
|
||||
volume_root = f"{volume_letter}:\\"
|
||||
elif volume_letter.endswith(':'):
|
||||
volume_root = f"{volume_letter}\\"
|
||||
else:
|
||||
volume_root = f"{volume_letter}:\\" # 支持 "Y" 或 "Y:" 输入
|
||||
|
||||
print(f"🔍 当前处理磁盘根目录:{volume_root}")
|
||||
|
||||
group_id = GetFirstGroupId(cursor)
|
||||
user_id = GetFirstUserId(cursor)
|
||||
device_id = GetDeviceId(cursor)
|
||||
@@ -144,8 +152,26 @@ def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
|
||||
cursor.execute("SELECT ID, Path, Name, ParentID FROM db_path")
|
||||
rows = cursor.fetchall()
|
||||
|
||||
insert_fields = [
|
||||
'PathID', 'ParentID', 'NameHash', 'PathHash',
|
||||
'ExtendNameID', 'DirLayer', 'GroupID', 'UserID',
|
||||
'FileCreateTime', 'FileModifyTime', 'FileAccessTime', 'FileAuthTime',
|
||||
'FileSize', 'FileMode', 'FileHash', 'ExtentCount',
|
||||
# extent 字段
|
||||
"extent1_DeviceID", "extent1_Location", "extent1_Length",
|
||||
"extent2_DeviceID", "extent2_Location", "extent2_Length",
|
||||
"extent3_DeviceID", "extent3_Location", "extent3_Length",
|
||||
"extent4_DeviceID", "extent4_Location", "extent4_Length"
|
||||
]
|
||||
insert_placeholders = ', '.join('?' * len(insert_fields))
|
||||
insert_sql = f"INSERT INTO {table_name} ({', '.join(insert_fields)}) VALUES ({insert_placeholders})"
|
||||
|
||||
batch = []
|
||||
|
||||
for row in rows:
|
||||
path_id, full_path, name, parent_id = row
|
||||
path_id, relative_path, name, parent_id = row
|
||||
|
||||
full_path = os.path.join(volume_root, relative_path)
|
||||
|
||||
# 检查是否已存在相同 PathID
|
||||
cursor.execute("SELECT COUNT(*) FROM db_node WHERE PathID = ?", (path_id,))
|
||||
@@ -154,13 +180,10 @@ def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
|
||||
print(f"⚠️ PathID {path_id} 已存在,跳过插入")
|
||||
continue
|
||||
|
||||
# 获取文件的80h属性数据
|
||||
try:
|
||||
file80h_pattern = GetFile80hPattern(full_path)
|
||||
fragments = GetFragmentData(file80h_pattern)
|
||||
extent_count = min(len(fragments), 4) # 最多支持4个fragment
|
||||
print(f"✅ 分片数量为: {extent_count}")
|
||||
|
||||
extent_count = min(len(fragments), 4)
|
||||
except Exception as e:
|
||||
print(f"⚠️ 获取 ExtentCount 失败,使用默认值 0: {e}")
|
||||
fragments = []
|
||||
@@ -168,10 +191,9 @@ def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
|
||||
|
||||
# 计算字段
|
||||
name_hash = hashlib.sha256(name.encode()).hexdigest()
|
||||
dir_layer = GetDirLayer(full_path)
|
||||
dir_layer = GetDirLayer(relative_path)
|
||||
extend_name_id = GetExtendNameId(name, cursor)
|
||||
|
||||
# ✅ 现在可以安全调用 GetFileSize(file80h_pattern)
|
||||
try:
|
||||
file_size = GetFileSize(file80h_pattern)
|
||||
except Exception as e:
|
||||
@@ -180,7 +202,7 @@ def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
|
||||
|
||||
file_hash = GetFileHash(full_path)
|
||||
|
||||
# 获取文件的时间属性
|
||||
# 获取时间信息
|
||||
file_times = GetFilesTime(full_path)
|
||||
create_time = file_times["FileCreateTime"]
|
||||
modify_time = file_times["FileModifyTime"]
|
||||
@@ -192,21 +214,7 @@ def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
|
||||
path_hash_result = cursor.fetchone()
|
||||
path_hash = path_hash_result[0] if path_hash_result else ""
|
||||
|
||||
# 构建插入语句字段和参数(保持原样)
|
||||
fields = [
|
||||
'PathID', 'ParentID', 'NameHash', 'PathHash',
|
||||
'ExtendNameID', 'DirLayer', 'GroupID', 'UserID',
|
||||
'FileCreateTime', 'FileModifyTime', 'FileAccessTime', 'FileAuthTime',
|
||||
'FileSize', 'FileMode', 'FileHash', 'ExtentCount'
|
||||
]
|
||||
values = [
|
||||
path_id, parent_id, name_hash, path_hash,
|
||||
extend_name_id, dir_layer, group_id, user_id,
|
||||
create_time, modify_time, access_time, auth_time,
|
||||
file_size, 'default', file_hash, extent_count
|
||||
]
|
||||
|
||||
# 处理 Extent 片段字段
|
||||
# 构建 extent 字段
|
||||
extent_data = []
|
||||
for i in range(4): # 最多4个 extent
|
||||
if i < len(fragments):
|
||||
@@ -217,27 +225,34 @@ def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
|
||||
else:
|
||||
extent_data.extend([None, None, None])
|
||||
|
||||
# 拼接字段和值
|
||||
extent_fields = [
|
||||
"extent1_DeviceID", "extent1_Location", "extent1_Length",
|
||||
"extent2_DeviceID", "extent2_Location", "extent2_Length",
|
||||
"extent3_DeviceID", "extent3_Location", "extent3_Length",
|
||||
"extent4_DeviceID", "extent4_Location", "extent4_Length"
|
||||
# 构建插入数据
|
||||
values = [
|
||||
path_id, parent_id, name_hash, path_hash,
|
||||
extend_name_id, dir_layer, group_id, user_id,
|
||||
create_time, modify_time, access_time, auth_time,
|
||||
file_size, 'default', file_hash, extent_count,
|
||||
*extent_data
|
||||
]
|
||||
fields += extent_fields
|
||||
values += extent_data
|
||||
|
||||
# 构建 SQL 插入语句
|
||||
placeholders = ', '.join('?' * len(values))
|
||||
insert_sql = f"INSERT INTO {table_name} ({', '.join(fields)}) VALUES ({placeholders})"
|
||||
batch.append(values)
|
||||
|
||||
# 执行插入
|
||||
cursor.execute(insert_sql, values)
|
||||
# 批量插入
|
||||
if len(batch) >= batch_size:
|
||||
cursor.executemany(insert_sql, batch)
|
||||
conn.commit()
|
||||
print(f"✅ 提交一批 {len(batch)} 条记录到 {table_name}")
|
||||
batch.clear()
|
||||
|
||||
# 插入剩余不足一批的数据
|
||||
if batch:
|
||||
cursor.executemany(insert_sql, batch)
|
||||
conn.commit()
|
||||
print(f"✅ 提交最后一批 {len(batch)} 条记录到 {table_name}")
|
||||
|
||||
conn.commit()
|
||||
conn.close()
|
||||
print(f"✅ 数据已成功插入到 {table_name} 表")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
InsertNodeDataToDB()
|
||||
if __name__ == "__main__":
|
||||
volume_letter_test = "Y"
|
||||
InsertNodeDataToDB(volume_letter=volume_letter_test)
|
||||
|
@@ -27,23 +27,23 @@ def ScanVolume(volume_letter: str):
|
||||
"""
|
||||
完整扫描指定磁盘的所有文件和目录,忽略 NTFS 元文件和系统文件夹,
|
||||
并为每个节点分配 ParentID。
|
||||
|
||||
返回:
|
||||
list of dict:包含文件/目录信息的字典列表
|
||||
"""
|
||||
|
||||
root_path = f"{volume_letter.upper()}:\\"
|
||||
if not os.path.exists(root_path):
|
||||
raise ValueError(f"磁盘 {root_path} 不存在")
|
||||
|
||||
result = []
|
||||
path_to_id = {} # 用于记录路径到数据库 ID 的映射
|
||||
counter = 1 # 模拟数据库自增 ID
|
||||
path_to_id = {} # 路径 -> ID 映射
|
||||
counter = 1
|
||||
|
||||
for root, dirs, files in os.walk(root_path, topdown=True, onerror=None, followlinks=False):
|
||||
# 过滤掉需要跳过的目录
|
||||
dirs[:] = [d for d in dirs if not ShouldSkipPath(os.path.join(root, d))]
|
||||
|
||||
for entry in files + dirs:
|
||||
entries = files + dirs
|
||||
|
||||
for entry in entries:
|
||||
full_path = os.path.join(root, entry)
|
||||
|
||||
if ShouldSkipPath(full_path):
|
||||
@@ -61,21 +61,35 @@ def ScanVolume(volume_letter: str):
|
||||
|
||||
name = entry
|
||||
|
||||
# ✅ 修正点:对 Path 字段进行哈希
|
||||
path_hash = GenerateHash(full_path)
|
||||
# 分离盘符并去除开头的 \
|
||||
_, relative_path = os.path.splitdrive(full_path)
|
||||
relative_path = relative_path.lstrip("\\")
|
||||
|
||||
# 如果是文件夹,Path 字段结尾加 '\\'
|
||||
if is_dir and not relative_path.endswith("\\"):
|
||||
relative_path += "\\"
|
||||
|
||||
# ✅ 关键修改点:将所有 \ 替换为 \\
|
||||
relative_path = relative_path.replace("\\", "\\\\")
|
||||
|
||||
path_hash = GenerateHash(relative_path)
|
||||
|
||||
# 计算 ContentSize(KB),小文件至少显示为 1 KB
|
||||
content_size = bytes_size // 1024
|
||||
if content_size == 0 and bytes_size > 0:
|
||||
content_size = 1
|
||||
|
||||
# 获取父目录路径
|
||||
parent_path = os.path.dirname(full_path)
|
||||
parent_id = path_to_id.get(parent_path, 0) # 默认为 0(根目录可能未录入)
|
||||
_, parent_relative_path = os.path.splitdrive(parent_path)
|
||||
parent_relative_path = parent_relative_path.lstrip("\\").rstrip("\\") # 去除首尾 \
|
||||
if os.path.isdir(parent_path) and not parent_relative_path.endswith("\\"): # 如果是目录,补 \
|
||||
parent_relative_path += "\\"
|
||||
parent_relative_path = parent_relative_path.replace("\\", "\\\\") # 转换为双反斜杠 \\
|
||||
|
||||
parent_id = path_to_id.get(parent_relative_path, 0)
|
||||
|
||||
item = {
|
||||
"ID": counter,
|
||||
"Path": full_path,
|
||||
"Path": relative_path,
|
||||
"Name": name,
|
||||
"PathHash": path_hash,
|
||||
"IsDir": is_dir,
|
||||
@@ -84,7 +98,7 @@ def ScanVolume(volume_letter: str):
|
||||
}
|
||||
|
||||
result.append(item)
|
||||
path_to_id[full_path] = counter
|
||||
path_to_id[relative_path] = counter
|
||||
counter += 1
|
||||
|
||||
except Exception as e:
|
||||
@@ -163,7 +177,7 @@ def InsertPathDataToDB(data, db_path='../src/db_ntfs_info.db', table_name='db_pa
|
||||
|
||||
# 示例主函数
|
||||
def main():
|
||||
volume_letter = "Z"
|
||||
volume_letter = "Y"
|
||||
|
||||
print(f"🔍 开始全盘扫描磁盘 {volume_letter}:\\ ...")
|
||||
scanned_data = ScanVolume(volume_letter)
|
||||
|
@@ -1,15 +1,15 @@
|
||||
from db_config import GetNTFSBootInfo, InsertInfoToDBConfig
|
||||
from db_device import ScanSpecialVolumes, InsertVolumesToDB
|
||||
from db_extend_name import InsertExtensionsToDB
|
||||
from db_group import InsertGroupToDB
|
||||
from db_path import GenerateHash, ShouldSkipPath, ScanVolume, InsertPathDataToDB
|
||||
from db_user import InsertUserToDB
|
||||
from db_node import InsertNodeDataToDB
|
||||
from ntfs_utils.db_config import GetNTFSBootInfo, InsertInfoToDBConfig
|
||||
from ntfs_utils.db_device import ScanSpecialVolumes, InsertVolumesToDB
|
||||
from ntfs_utils.db_extend_name import InsertExtensionsToDB
|
||||
from ntfs_utils.db_group import InsertGroupToDB
|
||||
from ntfs_utils.db_node import InsertNodeDataToDB
|
||||
from ntfs_utils.db_path import ScanVolume, InsertPathDataToDB
|
||||
from ntfs_utils.db_user import InsertUserToDB
|
||||
|
||||
volume_letter = 'Y'
|
||||
|
||||
|
||||
def main():
|
||||
volume_letter = 'Y'
|
||||
|
||||
# 初始化 db_config 表
|
||||
config_data = GetNTFSBootInfo(volume_letter)
|
||||
InsertInfoToDBConfig(config_data)
|
||||
@@ -42,7 +42,7 @@ def main():
|
||||
print(f"共插入 {count} 个新扩展名。")
|
||||
|
||||
# 初始化 db_node 表
|
||||
InsertNodeDataToDB()
|
||||
InsertNodeDataToDB(volume_letter)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
@@ -2,7 +2,7 @@ import os
|
||||
|
||||
import pytsk3
|
||||
|
||||
from db_config import GetNTFSBootInfo
|
||||
from ntfs_utils.db_config import GetNTFSBootInfo
|
||||
|
||||
|
||||
def find_file_mft_entry(fs, target_path):
|
||||
|
Reference in New Issue
Block a user