fastcopy/ntfs_utils/db_node.py
import hashlib
import os
import sqlite3
from datetime import datetime
from ntfs_utils.mft_analyze import GetFile80hPattern, GetFragmentData, ExtractSequenceHexValues, hex_list_to_int
from ntfs_utils.main import volume_letter

# Utility: get a file's extension (lower-cased, without the dot)
def GetFileExtension(name: str) -> str:
    parts = name.rsplit('.', 1)
    return parts[1].lower() if len(parts) > 1 else ""

# Look up ExtendNameID in db_extend_name based on the file name's extension
def GetExtendNameId(name: str, cursor: sqlite3.Cursor) -> int:
    ext = GetFileExtension(name)
    if not ext:
        return 0
    cursor.execute("SELECT ID FROM db_extend_name WHERE ExtendName = ?", (ext,))
    result = cursor.fetchone()
    return result[0] if result else 0

# Get DirLayer (directory depth of a backslash-separated relative path)
def GetDirLayer(path: str) -> int:
    path = path.strip()
    if not path or path == "\\":
        return 0
    return path.count("\\") - 1

# Get GroupID (defaults to the first record in db_group)
def GetFirstGroupId(cursor: sqlite3.Cursor) -> int:
    cursor.execute("SELECT ID FROM db_group ORDER BY ID LIMIT 1")
    result = cursor.fetchone()
    return result[0] if result else 0

# Get UserID (defaults to the first record in db_user)
def GetFirstUserId(cursor: sqlite3.Cursor) -> int:
    cursor.execute("SELECT ID FROM db_user ORDER BY ID LIMIT 1")
    result = cursor.fetchone()
    return result[0] if result else 0

def GetFilesTime(file_path):
    """
    Return the creation, modification, access and permission-change times of a file.
    st_atime: last access time                    -> FileAccessTime
    st_mtime: last content-modification time      -> FileModifyTime
    st_ctime: metadata-change time (on Windows, the creation time) -> FileCreateTime
    Args:
        file_path (str): absolute path of the file
    Returns:
        dict: FileCreateTime, FileModifyTime, FileAccessTime and FileAuthTime as
              formatted strings, or "default" for any value that cannot be obtained
    """
    if not os.path.exists(file_path):
        return {
            "FileCreateTime": "default",
            "FileModifyTime": "default",
            "FileAccessTime": "default",
            "FileAuthTime": "default"
        }
    try:
        stat_info = os.stat(file_path)

        def ts_to_str(timestamp):
            return datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S')

        create_time = ts_to_str(stat_info.st_ctime)
        modify_time = ts_to_str(stat_info.st_mtime)
        access_time = ts_to_str(stat_info.st_atime)
        # Permission-change time (may not be meaningful on Windows)
        try:
            auth_time = ts_to_str(getattr(stat_info, 'st_birthtime', stat_info.st_ctime))
        except Exception:
            auth_time = "default"
        return {
            "FileCreateTime": create_time,
            "FileModifyTime": modify_time,
            "FileAccessTime": access_time,
            "FileAuthTime": auth_time
        }
    except Exception as e:
        print(f"❌ Failed to get file times: {e}")
        return {
            "FileCreateTime": "default",
            "FileModifyTime": "default",
            "FileAccessTime": "default",
            "FileAuthTime": "default"
        }
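
# Example of the dict returned by GetFilesTime (hypothetical timestamps):
# {
#     "FileCreateTime": "2025-05-23 18:01:42",
#     "FileModifyTime": "2025-05-23 18:05:10",
#     "FileAccessTime": "2025-05-23 18:06:03",
#     "FileAuthTime": "2025-05-23 18:01:42"
# }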

# Get device ID (first record in db_device)
def GetDeviceId(cursor: sqlite3.Cursor) -> int:
    cursor.execute("SELECT ID FROM db_device ORDER BY ID LIMIT 1")
    result = cursor.fetchone()
    return result[0] if result else 0

# Get the file size from the parsed 0x80 ($DATA) attribute pattern
def GetFileSize(file80h_pattern):
    if file80h_pattern[0].get('is_resident'):
        return GetFragmentData(file80h_pattern)[0].get('byte_length')
    else:
        size_list = ExtractSequenceHexValues(file80h_pattern)[56:64]
        size = hex_list_to_int(size_list)
        return size
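
# Note on GetFileSize above: in NTFS a resident $DATA (0x80) attribute stores the file
# content inside the MFT record itself, so its single fragment's byte_length is the file
# size; for a non-resident attribute the content lives in external clusters, and this code
# instead takes the eight bytes at offset 56 of the hex sequence returned by
# ExtractSequenceHexValues and converts them with hex_list_to_int.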

# Get the file "content" hash (placeholder: hashes the path string, not the actual contents)
def GetFileHash(full_path: str) -> str:
    return hashlib.sha256(full_path.encode()).hexdigest()
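
# A minimal sketch (not used by the pipeline below) of hashing the actual file contents
# instead of the path string; ContentHash is a hypothetical helper name, and it assumes
# the file at full_path is readable.
def ContentHash(full_path: str, chunk_size: int = 1 << 20) -> str:
    sha = hashlib.sha256()
    with open(full_path, 'rb') as f:
        # Read in fixed-size chunks so large files never have to fit in memory at once
        for chunk in iter(lambda: f.read(chunk_size), b''):
            sha.update(chunk)
    return sha.hexdigest()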

# New: get a fragment's location and length
def GetFragmentLocation(fragment):
    return fragment.get('starting_byte', 0)

def GetFragmentLength(fragment):
    return fragment.get('byte_length', 0)

# Main routine: import rows from db_path into db_node
def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node', batch_size=20):
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    # Normalise the volume root; accepts inputs like "Y" or "Y:"
    if len(volume_letter) == 1:
        volume_root = f"{volume_letter}:\\"
    elif volume_letter.endswith(':'):
        volume_root = f"{volume_letter}\\"
    else:
        volume_root = f"{volume_letter}:\\"
    print(f"🔍 Processing volume root: {volume_root}")
    group_id = GetFirstGroupId(cursor)
    user_id = GetFirstUserId(cursor)
    device_id = GetDeviceId(cursor)
    cursor.execute("SELECT ID, Path, Name, ParentID FROM db_path")
    rows = cursor.fetchall()
    insert_fields = [
        'PathID', 'ParentID', 'NameHash', 'PathHash',
        'ExtendNameID', 'DirLayer', 'GroupID', 'UserID',
        'FileCreateTime', 'FileModifyTime', 'FileAccessTime', 'FileAuthTime',
        'FileSize', 'FileMode', 'FileHash', 'ExtentCount',
        # extent fields
        "extent1_DeviceID", "extent1_Location", "extent1_Length",
        "extent2_DeviceID", "extent2_Location", "extent2_Length",
        "extent3_DeviceID", "extent3_Location", "extent3_Length",
        "extent4_DeviceID", "extent4_Location", "extent4_Length"
    ]
    insert_placeholders = ', '.join('?' * len(insert_fields))
    insert_sql = f"INSERT INTO {table_name} ({', '.join(insert_fields)}) VALUES ({insert_placeholders})"
    batch = []
    for row in rows:
        path_id, relative_path, name, parent_id = row
        full_path = os.path.join(volume_root, relative_path)
        # Skip rows whose PathID already exists in db_node
        cursor.execute("SELECT COUNT(*) FROM db_node WHERE PathID = ?", (path_id,))
        exists = cursor.fetchone()[0]
        if exists > 0:
            print(f"⚠️ PathID {path_id} already exists, skipping insert")
            continue
        try:
            file80h_pattern = GetFile80hPattern(full_path)
            fragments = GetFragmentData(file80h_pattern)
            extent_count = min(len(fragments), 4)
        except Exception as e:
            print(f"⚠️ Failed to get ExtentCount, falling back to 0: {e}")
            file80h_pattern = None  # keep the name defined so the size lookup below fails cleanly
            fragments = []
            extent_count = 0
        # Derived fields
        name_hash = hashlib.sha256(name.encode()).hexdigest()
        dir_layer = GetDirLayer(relative_path)
        extend_name_id = GetExtendNameId(name, cursor)
        try:
            file_size = GetFileSize(file80h_pattern)
        except Exception as e:
            print(f"⚠️ Failed to get file size, falling back to 0: {e}")
            file_size = 0
        file_hash = GetFileHash(full_path)
        # Timestamps
        file_times = GetFilesTime(full_path)
        create_time = file_times["FileCreateTime"]
        modify_time = file_times["FileModifyTime"]
        access_time = file_times["FileAccessTime"]
        auth_time = file_times["FileAuthTime"]
        # Look up PathHash from db_path
        cursor.execute("SELECT PathHash FROM db_path WHERE ID = ?", (path_id,))
        path_hash_result = cursor.fetchone()
        path_hash = path_hash_result[0] if path_hash_result else ""
        # Build the extent columns (at most 4 extents; unused slots stay NULL)
        extent_data = []
        for i in range(4):
            if i < len(fragments):
                frag = fragments[i]
                location = GetFragmentLocation(frag)
                length = GetFragmentLength(frag)
                extent_data.extend([device_id, location, length])
            else:
                extent_data.extend([None, None, None])
        # Assemble the row to insert
        values = [
            path_id, parent_id, name_hash, path_hash,
            extend_name_id, dir_layer, group_id, user_id,
            create_time, modify_time, access_time, auth_time,
            file_size, 'default', file_hash, extent_count,
            *extent_data
        ]
        batch.append(values)
        # Commit in batches
        if len(batch) >= batch_size:
            cursor.executemany(insert_sql, batch)
            conn.commit()
            print(f"✅ Committed a batch of {len(batch)} records to {table_name}")
            batch.clear()
    # Commit whatever is left over from the last partial batch
    if batch:
        cursor.executemany(insert_sql, batch)
        conn.commit()
        print(f"✅ Committed the final batch of {len(batch)} records to {table_name}")
    conn.close()
    print(f"✅ Data inserted into {table_name} successfully")

if __name__ == "__main__":
    InsertNodeDataToDB()