Files
fastcopy/ntfs_utils/db_node.py
2025-05-20 16:26:58 +08:00

244 lines
8.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import hashlib
import os
import sqlite3
from datetime import datetime
# Import the helper functions from the project's mft_analyze module
from mft_analyze import GetFile80hPattern, GetFragmentData, ExtractSequenceHexValues, hex_list_to_int
# Utility: get a file name's extension
def GetFileExtension(name: str) -> str:
    """Return the lower-cased text that follows the last '.' in *name*.

    An empty string is returned when *name* contains no '.' at all (and
    also when the '.' is the final character, since nothing follows it).
    """
    _stem, dot, suffix = name.rpartition('.')
    if not dot:
        # No separator found: the name has no extension.
        return ""
    return suffix.lower()
# Get ExtendNameID (based on the file-name extension)
def GetExtendNameId(name: str, cursor: sqlite3.Cursor) -> int:
    """Look up the db_extend_name ID that matches the extension of *name*.

    Args:
        name: File name whose extension is resolved via GetFileExtension.
        cursor: Open cursor on a database containing db_extend_name.

    Returns:
        The ID of the matching row, or 0 when the name has no extension
        or no row matches it.
    """
    suffix = GetFileExtension(name)
    if suffix:
        cursor.execute(
            "SELECT ID FROM db_extend_name WHERE ExtendName = ?", (suffix,)
        )
        row = cursor.fetchone()
        if row:
            return row[0]
    return 0
# Get DirLayer (directory depth of the path)
def GetDirLayer(path: str) -> int:
    """Return the directory layer of a backslash-separated *path*.

    Surrounding whitespace is ignored. An empty path or a lone backslash
    is layer 0; otherwise the layer is the number of backslashes minus one
    (so a path containing no backslash at all yields -1).
    """
    trimmed = path.strip()
    if trimmed in ("", "\\"):
        return 0
    separators = trimmed.count("\\")
    return separators - 1
# Get GroupID (defaults to the first row)
def GetFirstGroupId(cursor: sqlite3.Cursor) -> int:
    """Return the smallest ID stored in db_group, or 0 if the table is empty."""
    cursor.execute("SELECT ID FROM db_group ORDER BY ID LIMIT 1")
    first_row = cursor.fetchone()
    if first_row is None:
        return 0
    return first_row[0]
# Get UserID (defaults to the first row)
def GetFirstUserId(cursor: sqlite3.Cursor) -> int:
    """Return the smallest ID stored in db_user, or 0 if the table is empty."""
    cursor.execute("SELECT ID FROM db_user ORDER BY ID LIMIT 1")
    first_row = cursor.fetchone()
    if first_row is None:
        return 0
    return first_row[0]
def GetFilesTime(file_path):
    """
    Return the creation, modification, access and metadata-change times
    of a file as formatted strings.

    Every timestamp is rendered in local time as ``YYYY-MM-DD HH:MM:SS``:

    - FileCreateTime: ``st_birthtime`` when the platform's stat result
      provides it, otherwise ``st_ctime`` (which is the creation time on
      Windows).
    - FileModifyTime: ``st_mtime``, the last content modification.
    - FileAccessTime: ``st_atime``, the last access.
    - FileAuthTime: ``st_ctime``, the last metadata (e.g. permission)
      change on Unix-like systems; on Windows it equals the creation time.

    Args:
        file_path (str): Path of the file to inspect.

    Returns:
        dict: The four keys above mapped to formatted strings. Every value
        is the literal string "default" when the file does not exist or
        its timestamps cannot be read or formatted.
    """
    time_keys = ("FileCreateTime", "FileModifyTime", "FileAccessTime", "FileAuthTime")
    defaults = dict.fromkeys(time_keys, "default")

    if not os.path.exists(file_path):
        return defaults

    def ts_to_str(timestamp):
        return datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S')

    try:
        stat_info = os.stat(file_path)
        # The true creation time lives in st_birthtime where available;
        # st_ctime is only a creation time on Windows and is the
        # metadata-change time elsewhere, so it must not be used for
        # FileCreateTime when st_birthtime exists.
        birth_time = getattr(stat_info, 'st_birthtime', stat_info.st_ctime)
        return {
            "FileCreateTime": ts_to_str(birth_time),
            "FileModifyTime": ts_to_str(stat_info.st_mtime),
            "FileAccessTime": ts_to_str(stat_info.st_atime),
            "FileAuthTime": ts_to_str(stat_info.st_ctime),
        }
    except (OSError, OverflowError, ValueError) as e:
        # os.stat and datetime.fromtimestamp fail with these types; stay
        # best-effort and fall back to the placeholder values.
        print(f"❌ 获取文件时间失败: {e}")
        return defaults
# Get the device ID (first row of db_device)
def GetDeviceId(cursor: sqlite3.Cursor) -> int:
    """Return the smallest ID stored in db_device, or 0 if the table is empty."""
    cursor.execute("SELECT ID FROM db_device ORDER BY ID LIMIT 1")
    first_row = cursor.fetchone()
    if first_row is None:
        return 0
    return first_row[0]
# Get the file size (derived from the file's 80h attribute pattern)
def GetFileSize(file80h_pattern):
    """Return the file size derived from a file's 80h attribute pattern.

    Args:
        file80h_pattern: Sequence whose first element is a mapping with an
            'is_resident' entry (as produced by GetFile80hPattern).

    Returns:
        For a resident attribute, the 'byte_length' of the first fragment
        reported by GetFragmentData. Otherwise, the integer that
        hex_list_to_int builds from elements 56..63 of the sequence
        returned by ExtractSequenceHexValues.
    """
    first_entry = file80h_pattern[0]
    if not first_entry.get('is_resident'):
        # NOTE(review): elements 56..63 are presumably the size field of
        # the non-resident attribute header - confirm against mft_analyze.
        raw_values = ExtractSequenceHexValues(file80h_pattern)
        return hex_list_to_int(raw_values[56:64])
    resident_fragments = GetFragmentData(file80h_pattern)
    return resident_fragments[0].get('byte_length')
# Get the file content hash (placeholder: currently hashes the path string, not the file content)
def GetFileHash(full_path: str) -> str:
    """Return the SHA-256 hex digest of the path string itself.

    The file's content is not read; only the encoded *full_path* text is
    hashed.
    """
    hasher = hashlib.sha256()
    hasher.update(full_path.encode())
    return hasher.hexdigest()
# Added: get a fragment's location and length
def GetFragmentLocation(fragment):
    """Return the fragment's 'starting_byte' entry, or 0 when it is absent."""
    location = fragment.get('starting_byte', 0)
    return location
def GetFragmentLength(fragment):
    """Return the fragment's 'byte_length' entry, or 0 when it is absent."""
    length = fragment.get('byte_length', 0)
    return length
# Main routine: import the rows of db_path into db_node
def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
    """
    Insert one node row into *table_name* for every row of db_path.

    For each db_path row that has no node row with the same PathID yet,
    the function gathers the name hash, path hash, extension ID, directory
    layer, file times, file size, file hash and up to four extents
    (device ID, location, length), then inserts them. Rows whose PathID
    already exists are skipped. All inserts are committed together at the
    end; the connection is always closed, even when an error aborts the
    run (in which case nothing is committed).

    Args:
        db_path (str): Path of the SQLite database file.
        table_name (str): Name of the target node table. It is
            interpolated into the SQL text (identifiers cannot be bound
            as parameters), so it must come from trusted code.
    """
    max_extents = 4  # the table has columns for extent1..extent4 only

    # The column list and both statements are the same for every row, so
    # they are built once instead of inside the loop.
    fields = [
        'PathID', 'ParentID', 'NameHash', 'PathHash',
        'ExtendNameID', 'DirLayer', 'GroupID', 'UserID',
        'FileCreateTime', 'FileModifyTime', 'FileAccessTime', 'FileAuthTime',
        'FileSize', 'FileMode', 'FileHash', 'ExtentCount',
    ]
    extent_fields = [
        "extent1_DeviceID", "extent1_Location", "extent1_Length",
        "extent2_DeviceID", "extent2_Location", "extent2_Length",
        "extent3_DeviceID", "extent3_Location", "extent3_Length",
        "extent4_DeviceID", "extent4_Location", "extent4_Length",
    ]
    fields += extent_fields
    placeholders = ', '.join('?' * len(fields))
    insert_sql = f"INSERT INTO {table_name} ({', '.join(fields)}) VALUES ({placeholders})"
    # Check the same table that is written to, not a hard-coded name.
    exists_sql = f"SELECT COUNT(*) FROM {table_name} WHERE PathID = ?"

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        group_id = GetFirstGroupId(cursor)
        user_id = GetFirstUserId(cursor)
        device_id = GetDeviceId(cursor)

        # PathHash is fetched together with the other columns, which
        # avoids one extra query per row.
        cursor.execute("SELECT ID, Path, Name, ParentID, PathHash FROM db_path")
        rows = cursor.fetchall()

        for path_id, full_path, name, parent_id, path_hash in rows:
            # Skip paths that already have a node row.
            cursor.execute(exists_sql, (path_id,))
            if cursor.fetchone()[0] > 0:
                print(f"⚠️ PathID {path_id} 已存在,跳过插入")
                continue

            # Reset per row so that a failure below can never reuse the
            # pattern (and therefore the size) of the previous file.
            file80h_pattern = None
            try:
                file80h_pattern = GetFile80hPattern(full_path)
                fragments = GetFragmentData(file80h_pattern)
                # Fragments beyond the fourth are not stored.
                extent_count = min(len(fragments), max_extents)
                print(f"✅ 分片数量为: {extent_count}")
            except Exception as e:
                # Best effort: the MFT helpers are opaque here, so any
                # failure degrades to "no extents" instead of aborting.
                print(f"⚠️ 获取 ExtentCount 失败,使用默认值 0: {e}")
                fragments = []
                extent_count = 0

            name_hash = hashlib.sha256(name.encode()).hexdigest()
            dir_layer = GetDirLayer(full_path)
            extend_name_id = GetExtendNameId(name, cursor)

            # Without a pattern there is nothing to measure; that failure
            # has already been reported above.
            file_size = 0
            if file80h_pattern is not None:
                try:
                    file_size = GetFileSize(file80h_pattern)
                except Exception as e:
                    print(f"⚠️ 获取文件大小失败,使用默认值 0: {e}")

            file_hash = GetFileHash(full_path)
            file_times = GetFilesTime(full_path)

            values = [
                path_id, parent_id, name_hash, path_hash,
                extend_name_id, dir_layer, group_id, user_id,
                file_times["FileCreateTime"], file_times["FileModifyTime"],
                file_times["FileAccessTime"], file_times["FileAuthTime"],
                file_size, 'default', file_hash, extent_count,
            ]

            # Three columns per stored fragment; unused extent slots are
            # padded with NULLs.
            extent_values = []
            for frag in fragments[:max_extents]:
                extent_values.extend(
                    [device_id, GetFragmentLocation(frag), GetFragmentLength(frag)]
                )
            extent_values.extend([None] * (len(extent_fields) - len(extent_values)))
            values += extent_values

            cursor.execute(insert_sql, values)

        conn.commit()
    finally:
        conn.close()
    print(f"✅ 数据已成功插入到 {table_name}")
# Script entry point: run the import with the default database and table.
if __name__ == "__main__":
    InsertNodeDataToDB()