258 lines
8.4 KiB
Python
258 lines
8.4 KiB
Python
import hashlib
|
||
import os
|
||
import sqlite3
|
||
from datetime import datetime
|
||
|
||
from ntfs_utils.mft_analyze import GetFile80hPattern, GetFragmentData, ExtractSequenceHexValues, hex_list_to_int
|
||
from ntfs_utils.main import volume_letter
|
||
|
||
|
||
# 工具函数:获取文件扩展名
|
||
def GetFileExtension(name: str) -> str:
    """Return the lower-cased text after the last dot in *name*.

    An empty string is returned when *name* contains no dot at all.
    """
    _stem, dot, suffix = name.rpartition('.')
    if not dot:
        # No dot anywhere in the name, so there is no extension.
        return ""
    return suffix.lower()
|
||
|
||
|
||
# 获取 ExtendNameID(基于文件名后缀)
|
||
def GetExtendNameId(name: str, cursor: sqlite3.Cursor) -> int:
    """Look up the ``db_extend_name.ID`` matching the extension of *name*.

    Args:
        name: File name whose extension is extracted with ``GetFileExtension``.
        cursor: Open cursor on the database that holds ``db_extend_name``.

    Returns:
        The ID of the matching row, or 0 when the name has no extension or
        the extension is not present in the table.
    """
    suffix = GetFileExtension(name)
    if not suffix:
        return 0

    row = cursor.execute(
        "SELECT ID FROM db_extend_name WHERE ExtendName = ?", (suffix,)
    ).fetchone()
    if row is None:
        return 0
    return row[0]
|
||
|
||
|
||
# 获取 DirLayer(路径层级)
|
||
def GetDirLayer(path: str) -> int:
    """Return the directory depth of a backslash-separated *path*.

    The depth is the number of backslashes minus one, so ``\\a\\b.txt`` is
    layer 1. An empty path, a missing path (``None``) and the bare root
    ``\\`` are layer 0.

    Args:
        path: Backslash-separated path; surrounding whitespace is ignored.

    Returns:
        A non-negative layer number.
    """
    # A NULL Path column arrives as None; treat it like an empty path.
    path = (path or "").strip()
    if not path or path == "\\":
        return 0
    # A path without any backslash would otherwise yield -1; clamp to 0.
    return max(path.count("\\") - 1, 0)
|
||
|
||
|
||
# 获取 GroupID(默认第一个)
|
||
def GetFirstGroupId(cursor: sqlite3.Cursor) -> int:
    """Return the smallest ``ID`` in ``db_group``, or 0 if the table is empty."""
    row = cursor.execute("SELECT ID FROM db_group ORDER BY ID LIMIT 1").fetchone()
    if row is None:
        return 0
    return row[0]
|
||
|
||
|
||
# 获取 UserID(默认第一个)
|
||
def GetFirstUserId(cursor: sqlite3.Cursor) -> int:
    """Return the smallest ``ID`` in ``db_user``, or 0 if the table is empty."""
    row = cursor.execute("SELECT ID FROM db_user ORDER BY ID LIMIT 1").fetchone()
    if row is None:
        return 0
    return row[0]
|
||
|
||
|
||
def GetFilesTime(file_path):
    """
    Return the creation, modification, access and metadata-change times of a file.

    Timestamps are taken from ``os.stat``:

    - ``st_birthtime`` (when the platform exposes it, otherwise ``st_ctime``)
      is reported as ``FileCreateTime``.
    - ``st_mtime`` is reported as ``FileModifyTime``.
    - ``st_atime`` is reported as ``FileAccessTime``.
    - ``st_ctime`` is reported as ``FileAuthTime``. On Unix this is the last
      metadata change; on Windows it has historically been the creation time.

    Args:
        file_path (str): Absolute path of the file.

    Returns:
        dict: ``FileCreateTime``, ``FileModifyTime``, ``FileAccessTime`` and
        ``FileAuthTime`` as ``'%Y-%m-%d %H:%M:%S'`` strings in local time.
        Every value is ``"default"`` when the path does not exist or the
        timestamps cannot be read.
    """
    def default_times():
        # Build a fresh dict on every call so callers never share one object.
        return dict.fromkeys(
            ("FileCreateTime", "FileModifyTime", "FileAccessTime", "FileAuthTime"),
            "default",
        )

    def ts_to_str(timestamp):
        return datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S')

    if not os.path.exists(file_path):
        return default_times()

    try:
        stat_info = os.stat(file_path)

        modify_time = ts_to_str(stat_info.st_mtime)
        access_time = ts_to_str(stat_info.st_atime)
        # st_ctime is the metadata-change time wherever it differs from the
        # creation time, so it belongs in FileAuthTime.
        auth_time = ts_to_str(stat_info.st_ctime)

        # Prefer the true creation time when the platform provides it.
        # Best effort: an unrepresentable birth time degrades only this field.
        try:
            create_time = ts_to_str(
                getattr(stat_info, 'st_birthtime', stat_info.st_ctime)
            )
        except (OSError, OverflowError, ValueError):
            create_time = "default"

        return {
            "FileCreateTime": create_time,
            "FileModifyTime": modify_time,
            "FileAccessTime": access_time,
            "FileAuthTime": auth_time,
        }

    except Exception as e:
        print(f"❌ 获取文件时间失败: {e}")
        return default_times()
|
||
|
||
|
||
# 获取设备ID(db_device第一条记录)
|
||
def GetDeviceId(cursor: sqlite3.Cursor) -> int:
    """Return the smallest ``ID`` in ``db_device``, or 0 if the table is empty."""
    row = cursor.execute("SELECT ID FROM db_device ORDER BY ID LIMIT 1").fetchone()
    if row is None:
        return 0
    return row[0]
|
||
|
||
|
||
# Get the file size from the parsed 80h attribute pattern
|
||
def GetFileSize(file80h_pattern):
    """Return the file size described by a parsed 80h attribute pattern.

    When the first entry is flagged ``is_resident``, the size is the
    ``byte_length`` of the first fragment returned by ``GetFragmentData``.
    Otherwise it is decoded from elements 56..63 of the sequence returned by
    ``ExtractSequenceHexValues``.

    Args:
        file80h_pattern: Sequence whose first element is a mapping with an
            ``is_resident`` key, as produced by ``GetFile80hPattern``.

    Returns:
        The size value produced by the helpers (``None`` if a resident
        fragment carries no ``byte_length``).
    """
    is_resident = file80h_pattern[0].get('is_resident')
    if not is_resident:
        # NOTE(review): presumably elements 56..63 hold the 8-byte size field
        # of the non-resident attribute header — confirm against the layout
        # that ExtractSequenceHexValues returns.
        hex_values = ExtractSequenceHexValues(file80h_pattern)
        size_field = hex_values[56:64]
        return hex_list_to_int(size_field)

    first_fragment = GetFragmentData(file80h_pattern)[0]
    return first_fragment.get('byte_length')
|
||
|
||
|
||
# Get the file hash (placeholder: SHA-256 of the path string, not of the file contents)
|
||
def GetFileHash(full_path: str) -> str:
    """Return the SHA-256 hex digest of the *path string* itself.

    This is a placeholder value: the file at *full_path* is never opened,
    so the digest does not reflect the file's contents.
    """
    digest = hashlib.sha256()
    digest.update(full_path.encode())
    return digest.hexdigest()
|
||
|
||
|
||
# 新增:获取文件片段位置和长度
|
||
def GetFragmentLocation(fragment):
    """Return the fragment's ``starting_byte`` value, or 0 when the key is absent."""
    start = fragment.get('starting_byte', 0)
    return start
|
||
|
||
|
||
def GetFragmentLength(fragment):
    """Return the fragment's ``byte_length`` value, or 0 when the key is absent."""
    length = fragment.get('byte_length', 0)
    return length
|
||
|
||
|
||
# 主函数:将 db_path 数据导入 db_node
|
||
# Number of (DeviceID, Location, Length) column triples stored per node row.
_MAX_EXTENTS = 4


def _BuildVolumeRoot(letter: str) -> str:
    """Return the drive root for a volume given as ``Y``, ``Y:`` or an already rooted form."""
    # Strip any trailing colon or separator so every input form maps to one root.
    return letter.rstrip(":\\/") + ":\\"


def _BuildExtentColumns(fragments, device_id):
    """Flatten up to ``_MAX_EXTENTS`` fragments into extent column values, padded with None."""
    columns = []
    for fragment in fragments[:_MAX_EXTENTS]:
        columns.extend(
            [device_id, GetFragmentLocation(fragment), GetFragmentLength(fragment)]
        )
    columns.extend([None] * (3 * _MAX_EXTENTS - len(columns)))
    return columns


def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node', batch_size=20):
    """Populate *table_name* with one node row per ``db_path`` record.

    For every row of ``db_path`` the function builds the absolute path on the
    volume named by the imported ``volume_letter``, gathers name/path hashes,
    extension ID, directory layer, timestamps, size and up to four extents,
    and inserts the result in batches. Rows whose ``PathID`` already exists
    in *table_name* are skipped.

    Args:
        db_path: Path of the SQLite database file.
        table_name: Target table. It is interpolated into the SQL text
            because identifiers cannot be bound as parameters, so pass only
            trusted names.
        batch_size: Number of rows accumulated before each
            ``executemany`` + ``commit``.

    Raises:
        sqlite3.Error: On database failures. The connection is closed before
            the error propagates; batches already committed are kept.
    """
    extent_fields = [
        f"extent{index}_{suffix}"
        for index in range(1, _MAX_EXTENTS + 1)
        for suffix in ("DeviceID", "Location", "Length")
    ]
    insert_fields = [
        'PathID', 'ParentID', 'NameHash', 'PathHash',
        'ExtendNameID', 'DirLayer', 'GroupID', 'UserID',
        'FileCreateTime', 'FileModifyTime', 'FileAccessTime', 'FileAuthTime',
        'FileSize', 'FileMode', 'FileHash', 'ExtentCount',
        *extent_fields,
    ]
    insert_placeholders = ', '.join('?' * len(insert_fields))
    insert_sql = f"INSERT INTO {table_name} ({', '.join(insert_fields)}) VALUES ({insert_placeholders})"
    # Check duplicates against the table being written, not a fixed name.
    exists_sql = f"SELECT 1 FROM {table_name} WHERE PathID = ? LIMIT 1"

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        volume_root = _BuildVolumeRoot(volume_letter)
        print(f"🔍 当前处理磁盘根目录:{volume_root}")

        group_id = GetFirstGroupId(cursor)
        user_id = GetFirstUserId(cursor)
        device_id = GetDeviceId(cursor)

        # PathHash is fetched here so no extra per-row query is needed.
        cursor.execute("SELECT ID, Path, Name, ParentID, PathHash FROM db_path")
        rows = cursor.fetchall()

        batch = []

        for path_id, relative_path, name, parent_id, path_hash in rows:
            full_path = os.path.join(volume_root, relative_path)

            # Skip rows that are already present in the target table.
            cursor.execute(exists_sql, (path_id,))
            if cursor.fetchone() is not None:
                print(f"⚠️ PathID {path_id} 已存在,跳过插入")
                continue

            # Reset per row so a failed lookup can never reuse the previous
            # file's pattern when the size is computed below.
            file80h_pattern = None
            try:
                file80h_pattern = GetFile80hPattern(full_path)
                fragments = GetFragmentData(file80h_pattern)
                extent_count = min(len(fragments), _MAX_EXTENTS)
            except Exception as e:
                print(f"⚠️ 获取 ExtentCount 失败,使用默认值 0: {e}")
                fragments = []
                extent_count = 0

            name_hash = hashlib.sha256(name.encode()).hexdigest()
            dir_layer = GetDirLayer(relative_path)
            extend_name_id = GetExtendNameId(name, cursor)

            if file80h_pattern is None:
                # The pattern lookup failed and was already reported above.
                file_size = 0
            else:
                try:
                    file_size = GetFileSize(file80h_pattern)
                except Exception as e:
                    print(f"⚠️ 获取文件大小失败,使用默认值 0: {e}")
                    file_size = 0

            file_hash = GetFileHash(full_path)
            file_times = GetFilesTime(full_path)

            batch.append([
                path_id, parent_id, name_hash, path_hash,
                extend_name_id, dir_layer, group_id, user_id,
                file_times["FileCreateTime"], file_times["FileModifyTime"],
                file_times["FileAccessTime"], file_times["FileAuthTime"],
                file_size, 'default', file_hash, extent_count,
                *_BuildExtentColumns(fragments, device_id),
            ])

            # Flush a full batch.
            if len(batch) >= batch_size:
                cursor.executemany(insert_sql, batch)
                conn.commit()
                print(f"✅ 提交一批 {len(batch)} 条记录到 {table_name}")
                batch.clear()

        # Flush whatever is left over after the last full batch.
        if batch:
            cursor.executemany(insert_sql, batch)
            conn.commit()
            print(f"✅ 提交最后一批 {len(batch)} 条记录到 {table_name}")
    finally:
        # Always release the connection, even when a row fails part-way.
        conn.close()

    print(f"✅ 数据已成功插入到 {table_name} 表")
|
||
|
||
|
||
# Script entry point: run the import with the default database path,
# target table and batch size.
if __name__ == "__main__":
    InsertNodeDataToDB()
|