import hashlib
import os
import sqlite3
from datetime import datetime

from ntfs_utils.mft_analyze import (
    GetFile80hPattern,
    GetFragmentData,
    ExtractSequenceHexValues,
    hex_list_to_int,
)
from ntfs_utils.main import volume_letter


def GetFileExtension(name: str) -> str:
    """Return the lower-cased extension of *name* (text after the last '.'), or "" if none."""
    parts = name.rsplit('.', 1)
    return parts[1].lower() if len(parts) > 1 else ""


def GetExtendNameId(name: str, cursor: sqlite3.Cursor) -> int:
    """Look up the db_extend_name ID for *name*'s extension; 0 when absent or unknown."""
    ext = GetFileExtension(name)
    if not ext:
        return 0
    cursor.execute("SELECT ID FROM db_extend_name WHERE ExtendName = ?", (ext,))
    result = cursor.fetchone()
    return result[0] if result else 0


def GetDirLayer(path: str) -> int:
    """Return the directory depth of a backslash-separated *path* (root == 0)."""
    path = path.strip()
    if not path or path == "\\":
        return 0
    # One separator per level; subtract 1 so "\dir\file" counts as layer 1.
    return path.count("\\") - 1


def GetFirstGroupId(cursor: sqlite3.Cursor) -> int:
    """Return the smallest ID in db_group (default group), or 0 when the table is empty."""
    cursor.execute("SELECT ID FROM db_group ORDER BY ID LIMIT 1")
    result = cursor.fetchone()
    return result[0] if result else 0


def GetFirstUserId(cursor: sqlite3.Cursor) -> int:
    """Return the smallest ID in db_user (default user), or 0 when the table is empty."""
    cursor.execute("SELECT ID FROM db_user ORDER BY ID LIMIT 1")
    result = cursor.fetchone()
    return result[0] if result else 0


def GetFilesTime(file_path):
    """Collect the create/modify/access/auth timestamps of *file_path*.

    st_atime: last access time (FileAccessTime)
    st_mtime: last content-modification time (FileModifyTime)
    st_ctime: metadata-change time; on Windows this is the creation time (FileCreateTime)

    Args:
        file_path (str): absolute path of the file.

    Returns:
        dict: FileCreateTime / FileModifyTime / FileAccessTime / FileAuthTime
        formatted as "YYYY-mm-dd HH:MM:SS" strings, or "default" when unavailable.
    """
    default = {
        "FileCreateTime": "default",
        "FileModifyTime": "default",
        "FileAccessTime": "default",
        "FileAuthTime": "default",
    }
    if not os.path.exists(file_path):
        return default
    try:
        stat_info = os.stat(file_path)

        def ts_to_str(timestamp):
            return datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S')

        create_time = ts_to_str(stat_info.st_ctime)
        modify_time = ts_to_str(stat_info.st_mtime)
        access_time = ts_to_str(stat_info.st_atime)
        # "Auth" time uses st_birthtime where available (not on Windows);
        # otherwise falls back to st_ctime.
        try:
            auth_time = ts_to_str(getattr(stat_info, 'st_birthtime', stat_info.st_ctime))
        except Exception:
            auth_time = "default"
        return {
            "FileCreateTime": create_time,
            "FileModifyTime": modify_time,
            "FileAccessTime": access_time,
            "FileAuthTime": auth_time,
        }
    except Exception as e:
        print(f"❌ 获取文件时间失败: {e}")
        return default


def GetDeviceId(cursor: sqlite3.Cursor) -> int:
    """Return the smallest ID in db_device (first registered device), or 0 when empty."""
    cursor.execute("SELECT ID FROM db_device ORDER BY ID LIMIT 1")
    result = cursor.fetchone()
    return result[0] if result else 0


def GetFileSize(file80h_pattern):
    """Derive a file's size in bytes from its parsed $80 (DATA) attribute pattern.

    Args:
        file80h_pattern: list of parsed attribute records, or None/invalid.

    Returns:
        int: the size in bytes, or 0 when it cannot be determined.

    Raises:
        ValueError: when the hex sequence is too short to contain the size field.
    """
    if not file80h_pattern or not isinstance(file80h_pattern, list):
        return 0
    # NOTE(review): the resident branch reads fragment (run-list) data while the
    # non-resident branch parses the attribute header — for NTFS this looks
    # inverted, since resident data has no run list. Confirm against
    # GetFragmentData / GetFile80hPattern's actual contract before relying on it.
    if file80h_pattern[0].get('is_resident'):
        fragments = GetFragmentData(file80h_pattern)
        if fragments and len(fragments) > 0:
            return fragments[0].get('byte_length', 0)
    else:
        sequence_list = ExtractSequenceHexValues(file80h_pattern)
        if len(sequence_list) < 64:
            raise ValueError("序列长度不足,无法解析文件大小")
        # Bytes 56..63 of the attribute record are read as the size field.
        return hex_list_to_int(sequence_list[56:64])
    # Fix: previously fell through and implicitly returned None (inserting NULL
    # into FileSize); return 0 to match the documented default everywhere else.
    return 0


def GetFileHash(full_path: str) -> str:
    """Placeholder hash: SHA-256 of the path string, NOT of the file contents."""
    return hashlib.sha256(full_path.encode()).hexdigest()


def GetFragmentLocation(fragment):
    """Starting byte offset of a data fragment (0 when the key is missing)."""
    return fragment.get('starting_byte', 0)


def GetFragmentLength(fragment):
    """Length in bytes of a data fragment (0 when the key is missing)."""
    return fragment.get('byte_length', 0)


def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node', batch_size=20):
    """Import every row of db_path into *table_name* (db_node), batching inserts.

    Rows whose PathID already exists in db_node are skipped. Per-file metadata
    that cannot be read degrades to defaults instead of aborting the run.

    Args:
        db_path: path of the SQLite database file.
        table_name: destination table for node records.
        batch_size: number of rows buffered before each executemany/commit.
    """
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()

    # Normalize the configured volume letter to "X:\" — accepts "Y" or "Y:".
    if len(volume_letter) == 1:
        volume_root = f"{volume_letter}:\\"
    elif volume_letter.endswith(':'):
        volume_root = f"{volume_letter}\\"
    else:
        volume_root = f"{volume_letter}:\\"
    print(f"🔍 当前处理磁盘根目录:{volume_root}")

    group_id = GetFirstGroupId(cursor)
    user_id = GetFirstUserId(cursor)
    device_id = GetDeviceId(cursor)

    cursor.execute("SELECT ID, Path, Name, ParentID FROM db_path")
    rows = cursor.fetchall()

    insert_fields = [
        'PathID', 'ParentID', 'NameHash', 'PathHash', 'ExtendNameID', 'DirLayer',
        'GroupID', 'UserID', 'FileCreateTime', 'FileModifyTime', 'FileAccessTime',
        'FileAuthTime', 'FileSize', 'FileMode', 'FileHash', 'ExtentCount',
        # extent columns: up to 4 (DeviceID, Location, Length) triples
        "extent1_DeviceID", "extent1_Location", "extent1_Length",
        "extent2_DeviceID", "extent2_Location", "extent2_Length",
        "extent3_DeviceID", "extent3_Location", "extent3_Length",
        "extent4_DeviceID", "extent4_Location", "extent4_Length",
    ]
    insert_placeholders = ', '.join('?' * len(insert_fields))
    insert_sql = f"INSERT INTO {table_name} ({', '.join(insert_fields)}) VALUES ({insert_placeholders})"

    batch = []
    for row in rows:
        path_id, relative_path, name, parent_id = row
        full_path = os.path.join(volume_root, relative_path)

        # Skip rows already imported (same PathID).
        cursor.execute("SELECT COUNT(*) FROM db_node WHERE PathID = ?", (path_id,))
        exists = cursor.fetchone()[0]
        if exists > 0:
            print(f"⚠️ PathID {path_id} 已存在,跳过插入")
            continue

        try:
            file80h_pattern = GetFile80hPattern(full_path)
            fragments = GetFragmentData(file80h_pattern)
            extent_count = min(len(fragments), 4)
        except Exception as e:
            print(f"⚠️ 获取 ExtentCount 失败,使用默认值 0: {e}")
            # Fix: file80h_pattern was previously left unbound on this path,
            # making the GetFileSize call below raise NameError. None makes
            # GetFileSize return its 0 default.
            file80h_pattern = None
            fragments = []
            extent_count = 0

        name_hash = hashlib.sha256(name.encode()).hexdigest()
        dir_layer = GetDirLayer(relative_path)
        extend_name_id = GetExtendNameId(name, cursor)

        try:
            file_size = GetFileSize(file80h_pattern)
        except Exception as e:
            print(f"⚠️ 获取文件大小失败,使用默认值 0: {e}")
            file_size = 0

        file_hash = GetFileHash(full_path)

        file_times = GetFilesTime(full_path)
        create_time = file_times["FileCreateTime"]
        modify_time = file_times["FileModifyTime"]
        access_time = file_times["FileAccessTime"]
        auth_time = file_times["FileAuthTime"]

        # PathHash is precomputed in db_path; "" when the row vanished meanwhile.
        cursor.execute("SELECT PathHash FROM db_path WHERE ID = ?", (path_id,))
        path_hash_result = cursor.fetchone()
        path_hash = path_hash_result[0] if path_hash_result else ""

        # Flatten up to 4 fragments into (DeviceID, Location, Length) triples;
        # unused extent slots are stored as NULL.
        extent_data = []
        for i in range(4):
            if i < len(fragments):
                frag = fragments[i]
                extent_data.extend(
                    [device_id, GetFragmentLocation(frag), GetFragmentLength(frag)]
                )
            else:
                extent_data.extend([None, None, None])

        values = [
            path_id, parent_id, name_hash, path_hash, extend_name_id, dir_layer,
            group_id, user_id, create_time, modify_time, access_time, auth_time,
            file_size, 'default', file_hash, extent_count, *extent_data,
        ]
        batch.append(values)

        # Flush a full batch.
        if len(batch) >= batch_size:
            cursor.executemany(insert_sql, batch)
            conn.commit()
            print(f"✅ 提交一批 {len(batch)} 条记录到 {table_name}")
            batch.clear()

    # Flush the final partial batch.
    if batch:
        cursor.executemany(insert_sql, batch)
        conn.commit()
        print(f"✅ 提交最后一批 {len(batch)} 条记录到 {table_name}")

    conn.close()
    print(f"✅ 数据已成功插入到 {table_name} 表")


if __name__ == "__main__":
    InsertNodeDataToDB()