"""Populate the node table of the NTFS info database from ``db_path`` rows.

Several values written here (file size, extent location/length) are
placeholder data produced with ``random``; they are not read from disk.
"""
import hashlib
import random
import sqlite3

from mft_analyze import GetFile80hPattern

# Number of extentN_* column triples written per node row.
_EXTENT_SLOTS = 4
_EXTENT_SUFFIXES = ("DeviceID", "Location", "Length")


def GetFileExtension(name: str) -> str:
    """Return the lower-cased text after the last '.' in *name*, or ""."""
    parts = name.rsplit('.', 1)
    return parts[1].lower() if len(parts) > 1 else ""


def GetExtendNameId(name: str, cursor: sqlite3.Cursor) -> int:
    """Look up the ``db_extend_name.ID`` matching the extension of *name*.

    Returns 0 when *name* has no extension or the extension is not present
    in ``db_extend_name``.
    """
    ext = GetFileExtension(name)
    if not ext:
        return 0
    cursor.execute("SELECT ID FROM db_extend_name WHERE ExtendName = ?", (ext,))
    result = cursor.fetchone()
    return result[0] if result else 0


def GetDirLayer(path: str) -> int:
    r"""Return the directory depth of *path*, counted in backslashes.

    Examples:
        "Z:\demo.jpg"          -> 0  (file in the root directory)
        "Z:\pictures\RHCE.jpg" -> 1  (file in a first-level subdirectory)

    An empty path, a lone backslash, or a path containing no backslash at
    all yields 0 (the result is never negative).
    """
    path = path.strip()
    if not path or path == "\\":
        return 0
    # Subtract the backslash that follows the drive/root; clamp so that a
    # path without any backslash does not produce -1.
    return max(path.count("\\") - 1, 0)


def _FetchFirstId(cursor: sqlite3.Cursor, table: str) -> int:
    """Return the smallest ``ID`` in *table*, or 0 when the table is empty.

    *table* is interpolated into the SQL text, so it must be a trusted,
    hard-coded table name (as it is for every caller in this module).
    """
    cursor.execute(f"SELECT ID FROM {table} ORDER BY ID LIMIT 1")
    result = cursor.fetchone()
    return result[0] if result else 0


def GetFirstGroupId(cursor: sqlite3.Cursor) -> int:
    """Return the first ``db_group.ID`` (default group), or 0 if none."""
    return _FetchFirstId(cursor, "db_group")


def GetFirstUserId(cursor: sqlite3.Cursor) -> int:
    """Return the first ``db_user.ID`` (default user), or 0 if none."""
    return _FetchFirstId(cursor, "db_user")


def GetDeviceId(cursor: sqlite3.Cursor) -> int:
    """Return the first ``db_device.ID``, or 0 if the table is empty."""
    return _FetchFirstId(cursor, "db_device")


def GetFileSize(full_path: str) -> int:
    """Return a placeholder file size: a random integer in [100, 999].

    *full_path* is currently ignored.
    """
    return random.randint(100, 999)


def GetFileHash(full_path: str) -> str:
    """Return a placeholder content hash: SHA-256 hex digest of the path text.

    The file content is not read; only the path string is hashed.
    """
    return hashlib.sha256(full_path.encode()).hexdigest()


def GetExtentCount(full_path: str) -> int:
    """Return the number of extents for *full_path*.

    Currently always 1: both the resident and the non-resident case are
    simplified to a single extent. Any exception raised while inspecting
    the 80h pattern is reported on stdout and also results in 1.
    """
    try:
        pattern = GetFile80hPattern(full_path)
        if not pattern:
            return 1  # default when no 80h attribute was found

        # Use the first 80h attribute (a file usually has only one).
        attr = pattern[0]
        if attr['is_resident']:
            return 1  # resident data occupies a single extent

        # TODO: non-resident data should be parsed for its real extent
        # count; simplified to 1 for now.
        return 1
    except Exception as e:
        print(f"❌ 获取ExtentCount出错: {e}, 使用默认值1")
        return 1  # fall back to the default on error


def GetRandomLocation() -> int:
    """Return a random placeholder location in [1000, 9999]."""
    return random.randint(1000, 9999)


def GetRandomLength() -> int:
    """Return a random placeholder length in [1000, 9999]."""
    return random.randint(1000, 9999)


def GetFileLocation(full_path: str) -> int:
    """Return the data location derived from the file's first 80h attribute.

    For a resident attribute the result is
    ``start_byte + offset + content_offset``, where ``content_offset`` is
    decoded little-endian from hex tokens 4..7 of ``sequence[2]``.
    For a non-resident attribute ``start_byte`` is returned (simplified; the
    runlist is not parsed). Falls back to ``GetRandomLocation()`` when no
    pattern is found or any exception occurs.

    NOTE(review): the layout of the dicts returned by ``GetFile80hPattern``
    is assumed from the keys used here -- confirm against ``mft_analyze``.
    """
    try:
        pattern = GetFile80hPattern(full_path)
        if not pattern:
            return GetRandomLocation()  # fall back to a random value

        attr = pattern[0]
        if attr['is_resident']:
            # sequence[2] is a whitespace-separated string of hex bytes;
            # tokens 4..7 hold the content offset.
            content_offset_bytes = attr['sequence'][2].split()[4:8]
            content_offset = int.from_bytes(
                bytes.fromhex(''.join(content_offset_bytes)),
                byteorder='little'
            )
            return attr['start_byte'] + attr['offset'] + content_offset

        # Non-resident: the runlist would have to be parsed; simplified
        # to the attribute's start byte.
        return attr['start_byte']
    except Exception as e:
        print(f"❌ 获取Location出错: {e}, 使用随机值")
        return GetRandomLocation()  # fall back to a random value on error


def GetFileLength(full_path: str) -> int:
    """Return the data length derived from the file's first 80h attribute.

    For a resident attribute the length is decoded little-endian from hex
    tokens 0..3 of ``sequence[2]``. For a non-resident attribute
    ``attribute_length`` is returned (simplified). Falls back to
    ``GetRandomLength()`` when no pattern is found or any exception occurs.
    """
    try:
        pattern = GetFile80hPattern(full_path)
        if not pattern:
            return GetRandomLength()  # fall back to a random value

        attr = pattern[0]
        if attr['is_resident']:
            # Tokens 0..3 of sequence[2] hold the content length.
            content_length_bytes = attr['sequence'][2].split()[0:4]
            return int.from_bytes(
                bytes.fromhex(''.join(content_length_bytes)),
                byteorder='little'
            )

        # Non-resident: simplified to the attribute length field.
        return attr['attribute_length']
    except Exception as e:
        print(f"❌ 获取Length出错: {e}, 使用随机值")
        return GetRandomLength()  # fall back to a random value on error


def _BuildExtentValues(device_id, extent_count):
    """Return the flat (DeviceID, Location, Length) values for all slots.

    Slots up to *extent_count* get *device_id* plus random placeholder
    location/length; remaining slots are filled with ``None``.
    """
    values = []
    for slot in range(1, _EXTENT_SLOTS + 1):
        if slot <= extent_count:
            location = GetRandomLocation()
            length = GetRandomLength()
            values.extend([device_id, location, length])
        else:
            values.extend([None, None, None])
    return values


def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
    """Insert one node row into *table_name* for every row of ``db_path``.

    Rows whose ``PathID`` already exists in *table_name* are skipped. All
    inserts are committed together at the end; if any statement raises,
    nothing is committed and the connection is still closed.

    Args:
        db_path: Path of the SQLite database file.
        table_name: Destination table. It is interpolated into the SQL
            text, so it must be a trusted identifier, never user input.
    """
    fields = [
        'PathID', 'ParentID', 'NameHash', 'PathHash', 'ExtendNameID',
        'DirLayer', 'GroupID', 'UserID', 'FileSize', 'FileMode',
        'FileHash', 'ExtentCount'
    ]
    fields += [
        f"extent{slot}_{suffix}"
        for slot in range(1, _EXTENT_SLOTS + 1)
        for suffix in _EXTENT_SUFFIXES
    ]
    # The column list is identical for every row, so build the SQL once.
    placeholders = ', '.join(['?'] * len(fields))
    insert_sql = (
        f"INSERT INTO {table_name} ({', '.join(fields)}) "
        f"VALUES ({placeholders})"
    )
    # Check for duplicates in the same table we insert into.
    exists_sql = f"SELECT COUNT(*) FROM {table_name} WHERE PathID = ?"

    inserted_count = 0  # number of rows actually inserted
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        group_id = GetFirstGroupId(cursor)
        user_id = GetFirstUserId(cursor)
        device_id = GetDeviceId(cursor)

        # PathHash is copied verbatim from db_path, so fetch it here
        # instead of issuing one extra query per row.
        cursor.execute("SELECT ID, Path, Name, ParentID, PathHash FROM db_path")
        rows = cursor.fetchall()

        for path_id, full_path, name, parent_id, path_hash in rows:
            cursor.execute(exists_sql, (path_id,))
            if cursor.fetchone()[0] > 0:
                print(f"⚠️ PathID {path_id} 已存在,跳过插入")
                continue

            # Derived column values.
            name_hash = hashlib.sha256(name.encode()).hexdigest()
            dir_layer = GetDirLayer(full_path)
            extend_name_id = GetExtendNameId(name, cursor)
            file_size = GetFileSize(full_path)
            file_hash = GetFileHash(full_path)
            extent_count = GetExtentCount(full_path)

            values = [
                path_id, parent_id, name_hash, path_hash, extend_name_id,
                dir_layer, group_id, user_id, file_size, 'default',
                file_hash, extent_count
            ]
            values += _BuildExtentValues(device_id, extent_count)

            cursor.execute(insert_sql, values)
            inserted_count += 1

        conn.commit()
    finally:
        # Always release the connection, even when a statement failed.
        conn.close()

    if inserted_count > 0:
        print(f"✅ 成功插入 {inserted_count} 条数据到 {table_name} 表")
    else:
        print("ℹ️ 没有新的数据被插入数据库(可能所有条目已存在或没有可处理的数据)")


if __name__ == '__main__':
    InsertNodeDataToDB()