diff --git a/db_manage/create_tables.py b/db_manage/create_tables.py index a2e5fe9..dc74765 100644 --- a/db_manage/create_tables.py +++ b/db_manage/create_tables.py @@ -86,9 +86,9 @@ def CreateDBDeviceTable(db_path='../src/db_ntfs_info.db', table_name='db_device' def CreateDBNodeTable(db_path='../src/db_ntfs_info.db', table_name='db_node'): """ - 在指定路径下创建 SQLite 数据库,并在其中创建节点信息表。 + 创建 NewDBNode 表,用于存储文件的具体属性和物理分布信息。 - :param db_path: str, 数据库文件的路径 + :param db_path: str, 数据库文件路径 :param table_name: str, 要创建的表名 :return: None """ @@ -100,28 +100,18 @@ def CreateDBNodeTable(db_path='../src/db_ntfs_info.db', table_name='db_node'): # 连接到SQLite数据库(如果文件不存在会自动创建) conn = sqlite3.connect(db_path) - - # 创建一个游标对象 + conn.execute("PRAGMA foreign_keys = ON;") # 开启外键支持 cursor = conn.cursor() # 动态构建创建表的SQL语句 create_table_sql = f""" CREATE TABLE IF NOT EXISTS {table_name} ( ID INTEGER PRIMARY KEY AUTOINCREMENT, - PathID INTEGER, - ParentID INTEGER, - NameHash TEXT, - PathHash TEXT, + PathID INTEGER NOT NULL, ExtendNameID INTEGER, - DirLayer INTEGER, GroupID INTEGER, UserID INTEGER, - FileCreateTime TEXT, - FileModifyTime TEXT, - FileAccessTime TEXT, - FileAuthTime TEXT, FileSize INTEGER, - FileMode INTEGER, FileHash TEXT, ExtentCount INTEGER, extent1_DeviceID INTEGER, @@ -137,21 +127,17 @@ def CreateDBNodeTable(db_path='../src/db_ntfs_info.db', table_name='db_node'): extent4_Location INTEGER, extent4_Length INTEGER, - -- 外键约束(可选) - FOREIGN KEY(PathID) REFERENCES path_table(ID), - FOREIGN KEY(ExtendNameID) REFERENCES extname_table(ID), - FOREIGN KEY(GroupID) REFERENCES groups(ID), - FOREIGN KEY(UserID) REFERENCES users(ID) + -- 外键约束 + FOREIGN KEY(PathID) REFERENCES NewDBPath(ID), + FOREIGN KEY(ExtendNameID) REFERENCES db_extend(ID), + FOREIGN KEY(GroupID) REFERENCES db_group(ID), + FOREIGN KEY(UserID) REFERENCES db_user(ID) ); """ # 执行SQL语句 cursor.execute(create_table_sql) - - # 提交更改 conn.commit() - - # 关闭连接 conn.close() print(f"表 [{table_name}] 已在数据库 [{db_path}] 中创建成功") @@ -280,12 
+266,11 @@ def CreateDBExtendSnippetTable(db_path='../src/db_ntfs_info.db', table_name='db_ print(f"表 [{table_name}] 已在数据库 [{db_path}] 中创建成功") -def CreateDBPathTable(db_path='../src/db_path.db', table_name='db_path'): +def CreateDBPathTable(db_path='../src/db_ntfs_info.db', table_name='db_path'): """ - 在指定路径下创建 SQLite 数据库,并在其中创建路径信息表, - 包含 DeviceID 字段,用于标记文件所属设备(磁盘)。 + 创建 NewDBPath 表,用于存储文件/目录的路径信息。 - :param db_path: str, 数据库文件的路径 + :param db_path: str, 数据库文件路径 :param table_name: str, 要创建的表名 :return: None """ @@ -295,39 +280,42 @@ def CreateDBPathTable(db_path='../src/db_path.db', table_name='db_path'): if directory and not os.path.exists(directory): os.makedirs(directory) - # 连接到SQLite数据库(如果文件不存在会自动创建) + # 连接到SQLite数据库(如果不存在会自动创建) conn = sqlite3.connect(db_path) + conn.execute("PRAGMA foreign_keys = ON;") # 开启外键支持 cursor = conn.cursor() - # 动态构建创建表的SQL语句(包含 DeviceID 外键) + # 动态构建创建表的SQL语句 create_table_sql = f""" CREATE TABLE IF NOT EXISTS {table_name} ( ID INTEGER PRIMARY KEY AUTOINCREMENT, --- DeviceID TEXT NOT NULL, Path TEXT NOT NULL, Name TEXT NOT NULL, + DirLayer INTEGER NOT NULL, PathHash TEXT UNIQUE NOT NULL, IsDir INTEGER NOT NULL CHECK(IsDir IN (0, 1)), ParentID INTEGER, ContentSize INTEGER, + FileCreateTime TEXT, + FileModifyTime TEXT, + FileAccessTime TEXT, + FileAuthTime TEXT, + FileMode TEXT, - -- 外键约束 --- FOREIGN KEY(DeviceID) REFERENCES db_device(ID), + -- 外键约束(可选) FOREIGN KEY(ParentID) REFERENCES {table_name}(ID) ); """ # 执行SQL语句 cursor.execute(create_table_sql) - - # 提交更改 conn.commit() conn.close() print(f"表 [{table_name}] 已在数据库 [{db_path}] 中创建成功") -def CreateDBExtendNameTable(db_path='../src/db_extend_name.db', table_name='db_extend_name'): +def CreateDBExtendNameTable(db_path='../src/db_ntfs_info.db', table_name='db_extend_name'): """ 在指定路径下创建 SQLite 数据库,并在其中创建扩展名表。 diff --git a/ntfs_utils/db_path.py b/ntfs_utils/db_path.py index 5d5abf6..5eb2e51 100644 --- a/ntfs_utils/db_path.py +++ b/ntfs_utils/db_path.py @@ -23,10 +23,37 @@ def 
def GetDirLayer(full_path: str, volume_letter: str) -> int:
    r"""Return how many directories deep ``full_path`` sits below the volume root.

    Entries located directly in the root are layer 0, entries inside a
    first-level folder are layer 1, and so on.

    Examples (for ``volume_letter='Z'``):
        Z:\demo.txt                   -> 0
        Z:\folder\test.txt            -> 1
        Z:\folder\subfolder\file.txt  -> 2
        Z:\                           -> 0

    Args:
        full_path: Absolute Windows-style path, e.g. ``Z:\folder\test.txt``.
        volume_letter: Drive letter of the volume, e.g. ``'Z'``.

    Returns:
        The zero-based directory layer, or -1 when ``full_path`` is not
        located on the given volume.
    """
    # This docstring is raw on purpose: sequences such as "\f" and "\t" in the
    # example paths would otherwise be interpreted as control characters.
    root_prefix = f"{volume_letter.upper()}:\\"

    # Windows accepts "/" as an alternative separator; normalise so both
    # spellings of the same path yield the same layer.
    normalized = full_path.replace("/", "\\")

    # Drive letters are case-insensitive on Windows, so compare the prefix
    # without regard to case instead of rejecting "z:\..." for volume "Z".
    if normalized[:len(root_prefix)].upper() != root_prefix:
        return -1  # path is not on this volume

    # Split on the Windows separator explicitly (os.sep is "/" elsewhere) and
    # drop empty components produced by trailing or doubled separators.
    components = [
        part for part in normalized[len(root_prefix):].split("\\") if part
    ]

    # The last component is the entry itself; everything before it is a
    # directory level. The bare root has no components and is layer 0.
    return max(len(components) - 1, 0)
@@ def ScanVolume(volume_letter: str): return result -def InsertPathDataToDB(data, db_path='../src/db_ntfs_info.db', table_name='db_path', batch_size=20): +def InsertNewDBPathToDB(data, db_path='../src/db_ntfs_info.db', table_name='db_path', batch_size=20): """ - 批量将扫描结果写入数据库。 + 批量将扫描结果写入 NewDBPath 表中,支持新字段。 + + 参数: + data: list of dict,扫描结果数据 + db_path: str,SQLite 数据库路径 + table_name: str,目标表名 + batch_size: int,每多少条提交一次 """ conn = sqlite3.connect(db_path) cursor = conn.cursor() @@ -111,6 +153,12 @@ def InsertPathDataToDB(data, db_path='../src/db_ntfs_info.db', table_name='db_pa IsDir INTEGER NOT NULL CHECK(IsDir IN (0, 1)), ParentID INTEGER, ContentSize INTEGER, + DirLayer INTEGER NOT NULL, + FileCreateTime TEXT, + FileModifyTime TEXT, + FileAccessTime TEXT, + FileAuthTime TEXT, + FileMode TEXT, FOREIGN KEY(ParentID) REFERENCES {table_name}(ID) ); @@ -120,8 +168,8 @@ def InsertPathDataToDB(data, db_path='../src/db_ntfs_info.db', table_name='db_pa # 插入语句(忽略重复 PathHash) insert_sql = f""" INSERT OR IGNORE INTO {table_name} - (Path, Name, PathHash, IsDir, ParentID, ContentSize) - VALUES (?, ?, ?, ?, ?, ?) + (Path, Name, PathHash, IsDir, ParentID, ContentSize, DirLayer, FileCreateTime, FileModifyTime, FileAccessTime, FileAuthTime, FileMode) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
""" total_inserted = 0 @@ -134,7 +182,13 @@ def InsertPathDataToDB(data, db_path='../src/db_ntfs_info.db', table_name='db_pa item['PathHash'], item['IsDir'], item['ParentID'] or 0, - item['ContentSize'] + item['ContentSize'], + item['DirLayer'], + item['FileCreateTime'], + item['FileModifyTime'], + item['FileAccessTime'], + item['FileAuthTime'], + item['FileMode'] )) if len(batch) >= batch_size: @@ -151,7 +205,7 @@ def InsertPathDataToDB(data, db_path='../src/db_ntfs_info.db', table_name='db_pa total_inserted += cursor.rowcount print(f"✅ 提交最后一批 {len(batch)} 条数据") - print(f"✅ 总共插入 {total_inserted} 条记录到数据库。") + print(f"✅ 总共插入 {total_inserted} 条记录到 NewDBPath 表。") except Exception as e: print(f"❌ 插入失败: {e}") @@ -169,9 +223,9 @@ def main(): scanned_data = ScanVolume(volume_letter) print(f"📊 共扫描到 {len(scanned_data)} 条有效记录,开始入库...") - InsertPathDataToDB(scanned_data) + InsertNewDBPathToDB(scanned_data) - print("✅ 全盘扫描与入库完成") + print("✅ 全盘扫描与 NewDBPath 表入库完成") if __name__ == "__main__": diff --git a/src/db_ntfs_info.db b/src/db_ntfs_info.db index ce7cfdf..fff2a90 100644 Binary files a/src/db_ntfs_info.db and b/src/db_ntfs_info.db differ