finish fragment files copy

2025-05-22 13:03:09 +08:00
parent 0c98dfecda
commit 3347abe02f
8 changed files with 252 additions and 96 deletions
--- a/files_utils/files_sort.py
+++ b/files_utils/files_sort.py
@@ -0,0 +1,148 @@
+import sqlite3
+
+
+def GetFilesDBPathInfo(db_path: str = "../src/db_ntfs_info.db",
+                       table_name: str = "db_path",
+                       files_path=None) -> list:
+    """
+    Look up the ID and Name columns for each of the given file paths.
+
+    Every path is matched against the ``Path`` column of ``table_name`` and
+    only the first matching row is used. Paths with no matching row, and
+    paths whose query fails, are reported with ``print`` and skipped.
+
+    :param db_path: path of the SQLite database file
+    :param table_name: name of the table to query; it is formatted directly
+        into the SQL text, so it must be a trusted identifier
+    :param files_path: list of full file paths to look up (None means empty)
+    :return: list of {'absolute_path': str, 'id': int, 'name': str} dicts,
+        in the order the matching paths were given
+    """
+    if files_path is None:
+        files_path = []
+    results = []
+
+    # Placeholders can only bind values, so the table name is formatted in.
+    # The statement is identical for every path, so it is built once.
+    sql = f"SELECT ID, Name FROM {table_name} WHERE Path = ?"
+
+    conn = sqlite3.connect(db_path)
+    try:
+        cursor = conn.cursor()
+        for path in files_path:
+            try:
+                cursor.execute(sql, (path,))
+                row = cursor.fetchone()
+            except Exception as e:
+                # Best effort per path: report the failure and keep going.
+                print(f"查询失败：{path}，错误：{e}")
+                continue
+
+            if row:
+                results.append({
+                    'absolute_path': path,
+                    'id': row[0],
+                    'name': row[1]
+                })
+            else:
+                print(f"未找到匹配记录：{path}")
+    finally:
+        # Release the connection even if something unexpected is raised.
+        conn.close()
+    return results
+
+
+def GetFilesDBNodeInfo(db_path: str = "../src/db_ntfs_info.db", table_name: str = "db_node",
+                       path_records: list = None) -> list:
+    """
+    Fetch the extent (fragment) information for each path record.
+
+    For every record, the first row of ``table_name`` whose ``PathID`` equals
+    the record's ``id`` is read. The columns ``extent1_Location`` /
+    ``extent1_Length`` through ``extent4_Location`` / ``extent4_Length`` are
+    turned into fragments; an extent is kept only when both values are
+    present and the length is positive. Records with no matching row, and
+    records whose lookup fails, are reported with ``print`` and skipped.
+
+    :param db_path: path of the SQLite database file
+    :param table_name: name of the node table; it is formatted directly into
+        the SQL text, so it must be a trusted identifier
+    :param path_records: result list of GetFilesDBPathInfo, i.e. dicts with
+        the keys 'id', 'absolute_path' and 'name' (None means empty)
+    :return: list of dicts with the keys 'absolute_path', 'name', 'path_id',
+        'extent_count' and 'fragments', where 'fragments' is a list of
+        {'start_byte': ..., 'length': ...} dicts
+    """
+    # A missing argument means "nothing to look up" rather than a TypeError.
+    if path_records is None:
+        path_records = []
+    results = []
+
+    # Placeholders can only bind values, so the table name is formatted in.
+    sql = f"SELECT * FROM {table_name} WHERE PathID = ?"
+
+    conn = sqlite3.connect(db_path)
+    try:
+        cursor = conn.cursor()
+
+        for record in path_records:
+            path_id = record['id']
+            absolute_path = record['absolute_path']
+            name = record['name']
+
+            try:
+                cursor.execute(sql, (path_id,))
+                row = cursor.fetchone()
+
+                if not row:
+                    print(f"未找到 PathID={path_id} 在表 {table_name} 中的记录")
+                    continue
+
+                # Map column names to values so fields can be read by name.
+                columns = [desc[0] for desc in cursor.description]
+                node_data = dict(zip(columns, row))
+
+                extent_count = node_data.get("ExtentCount", 0)
+
+                # Collect the usable extents from extent1 .. extent4.
+                fragments = []
+                for i in range(1, 5):
+                    loc = node_data.get(f"extent{i}_Location")
+                    length = node_data.get(f"extent{i}_Length")
+
+                    if loc is not None and length is not None and length > 0:
+                        fragments.append({
+                            "start_byte": loc,
+                            "length": length
+                        })
+
+                results.append({
+                    "absolute_path": absolute_path,
+                    "name": name,
+                    "path_id": path_id,
+                    "extent_count": extent_count,
+                    "fragments": fragments
+                })
+
+            except Exception as e:
+                # Best effort per record: report the failure and keep going.
+                print(f"查询失败：PathID={path_id}, 错误：{e}")
+    finally:
+        # Release the connection even if a malformed record raises above.
+        conn.close()
+    return results
+
+
+def SortFragmentsByStartByte(file_extents_list: list) -> list:
+    """
+    Flatten the fragments of all files into one list ordered by start_byte.
+
+    Each fragment is tagged with its 1-based position among the fragments of
+    its own file, where that position is taken after sorting the file's
+    fragments by start_byte.
+
+    :param file_extents_list: result list of GetFilesDBNodeInfo
+    :return: list of dicts with the keys 'absolute_path', 'filename',
+        'extent_count', 'start_byte', 'length' and 'fragment_index',
+        sorted by 'start_byte'
+    """
+    def start_of(item):
+        return item['start_byte']
+
+    flattened = []
+
+    for entry in file_extents_list:
+        path = entry['absolute_path']
+        fname = entry['name']
+        count = entry['extent_count']
+
+        # Number the fragments in per-file start_byte order.
+        ordered = sorted(entry['fragments'], key=start_of)
+        flattened.extend(
+            {
+                'absolute_path': path,
+                'filename': fname,
+                'extent_count': count,
+                'start_byte': piece['start_byte'],
+                'length': piece['length'],
+                'fragment_index': position
+            }
+            for position, piece in enumerate(ordered, start=1)
+        )
+
+    # Global ordering across all files.
+    return sorted(flattened, key=start_of)
+
+
+def GetSortFragments(db_path: str = "../src/db_ntfs_info.db", files_list: list = None) -> list:
+    """
+    Resolve file paths to their fragments, ordered by start_byte.
+
+    Runs the three steps in sequence: GetFilesDBPathInfo on table "db_path",
+    GetFilesDBNodeInfo on table "db_node", then SortFragmentsByStartByte.
+
+    :param db_path: path of the SQLite database file
+    :param files_list: list of full file paths to look up
+    :return: the list produced by SortFragmentsByStartByte
+    """
+    path_records = GetFilesDBPathInfo(db_path=db_path, table_name="db_path", files_path=files_list)
+    extent_records = GetFilesDBNodeInfo(db_path=db_path, table_name="db_node", path_records=path_records)
+    return SortFragmentsByStartByte(extent_records)