From 4d7c2e995c52e49e52c9d5334bdac56fe2ca3a41 Mon Sep 17 00:00:00 2001
From: Burgess Leo <1799594843@qq.com>
Date: Tue, 27 May 2025 13:10:13 +0800
Subject: [PATCH] project done but test failed

---
 fake_main.py                | 16 ++++------------
 files_utils/folders_save.py | 24 ++++++++++++------------
 ntfs_utils/db_node.py       | 13 +++++++++++--
 ntfs_utils/mft_analyze.py   | 43 ++++++++++++++-----------------------------
 4 files changed, 41 insertions(+), 55 deletions(-)

diff --git a/fake_main.py b/fake_main.py
index 7f46b32..a6c4c0f 100644
--- a/fake_main.py
+++ b/fake_main.py
@@ -5,17 +5,9 @@ from files_utils.files_sort import GetSortFragments
 from files_utils.folders_sort import ClassifyFilesAndFolders, ScanMultiFolders
 
 fragment_lists = {}
+target_path = r"Z:\test_files"
 mix_test_data = [
-    "CloudMusic\\AGA - MIZU.mp3",
-    "CloudMusic/AGA - 一.mp3",
-    "CloudMusic/Aaron Zigman - Main Title.mp3",
-    "CloudMusic/Anson Seabra - Keep Your Head Up Princess.mp3",
-    "CloudMusic/Anthony Keyrouz,Romy Wave - Something Just Like This (feat. Romy Wave).mp3",
-    "CloudMusic/Ava Max - Sweet but Psycho.mp3",
-    "CloudMusic\\",
-    "folder1/",
-    "CloudMusic/Cecilia Cheung - Turn Into Fireworks and Fall for You.mp3",
-    "CloudMusic/Color Music Choir - Something Just Like This (Live).mp3"
+    "test-copy"
 ]
 classify_files_and_folders = ClassifyFilesAndFolders(mix_test_data)
 files_list = classify_files_and_folders["files"]
@@ -26,6 +18,6 @@ sort_fragments = GetSortFragments(db_path="./src/db_ntfs_info.db", files_list=me
 for item in sort_fragments:
     extent_count = item['extent_count']
     if extent_count == 1:
-        CopySingleFragmentFiles(item, target_path=r"Z:/test_files")
+        CopySingleFragmentFiles(item, target_path=target_path)
     elif extent_count > 1:
-        CopyMultiFragmentFiles(item, fragment_lists=fragment_lists, target_path=r"Z:/test_files")
+        CopyMultiFragmentFiles(item, fragment_lists=fragment_lists, target_path=target_path)
diff --git a/files_utils/folders_save.py b/files_utils/folders_save.py
index 601881a..6c85f21 100644
--- a/files_utils/folders_save.py
+++ b/files_utils/folders_save.py
@@ -1,14 +1,14 @@
-import sqlite3
+import subprocess
 
-db_path = "../src/db_ntfs_info.db"
-conn = sqlite3.connect(db_path)
-cursor = conn.cursor()
+source_path = r"Y:\test-copy"
+target_path = r"Z:\test-copy"
 
-
-def GetDeviceId(cursor: sqlite3.Cursor) -> int:
-    cursor.execute("SELECT ID FROM db_device ORDER BY ID LIMIT 1")
-    result = cursor.fetchone()
-    return result[0] if result else 0
-
-
-print(GetDeviceId(cursor))
+subprocess.run([
+    "robocopy",
+    source_path,
+    target_path,
+    "/E",     # include subdirectories
+    "/R:3",   # number of retries
+    "/W:1",   # wait time between retries
+    "/MT:16"  # multithreaded (16 threads)
+])
diff --git a/ntfs_utils/db_node.py b/ntfs_utils/db_node.py
index 284dbfb..75187a5 100644
--- a/ntfs_utils/db_node.py
+++ b/ntfs_utils/db_node.py
@@ -109,10 +109,19 @@ def GetDeviceId(cursor: sqlite3.Cursor) -> int:
 
 
 # Get the file size (dummy data)
 def GetFileSize(file80h_pattern):
+    if not file80h_pattern or not isinstance(file80h_pattern, list):
+        return 0
+
     if file80h_pattern[0].get('is_resident'):
-        return GetFragmentData(file80h_pattern)[0].get('byte_length')
+        fragments = GetFragmentData(file80h_pattern)
+        if fragments and len(fragments) > 0:
+            return fragments[0].get('byte_length', 0)
     else:
-        size_list = ExtractSequenceHexValues(file80h_pattern)[56:64]
+        sequence_list = ExtractSequenceHexValues(file80h_pattern)
+        if len(sequence_list) < 64:
+            raise ValueError("Sequence too short to parse the file size")
+
+        size_list = sequence_list[56:64]
         size = hex_list_to_int(size_list)
         return size
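
Note on the db_node.py change above: for non-resident files, GetFileSize reads eight bytes out of the 0x80 ($DATA) attribute and converts them with hex_list_to_int. A minimal sketch of that conversion, assuming the repo's list-of-hex-strings representation and NTFS's little-endian byte order (the body below is a guess at the helper's shape, not the repo's actual implementation):

def hex_list_to_int(hex_list):
    # NTFS multi-byte fields are little-endian: the least significant
    # byte comes first, so reverse the slice before joining the digits.
    return int("".join(reversed(hex_list)), 16)

# e.g. a 4 KiB file: 00 10 00 00 00 00 00 00 -> 0x1000 == 4096 bytes
assert hex_list_to_int(["00", "10", "00", "00", "00", "00", "00", "00"]) == 4096
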
diff --git a/ntfs_utils/mft_analyze.py b/ntfs_utils/mft_analyze.py
index d69a39e..3358e7b 100644
--- a/ntfs_utils/mft_analyze.py
+++ b/ntfs_utils/mft_analyze.py
@@ -251,50 +251,32 @@ def ExtractSequenceHexValues(file80h_pattern):
     return sequence_list
 
 
-def ExportDataRunList(data_run):
+def ExportDataRunList(data_run_list):
     """
-    Extract the Data Runs contained in data_run into separate list fragments.
-
-    Args:
-        data_run (list): a list of hex strings holding the Data Run content
-
-    Returns:
-        list: each element is a list representing a single Data Run
+    Split data_run_list into separate, independent Data Run fragments.
     """
     result = []
     pos = 0
 
-
-    while pos < len(data_run):
-        current_byte = data_run[pos]
-
+    while pos < len(data_run_list):
+        current_byte = data_run_list[pos]
         if current_byte == '00':
-            # Stop parsing when an empty run block is reached
             break
-
         try:
             header = int(current_byte, 16)
             len_bytes = (header >> 4) & 0x0F
             offset_bytes = header & 0x0F
-            if len_bytes == 0 or offset_bytes == 0:
-                print(f"⚠️ Invalid field length, skipping position {pos}")
+            run_length = 1 + offset_bytes + len_bytes
+            if pos + run_length > len(data_run_list):
+                print(f"⚠️ Data out of bounds, stopping parse")
                 break
-            # Compute the total length of the current Data Run
-            run_length = 1 + offset_bytes + len_bytes
-
-            # Slice out the current Data Run
-            fragment = data_run[pos: pos + run_length]
-
+            fragment = data_run_list[pos: pos + run_length]
             result.append(fragment)
-
-            # Advance the pointer
             pos += run_length
-
         except Exception as e:
-            print(f"❌ Parse failed at position {pos}: {e}")
-            break
-
+            print(f"❌ Failed to parse Data Run at position {pos}, error: {e}")
+            pos += 1  # skip one byte and keep parsing
     return result
 
 
@@ -326,6 +308,10 @@ def parse_data_run(data_run, previous_cluster=0, cluster_size=512):
     len_bytes = (header >> 4) & 0x0F
     offset_bytes = header & 0x0F
 
+    if len(data_run) < 1 + offset_bytes + len_bytes:
+        print(f"⚠️ Data too short to parse the Data Run")
+        return None
+
     # Extract the offset and length fields
     offset_data = data_run[1:1 + offset_bytes]
     length_data = data_run[1 + offset_bytes:1 + offset_bytes + len_bytes]
@@ -416,7 +402,6 @@ def GetFragmentData(file80h_pattern):
     results = ParseMultipleDataRuns(fragments)
     return results
 
-
 # if __name__ == '__main__':
 #     arri80_data = GetFile80hPattern(r"Z:\hello.txt")
 #     data = GetFragmentData(arri80_data)
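
For reference, the on-disk format that ExportDataRunList and parse_data_run decode: each Data Run starts with a header byte whose low nibble is the size of the run-length field and whose high nibble is the size of the signed LCN-offset field, both fields little-endian, and a 0x00 header terminates the list. Each offset is a delta from the previous run's start LCN, which is how fragmented files encode backward jumps. (The patch's len_bytes/offset_bytes naming maps onto these two fields in the opposite order, but the computed total run length is the same.) A self-contained sketch over raw bytes, with a made-up sample run list; decode_data_runs is an illustrative name, not a function from this repo:

def decode_data_runs(raw: bytes):
    """Yield (start_lcn, cluster_count) for each run in a data-run list."""
    pos = 0
    lcn = 0  # run offsets chain: each is a signed delta from the previous start
    while pos < len(raw) and raw[pos] != 0x00:
        header = raw[pos]
        length_size = header & 0x0F          # low nibble: run-length field size
        offset_size = (header >> 4) & 0x0F   # high nibble: LCN-offset field size
        pos += 1
        count = int.from_bytes(raw[pos:pos + length_size], "little")
        pos += length_size
        delta = int.from_bytes(raw[pos:pos + offset_size], "little", signed=True)
        pos += offset_size
        lcn += delta
        yield lcn, count

# Two runs: 24 clusters at LCN 0x5634, then 48 clusters starting 32 clusters
# earlier (the 0xE0 offset byte is a negative delta).
sample = bytes.fromhex("211834561130e000")
print(list(decode_data_runs(sample)))  # [(22068, 24), (22036, 48)]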