xx

2025-05-19 17:33:30 +08:00
parent 07a4ae7a74
commit 697b449bff
4 changed files with 252 additions and 86 deletions
--- a/db_manage/clear_table_record.py
+++ b/db_manage/clear_table_record.py
@@ -25,5 +25,5 @@ def ClearTableRecordsWithReset(db_path, table_name):
 if __name__ == '__main__':
    # ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_path')
    # ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_device')
-    ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_config')
-    # ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_node')
+    # ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_config')
+    ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_node')
--- a/ntfs_utils/db_node.py
+++ b/ntfs_utils/db_node.py
@@ -1,6 +1,8 @@
 import hashlib
+import os
 import random
 import sqlite3
+from datetime import datetime

 from mft_analyze import GetFile80hPattern

@@ -24,12 +26,9 @@ def GetExtendNameId(name: str, cursor: sqlite3.Cursor) -> int:

 # 获取 DirLayer（路径层级）
 def GetDirLayer(path: str) -> int:
-    # "Z:\demo.jpg" → 0 (根目录文件)
-    # "Z:\pictures\RHCE.jpg" → 1 (一级子目录)
    path = path.strip()
    if not path or path == "\\":
        return 0
-    # 计算路径中的反斜杠数量，减去根目录的反斜杠
    return path.count("\\") - 1


@@ -47,6 +46,62 @@ def GetFirstUserId(cursor: sqlite3.Cursor) -> int:
    return result[0] if result else 0


+def GetFilesTime(file_path):
+    """
+    Return the creation, modification, access and metadata-change times of a file.
+
+    Values are taken from ``os.stat``:
+        FileCreateTime: ``st_birthtime`` when the platform exposes it, otherwise
+            ``st_ctime`` (which holds the creation time on Windows).
+        FileModifyTime: ``st_mtime``, the last content modification.
+        FileAccessTime: ``st_atime``, the last access.
+        FileAuthTime: ``st_ctime``, the metadata/permission change time on
+            POSIX systems; on Windows it is the creation time.
+
+    Args:
+        file_path (str): path of the file to inspect.
+
+    Returns:
+        dict: the four keys above mapped to local-time strings formatted as
+        'YYYY-MM-DD HH:MM:SS'. Every value is "default" when the file does
+        not exist or its timestamps cannot be read or converted.
+    """
+    field_names = ("FileCreateTime", "FileModifyTime", "FileAccessTime", "FileAuthTime")
+
+    def ts_to_str(timestamp):
+        # Render a POSIX timestamp as a local-time 'YYYY-MM-DD HH:MM:SS' string.
+        return datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S')
+
+    try:
+        stat_info = os.stat(file_path)
+        # st_birthtime is the true creation time but is not available everywhere,
+        # so fall back to st_ctime (creation time on Windows).
+        birth_time = getattr(stat_info, 'st_birthtime', stat_info.st_ctime)
+        return {
+            "FileCreateTime": ts_to_str(birth_time),
+            "FileModifyTime": ts_to_str(stat_info.st_mtime),
+            "FileAccessTime": ts_to_str(stat_info.st_atime),
+            "FileAuthTime": ts_to_str(stat_info.st_ctime),
+        }
+    except (FileNotFoundError, NotADirectoryError):
+        # A missing file is an expected case: fall back silently.
+        return dict.fromkeys(field_names, "default")
+    except (OSError, OverflowError, ValueError) as e:
+        # os.stat failed for another reason, or a timestamp could not be converted.
+        print(f"❌ 获取文件时间失败: {e}")
+        return dict.fromkeys(field_names, "default")
+
+
 # 获取设备ID（db_device第一条记录）
 def GetDeviceId(cursor: sqlite3.Cursor) -> int:
    cursor.execute("SELECT ID FROM db_device ORDER BY ID LIMIT 1")
@@ -65,24 +120,72 @@ def GetFileHash(full_path: str) -> str:


 # 获取分片数（1~4）
-def GetExtentCount(full_path: str) -> int:
+def GetExtentCount(data):
+    """
+    Count the fragments (data runs) described by an NTFS 0x80 ($DATA) attribute.
+
+    Args:
+        data (list): list of dicts. The hex strings under each dict's
+            'sequence' key (space-separated byte values such as
+            '80 00 00 00 48 00 00 00') are concatenated into one byte stream.
+
+    Returns:
+        int: 1 for a resident attribute, otherwise the number of data runs
+        that carry a cluster offset. Sparse runs (no offset field) occupy no
+        clusters and are skipped rather than counted.
+
+    Raises:
+        ValueError: if a token is not valid hex, the header is too short,
+            the type byte is not 0x80, or the run-list offset is out of range.
+    """
+    # Step 1: flatten every 'sequence' line into individual hex byte tokens.
+    hex_bytes = [
+        token
+        for entry in data
+        if 'sequence' in entry
+        for hex_str in entry['sequence']
+        for token in hex_str.split()
+    ]
+
+    # Convert the hex tokens into a list of integers.
    try:
-        pattern = GetFile80hPattern(full_path)
-        if not pattern:
-            return 1  # 默认值
+        attribute_data = [int(x, 16) for x in hex_bytes]
+    except ValueError as err:
+        raise ValueError("无效的十六进制数据") from err

-        # 取第一个80h属性(通常文件只有一个80h属性)
-        attr = pattern[0]
+    # Step 2: the common attribute header must be present before any field is read.
+    if len(attribute_data) < 24:
+        raise ValueError("属性数据过短，无法解析头部信息")

-        if attr['is_resident']:
-            return 1  # 常驻属性只有一个分片
-        else:
-            # 非常驻属性需要解析实际分片数
-            # 这里简化为从sequence中解析，实际可能需要更复杂的解析
-            return 1  # 简化处理，实际应根据数据结构解析
-    except Exception as e:
-        print(f"❌ 获取ExtentCount出错: {e}, 使用默认值1")
-        return 1  # 出错时返回默认值
+    # Only the low byte of the type field is inspected (0x80 = $DATA).
+    if attribute_data[0] != 0x80:
+        raise ValueError("不是80属性($DATA属性)")
+
+    # Byte 0x08 is the non-resident flag; 0 means the content is resident,
+    # which always counts as a single fragment.
+    if attribute_data[8] == 0:
+        return 1
+
+    # The run-list offset is a 16-bit little-endian field at 0x20-0x21, so a
+    # non-resident header needs at least 0x22 bytes; reject shorter input
+    # explicitly instead of failing with IndexError.
+    if len(attribute_data) < 0x22:
+        raise ValueError("属性数据过短，无法解析头部信息")
+    data_run_offset = attribute_data[0x20] | (attribute_data[0x21] << 8)
+
+    if data_run_offset >= len(attribute_data):
+        raise ValueError("数据运行偏移超出属性长度")
+
+    data_runs = attribute_data[data_run_offset:]
+    fragment_count = 0
+    pos = 0
+
+    while pos < len(data_runs):
+        header_byte = data_runs[pos]
+        if header_byte == 0x00:
+            break  # end-of-run-list marker
+
+        # Low nibble: byte size of the run-length field.
+        # High nibble: byte size of the cluster-offset field.
+        length_size = header_byte & 0x0F
+        offset_size = (header_byte >> 4) & 0x0F
+
+        if length_size == 0:
+            break  # a run without a length is malformed; stop parsing
+
+        pos += 1 + length_size + offset_size
+        # A run without an offset field is sparse: keep parsing, do not count it.
+        if offset_size:
+            fragment_count += 1
+
+    return fragment_count


 # 获取随机位置
@@ -95,53 +198,6 @@ def GetRandomLength() -> int:
    return random.randint(1000, 9999)


-def GetFileLocation(full_path: str) -> int:
-    try:
-        pattern = GetFile80hPattern(full_path)
-        if not pattern:
-            return GetRandomLocation()  # 回退到随机值
-
-        attr = pattern[0]
-        if attr['is_resident']:
-            # 常驻属性: start_byte + offset + content_offset
-            # 解析content_offset (sequence第三个元素的后4字节)
-            content_offset_bytes = attr['sequence'][2].split()[4:8]
-            content_offset = int.from_bytes(
-                bytes.fromhex(''.join(content_offset_bytes)),
-                byteorder='little'
-            )
-            return attr['start_byte'] + attr['offset'] + content_offset
-        else:
-            # 非常驻属性需要解析runlist
-            # 这里简化为返回start_byte
-            return attr['start_byte']
-    except Exception as e:
-        print(f"❌ 获取Location出错: {e}, 使用随机值")
-        return GetRandomLocation()  # 出错时返回随机值
-
-
-def GetFileLength(full_path: str) -> int:
-    try:
-        pattern = GetFile80hPattern(full_path)
-        if not pattern:
-            return GetRandomLength()  # 回退到随机值
-
-        attr = pattern[0]
-        if attr['is_resident']:
-            # 常驻属性: 解析sequence第三个元素的前4字节
-            content_length_bytes = attr['sequence'][2].split()[0:4]
-            return int.from_bytes(
-                bytes.fromhex(''.join(content_length_bytes)),
-                byteorder='little'
-            )
-        else:
-            # 非常驻属性: 从属性头中解析实际大小
-            return attr['attribute_length']  # 简化处理
-    except Exception as e:
-        print(f"❌ 获取Length出错: {e}, 使用随机值")
-        return GetRandomLength()  # 出错时返回随机值
-
-
 # 主函数：将 db_path 数据导入 db_node
 def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
    conn = sqlite3.connect(db_path)
@@ -154,8 +210,6 @@ def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
    cursor.execute("SELECT ID, Path, Name, ParentID FROM db_path")
    rows = cursor.fetchall()

-    inserted_count = 0  # 新增：记录实际插入的条目数
-
    for row in rows:
        path_id, full_path, name, parent_id = row

@@ -172,17 +226,40 @@ def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
        extend_name_id = GetExtendNameId(name, cursor)
        file_size = GetFileSize(full_path)
        file_hash = GetFileHash(full_path)
-        extent_count = GetExtentCount(full_path)

-        # 构建插入语句字段和参数
+        # 获取文件的时间属性
+        file_times = GetFilesTime(full_path)
+        create_time = file_times["FileCreateTime"]
+        modify_time = file_times["FileModifyTime"]
+        access_time = file_times["FileAccessTime"]
+        auth_time = file_times["FileAuthTime"]
+
+        # 新增：根据 $80 属性获取更精确的 ExtentCount
+        try:
+            attribute_80_data = GetFile80hPattern(full_path)
+
+            if not attribute_80_data or not isinstance(attribute_80_data, list):
+                raise ValueError("无效的 80h 属性数据")
+
+            extent_count = GetExtentCount(attribute_80_data)
+
+            print(f"✅ 分片数量为: {extent_count}")
+
+        except Exception as e:
+            print(f"⚠️ 获取 ExtentCount 失败，使用默认值 0: {e}")
+            extent_count = 0
+
+        # 构建插入语句字段和参数（保持原样）
        fields = [
            'PathID', 'ParentID', 'NameHash', 'PathHash',
            'ExtendNameID', 'DirLayer', 'GroupID', 'UserID',
+            'FileCreateTime', 'FileModifyTime', 'FileAccessTime', 'FileAuthTime',
            'FileSize', 'FileMode', 'FileHash', 'ExtentCount'
        ]
        values = [
            path_id, parent_id, name_hash, '',  # PathHash 待填
            extend_name_id, dir_layer, group_id, user_id,
+            create_time, modify_time, access_time, auth_time,
            file_size, 'default', file_hash, extent_count
        ]

@@ -218,16 +295,10 @@ def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):

        # 执行插入
        cursor.execute(insert_sql, values)
-        inserted_count += 1  # 新增：成功插入后计数器加1

    conn.commit()
    conn.close()
-
-    # 新增：根据插入结果输出不同信息
-    if inserted_count > 0:
-        print(f"✅ 成功插入 {inserted_count} 条数据到 {table_name} 表")
-    else:
-        print("ℹ️ 没有新的数据被插入数据库（可能所有条目已存在或没有可处理的数据）")
+    print(f"✅ 数据已成功插入到 {table_name} 表")


 if __name__ == '__main__':
--- a/ntfs_utils/mft_analyze.py
+++ b/ntfs_utils/mft_analyze.py
@@ -60,7 +60,7 @@ def GetFileMftEntry(file_path):
    drive_letter = os.path.splitdrive(file_path)[0][0]
    device = f"\\\\.\\{drive_letter}:"

-    print(f"Opening device: {device}")
+    # print(f"Opening device: {device}")

    try:
        img = pytsk3.Img_Info(device)
@@ -73,10 +73,10 @@ def GetFileMftEntry(file_path):
    root_path = f"{drive_letter}:\\"
    rel_path = os.path.relpath(abs_path, root_path).replace("/", "\\")

-    print(f"Looking up MFT entry for: {rel_path}")
+    # print(f"Looking up MFT entry for: {rel_path}")

    mft_entry = find_file_mft_entry(fs, rel_path)
-    print(f"MFT Entry: {mft_entry}")
+    # print(f"MFT Entry: {mft_entry}")
    if mft_entry is None:
        raise RuntimeError("Could not find MFT entry for the specified file.")

@@ -105,7 +105,7 @@ def CalculateFileMftStartSector(mft_entry, volume_letter="Z"):
    start_sector = config_data["MftPosition"] * 8 + mft_entry * 2
    if start_sector < 0:
        raise ValueError("起始扇区号不能为负数")
-    print(f"文件 MFT Entry 的起始扇区号: {start_sector}")
+    # print(f"文件 MFT Entry 的起始扇区号: {start_sector}")
    return start_sector


@@ -217,11 +217,106 @@ def GetFile80hPattern(file_path):
    try:
        mft_entry_value = GetFileMftEntry(file_path)
        StartSector = CalculateFileMftStartSector(mft_entry_value, volume_letter)
-        print(f"\n文件的相关信息以及80属性内容：")
-        print(Get80hPattern(StartSector, volume_letter))
+        # print(f"文件的相关信息以及80属性内容：")
+        # print(Get80hPattern(StartSector, volume_letter))
+        file80h_pattern = Get80hPattern(StartSector, volume_letter)
+        return file80h_pattern
    except Exception as e:
        print(f"❌ Error: {e}")
+    return None


-if __name__ == '__main__':
-    GetFile80hPattern(r"Z:\hello.txt")
+# if __name__ == '__main__':
+#     GetFile80hPattern(r"Z:\demo.jpg")
+
+
+def analyze_ntfs_data_attribute(data):
+    """
+    Count the fragments (data runs) described by an NTFS 0x80 ($DATA) attribute.
+
+    Args:
+        data (list): list of dicts. The hex strings under each dict's
+            'sequence' key (space-separated byte values such as
+            '80 00 00 00 48 00 00 00') are concatenated into one byte stream.
+
+    Returns:
+        int: 1 for a resident attribute, otherwise the number of data runs
+        that carry a cluster offset. Sparse runs (no offset field) occupy no
+        clusters and are skipped rather than counted.
+
+    Raises:
+        ValueError: if a token is not valid hex, the header is too short,
+            the type byte is not 0x80, or the run-list offset is out of range.
+    """
+    # Step 1: flatten every 'sequence' line into individual hex byte tokens.
+    hex_bytes = [
+        token
+        for entry in data
+        if 'sequence' in entry
+        for hex_str in entry['sequence']
+        for token in hex_str.split()
+    ]
+
+    # Convert the hex tokens into a list of integers.
+    try:
+        attribute_data = [int(x, 16) for x in hex_bytes]
+    except ValueError as err:
+        raise ValueError("无效的十六进制数据") from err
+
+    # Step 2: the common attribute header must be present before any field is read.
+    if len(attribute_data) < 24:
+        raise ValueError("属性数据过短，无法解析头部信息")
+
+    # Only the low byte of the type field is inspected (0x80 = $DATA).
+    if attribute_data[0] != 0x80:
+        raise ValueError("不是80属性($DATA属性)")
+
+    # Byte 0x08 is the non-resident flag; 0 means the content is resident,
+    # which always counts as a single fragment.
+    if attribute_data[8] == 0:
+        return 1
+
+    # The run-list offset is a 16-bit little-endian field at 0x20-0x21, so a
+    # non-resident header needs at least 0x22 bytes; reject shorter input
+    # explicitly instead of failing with IndexError.
+    if len(attribute_data) < 0x22:
+        raise ValueError("属性数据过短，无法解析头部信息")
+    data_run_offset = attribute_data[0x20] | (attribute_data[0x21] << 8)
+
+    if data_run_offset >= len(attribute_data):
+        raise ValueError("数据运行偏移超出属性长度")
+
+    data_runs = attribute_data[data_run_offset:]
+    fragment_count = 0
+    pos = 0
+
+    while pos < len(data_runs):
+        header_byte = data_runs[pos]
+        if header_byte == 0x00:
+            break  # end-of-run-list marker
+
+        # Low nibble: byte size of the run-length field.
+        # High nibble: byte size of the cluster-offset field.
+        length_size = header_byte & 0x0F
+        offset_size = (header_byte >> 4) & 0x0F
+
+        if length_size == 0:
+            break  # a run without a length is malformed; stop parsing
+
+        pos += 1 + length_size + offset_size
+        # A run without an offset field is sparse: keep parsing, do not count it.
+        if offset_size:
+            fragment_count += 1
+
+    return fragment_count
+
+
+# Sample 80h ($DATA) attribute dump used for a manual check of
+# analyze_ntfs_data_attribute; it describes a non-resident attribute whose
+# run list starts in the last 'sequence' line.
+input_data = [
+    {
+        'start_byte': 3221267456,
+        'offset': 264,
+        'sequence': [
+            '80 00 00 00 48 00 00 00',
+            '01 00 00 00 00 00 01 00',
+            '00 00 00 00 00 00 00 00',
+            '79 00 00 00 00 00 00 00',
+            '40 00 00 00 00 00 00 00',
+            '00 a0 07 00 00 00 00 00',
+            '0b 93 07 00 00 00 00 00',
+            '0b 93 07 00 00 00 00 00',
+            '31 7a 00 ee 0b 00 00 00'
+        ],
+        'is_resident': False,
+        'total_groups': 9,
+        'attribute_length': 72
+    }
+]
+
+# Run the manual check only when this file is executed as a script, so that
+# importing the module for its helpers does not print to stdout.
+if __name__ == '__main__':
+    print(analyze_ntfs_data_attribute(input_data))  # prints the fragment count
--- a/src/db_ntfs_info.db
+++ b/src/db_ntfs_info.db