# fastcopy/ntfs_utils/db_node.py

import hashlib
import random
import sqlite3

from mft_analyze import GetFile80hPattern


# Utility: extract the file extension
def GetFileExtension(name: str) -> str:
    """Return the lower-cased text after the last '.' in *name*.

    A name without any '.' yields "". A trailing dot therefore also yields
    "", and a leading-dot name such as ".bashrc" yields "bashrc".
    """
    _stem, dot, suffix = name.rpartition('.')
    if not dot:
        # No '.' anywhere in the name.
        return ""
    return suffix.lower()


# Look up the ExtendNameID (keyed by the file name's extension)
def GetExtendNameId(name: str, cursor: sqlite3.Cursor) -> int:
    """Return the db_extend_name.ID whose ExtendName matches *name*'s extension.

    Returns 0 when *name* has no extension, or when db_extend_name holds no
    row for that extension. The query runs on the supplied *cursor*.
    """
    extension = GetFileExtension(name)
    if extension:
        cursor.execute(
            "SELECT ID FROM db_extend_name WHERE ExtendName = ?",
            (extension,),
        )
        row = cursor.fetchone()
        if row:
            return row[0]
    return 0


# Compute DirLayer (directory depth of a path)
def GetDirLayer(path: str) -> int:
    r"""Return how many directory levels below the drive root *path* lies.

    The depth is the number of backslashes in the stripped path minus one
    (the separator that follows the drive), clamped at zero:

        "Z:\demo.jpg"          -> 0  (file in the root directory)
        "Z:\pictures\RHCE.jpg" -> 1  (file in a first-level subdirectory)

    Empty or whitespace-only input, a lone backslash, and a name containing
    no backslash at all yield 0, so the result is never negative.
    """
    backslashes = path.strip().count("\\")
    # Clamp at zero: without this a separator-free name such as "demo.jpg"
    # would produce a layer of -1.
    return max(backslashes - 1, 0)


# Look up the GroupID (defaults to the first group)
def GetFirstGroupId(cursor: sqlite3.Cursor) -> int:
    """Return the first ID of db_group in ascending order, or 0 if it is empty."""
    query = "SELECT ID FROM db_group ORDER BY ID LIMIT 1"
    cursor.execute(query)
    row = cursor.fetchone()
    if not row:
        return 0
    return row[0]


# Look up the UserID (defaults to the first user)
def GetFirstUserId(cursor: sqlite3.Cursor) -> int:
    """Return the first ID of db_user in ascending order, or 0 if it is empty."""
    query = "SELECT ID FROM db_user ORDER BY ID LIMIT 1"
    cursor.execute(query)
    row = cursor.fetchone()
    if not row:
        return 0
    return row[0]


# Look up the device ID (first record of db_device)
def GetDeviceId(cursor: sqlite3.Cursor) -> int:
    """Return the first ID of db_device in ascending order, or 0 if it is empty."""
    query = "SELECT ID FROM db_device ORDER BY ID LIMIT 1"
    cursor.execute(query)
    row = cursor.fetchone()
    if not row:
        return 0
    return row[0]


# File size (placeholder data)
def GetFileSize(full_path: str) -> int:
    """Return a placeholder size: a random integer from 100 to 999 inclusive.

    *full_path* is accepted but not consulted; no file is read.
    """
    # randrange's upper bound is exclusive, hence 1000.
    return random.randrange(100, 1000)


# File content hash (placeholder data)
def GetFileHash(full_path: str) -> str:
    """Return the SHA-256 hex digest of the path string itself.

    This is placeholder data: the digest covers the UTF-8 bytes of
    *full_path*, not the contents of the file it names.
    """
    digest = hashlib.sha256()
    digest.update(full_path.encode("utf-8"))
    return digest.hexdigest()


# Number of extents (1-4)
def GetExtentCount(full_path: str) -> int:
    """Return the extent (fragment) count for the file at *full_path*.

    Every code path currently returns 1. The 80h attribute list is fetched
    and its first entry inspected, but counting the extents of a
    non-resident attribute is not implemented yet. Any exception raised
    along the way is printed and also answered with 1.
    """
    try:
        attributes = GetFile80hPattern(full_path)
        if attributes:
            # Only the first 80h attribute is considered (a file usually has
            # just one).
            if attributes[0]['is_resident']:
                # Resident data is stored as a single extent.
                return 1
            # Non-resident: the real count is not parsed yet; 1 is a
            # simplified stand-in.
            return 1
        # No 80h attribute found: use the default.
        return 1
    except Exception as exc:
        print(f"❌ 获取ExtentCount出错: {exc}, 使用默认值1")
        return 1


# Random location
def GetRandomLocation() -> int:
    """Return a random integer from 1000 to 9999 inclusive."""
    # randrange's upper bound is exclusive, hence 10000.
    return random.randrange(1000, 10000)


# Random length
def GetRandomLength() -> int:
    """Return a random integer from 1000 to 9999 inclusive."""
    # randrange's upper bound is exclusive, hence 10000.
    return random.randrange(1000, 10000)


def GetFileLocation(full_path: str) -> int:
    """Return a byte location for the data of the file at *full_path*.

    Uses the first 80h attribute reported by GetFile80hPattern:

    * resident: start_byte + offset + the content offset decoded from the
      attribute's third ``sequence`` row;
    * non-resident: start_byte (simplification; the run list is not parsed).

    Falls back to GetRandomLocation() when no attribute is found or when any
    exception occurs (the exception is printed).
    """
    try:
        attributes = GetFile80hPattern(full_path)
        if not attributes:
            return GetRandomLocation()

        first = attributes[0]
        if not first['is_resident']:
            # Non-resident data would need its run list parsed; start_byte
            # is returned as a simplified stand-in.
            return first['start_byte']

        # Resident data: hex byte tokens 4..7 of the third sequence row are
        # decoded as a little-endian integer.
        # NOTE(review): the NTFS resident header stores the content offset
        # as a 2-byte field; this reads 4 bytes. Confirm against the row
        # layout produced by GetFile80hPattern.
        offset_tokens = first['sequence'][2].split()[4:8]
        offset_hex = ''.join(offset_tokens)
        content_offset = int.from_bytes(bytes.fromhex(offset_hex), byteorder='little')
        return first['start_byte'] + first['offset'] + content_offset
    except Exception as exc:
        print(f"❌ 获取Location出错: {exc}, 使用随机值")
        return GetRandomLocation()


def GetFileLength(full_path: str) -> int:
    """Return a data length for the file at *full_path*.

    Uses the first 80h attribute reported by GetFile80hPattern:

    * resident: the first four hex byte tokens of the attribute's third
      ``sequence`` row, decoded as a little-endian integer;
    * non-resident: the attribute's ``attribute_length`` (simplification).

    Falls back to GetRandomLength() when no attribute is found or when any
    exception occurs (the exception is printed).
    """
    try:
        attributes = GetFile80hPattern(full_path)
        if not attributes:
            return GetRandomLength()

        first = attributes[0]
        if not first['is_resident']:
            # Simplified: the real size is not parsed from the header.
            return first['attribute_length']

        # Resident data: tokens 0..3 of the third sequence row.
        length_tokens = first['sequence'][2].split()[0:4]
        length_hex = ''.join(length_tokens)
        return int.from_bytes(bytes.fromhex(length_hex), byteorder='little')
    except Exception as exc:
        print(f"❌ 获取Length出错: {exc}, 使用随机值")
        return GetRandomLength()


# Main routine: import db_path rows into the node table
def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
    """Insert one node row into *table_name* for every db_path row lacking one.

    For each row of db_path the function skips it when *table_name* already
    holds a row with the same PathID; otherwise it derives the name hash,
    directory layer, extension ID, size, file hash and extent count through
    the module's helper functions and inserts a new row. Up to four extent
    slots are filled with the device ID plus a random location and length;
    unused slots are stored as NULL.

    All inserts are committed together after the loop. The connection is
    always closed, so if anything raises, nothing from this call is
    committed. A summary line is printed at the end.

    Args:
        db_path: Path of the SQLite database file.
        table_name: Destination table, used for both the existence check
            and the INSERT. It is interpolated into the SQL text
            (identifiers cannot be bound as parameters), so it must come
            from trusted code, never from user input.
    """
    extent_slots = 4

    # The column list and SQL text are the same for every row, so they are
    # built once up front.
    fields = [
        'PathID', 'ParentID', 'NameHash', 'PathHash',
        'ExtendNameID', 'DirLayer', 'GroupID', 'UserID',
        'FileSize', 'FileMode', 'FileHash', 'ExtentCount',
    ]
    for slot in range(1, extent_slots + 1):
        fields += [
            f"extent{slot}_DeviceID",
            f"extent{slot}_Location",
            f"extent{slot}_Length",
        ]

    placeholders = ', '.join('?' * len(fields))
    insert_sql = f"INSERT INTO {table_name} ({', '.join(fields)}) VALUES ({placeholders})"
    # Check the same table that receives the insert, not a hard-coded one.
    exists_sql = f"SELECT 1 FROM {table_name} WHERE PathID = ? LIMIT 1"

    inserted_count = 0  # number of rows actually inserted

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        group_id = GetFirstGroupId(cursor)
        user_id = GetFirstUserId(cursor)
        device_id = GetDeviceId(cursor)

        # PathHash is read together with the other columns so that no
        # per-row follow-up query is needed.
        cursor.execute("SELECT ID, Path, Name, ParentID, PathHash FROM db_path")
        rows = cursor.fetchall()

        for path_id, full_path, name, parent_id, path_hash in rows:
            # Skip paths that already have a node row.
            cursor.execute(exists_sql, (path_id,))
            if cursor.fetchone() is not None:
                print(f"⚠️ PathID {path_id} 已存在，跳过插入")
                continue

            # Derived columns.
            name_hash = hashlib.sha256(name.encode()).hexdigest()
            dir_layer = GetDirLayer(full_path)
            extend_name_id = GetExtendNameId(name, cursor)
            file_size = GetFileSize(full_path)
            file_hash = GetFileHash(full_path)
            extent_count = GetExtentCount(full_path)

            values = [
                path_id, parent_id, name_hash, path_hash,
                extend_name_id, dir_layer, group_id, user_id,
                file_size, 'default', file_hash, extent_count,
            ]

            # Extent slots: used ones get device/location/length, the rest
            # are NULL.
            for slot in range(1, extent_slots + 1):
                if slot <= extent_count:
                    values += [device_id, GetRandomLocation(), GetRandomLength()]
                else:
                    values += [None, None, None]

            cursor.execute(insert_sql, values)
            inserted_count += 1

        conn.commit()
    finally:
        # Close even when a statement above raised, so the handle is not
        # leaked.
        conn.close()

    # Report the outcome.
    if inserted_count > 0:
        print(f"✅ 成功插入 {inserted_count} 条数据到 {table_name} 表")
    else:
        print("ℹ️ 没有新的数据被插入数据库（可能所有条目已存在或没有可处理的数据）")


# Script entry point: run the import with the default database path and table.
if __name__ == '__main__':
    InsertNodeDataToDB()