Files
fastcopy/ntfs_utils/db_node.py
Burgess Leo 07a4ae7a74 temp restore
2025-05-19 13:25:07 +08:00

235 lines
7.8 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import hashlib
import random
import sqlite3
from mft_analyze import GetFile80hPattern
def GetFileExtension(name: str) -> str:
    """Return the lower-cased text that follows the last '.' in *name*.

    An empty string is returned when *name* contains no dot at all. A name
    ending in a dot also yields an empty string, and a leading-dot name such
    as ".bashrc" yields "bashrc".
    """
    _stem, dot, suffix = name.rpartition('.')
    if not dot:
        # No separator found: the name has no extension.
        return ""
    return suffix.lower()
def GetExtendNameId(name: str, cursor: sqlite3.Cursor) -> int:
    """Return the db_extend_name ID that matches the extension of *name*.

    The extension is obtained from GetFileExtension (lower-cased, without the
    dot). 0 is returned when *name* has no extension or when the extension
    has no row in db_extend_name.
    """
    suffix = GetFileExtension(name)
    if not suffix:
        return 0
    match = cursor.execute(
        "SELECT ID FROM db_extend_name WHERE ExtendName = ?", (suffix,)
    ).fetchone()
    if not match:
        return 0
    return match[0]
def GetDirLayer(path: str) -> int:
    r"""Return the directory depth of a backslash-separated path.

    The depth is the number of backslashes in the stripped path minus one
    (the separator that follows the drive root), so:

        "Z:\demo.jpg"          -> 0  (file in the root directory)
        "Z:\pictures\RHCE.jpg" -> 1  (file in a first-level directory)

    An empty path, a path that is just a single backslash, and a path that
    contains no backslash at all (for example "demo.jpg" or "Z:") all yield
    0; the result is never negative.
    """
    trimmed = path.strip()
    if not trimmed or trimmed == "\\":
        return 0
    # Clamp at zero: a path without any separator would otherwise give -1.
    return max(0, trimmed.count("\\") - 1)
def GetFirstGroupId(cursor: sqlite3.Cursor) -> int:
    """Return the smallest ID stored in db_group, or 0 if the table is empty.

    Used as the default GroupID for newly created node rows.
    """
    first_row = cursor.execute(
        "SELECT ID FROM db_group ORDER BY ID LIMIT 1"
    ).fetchone()
    if not first_row:
        return 0
    return first_row[0]
def GetFirstUserId(cursor: sqlite3.Cursor) -> int:
    """Return the smallest ID stored in db_user, or 0 if the table is empty.

    Used as the default UserID for newly created node rows.
    """
    first_row = cursor.execute(
        "SELECT ID FROM db_user ORDER BY ID LIMIT 1"
    ).fetchone()
    if not first_row:
        return 0
    return first_row[0]
def GetDeviceId(cursor: sqlite3.Cursor) -> int:
    """Return the smallest ID stored in db_device, or 0 if the table is empty.

    The value is used as the DeviceID of every extent written for a node.
    """
    first_row = cursor.execute(
        "SELECT ID FROM db_device ORDER BY ID LIMIT 1"
    ).fetchone()
    if not first_row:
        return 0
    return first_row[0]
def GetFileSize(full_path: str) -> int:
    """Return a placeholder file size: a random integer from 100 to 999.

    *full_path* is accepted for interface compatibility only; it is not
    consulted and no file is read.
    """
    smallest, largest = 100, 999
    # randrange excludes its upper bound, hence the +1 to keep 999 reachable.
    return random.randrange(smallest, largest + 1)
def GetFileHash(full_path: str) -> str:
    """Return a placeholder content hash for *full_path*.

    The value is the SHA-256 hex digest of the path text itself (encoded with
    str.encode's default encoding); the file's contents are not read.
    """
    hasher = hashlib.sha256()
    hasher.update(full_path.encode())
    return hasher.hexdigest()
def GetExtentCount(full_path: str) -> int:
    """Return the number of extents (fragments) to record for *full_path*.

    The caller supports between 1 and 4 extents, but this implementation
    currently returns 1 on every path: when no 0x80 attribute is found, when
    the first 0x80 attribute is resident, when it is non-resident (its run
    list is not parsed yet), and when anything raises.
    """
    fallback = 1
    try:
        attrs = GetFile80hPattern(full_path)
        if attrs:
            # Only the first 0x80 attribute is inspected.
            first_attr = attrs[0]
            # The flag is still read so that a record lacking it is reported
            # through the handler below.
            if first_attr['is_resident']:
                # Resident data is counted as a single extent.
                return 1
            # Non-resident data: simplified, the real count is not derived yet.
            return 1
        return fallback
    except Exception as e:
        print(f"❌ 获取ExtentCount出错: {e}, 使用默认值1")
        return fallback
def GetRandomLocation() -> int:
    """Return a placeholder extent location: a random integer from 1000 to 9999."""
    low, high = 1000, 9999
    # randrange excludes its upper bound, hence the +1 to keep 9999 reachable.
    return random.randrange(low, high + 1)
def GetRandomLength() -> int:
    """Return a placeholder extent length: a random integer from 1000 to 9999."""
    low, high = 1000, 9999
    # randrange excludes its upper bound, hence the +1 to keep 9999 reachable.
    return random.randrange(low, high + 1)
def GetFileLocation(full_path: str) -> int:
    """Return the data location for *full_path*, based on its first 0x80 attribute.

    * No attribute found: a random placeholder from GetRandomLocation().
    * Resident attribute: start_byte + offset + content offset, where the
      content offset is decoded little-endian from tokens 4..7 of the third
      'sequence' entry.
    * Non-resident attribute: start_byte only (the run list is not parsed).
    * Any exception: the error is printed and a random placeholder returned.
    """
    try:
        attrs = GetFile80hPattern(full_path)
        if not attrs:
            return GetRandomLocation()

        data_attr = attrs[0]
        if not data_attr['is_resident']:
            # Simplified: the run list would have to be parsed for the real
            # location of non-resident data.
            return data_attr['start_byte']

        # NOTE(review): presumably sequence[2] is a whitespace-separated row
        # of hex byte pairs from the attribute header - confirm against
        # GetFile80hPattern.
        header_row = data_attr['sequence'][2]
        offset_hex = ''.join(header_row.split()[4:8])
        content_offset = int.from_bytes(bytes.fromhex(offset_hex), byteorder='little')
        return data_attr['start_byte'] + data_attr['offset'] + content_offset
    except Exception as e:
        print(f"❌ 获取Location出错: {e}, 使用随机值")
        return GetRandomLocation()
def GetFileLength(full_path: str) -> int:
    """Return the data length for *full_path*, based on its first 0x80 attribute.

    * No attribute found: a random placeholder from GetRandomLength().
    * Resident attribute: the little-endian integer decoded from tokens 0..3
      of the third 'sequence' entry.
    * Non-resident attribute: the attribute's 'attribute_length' value
      (simplified; the real data size is not parsed from the header).
    * Any exception: the error is printed and a random placeholder returned.
    """
    try:
        attrs = GetFile80hPattern(full_path)
        if not attrs:
            return GetRandomLength()

        data_attr = attrs[0]
        if not data_attr['is_resident']:
            return data_attr['attribute_length']

        # NOTE(review): presumably sequence[2] is a whitespace-separated row
        # of hex byte pairs from the attribute header - confirm against
        # GetFile80hPattern.
        header_row = data_attr['sequence'][2]
        length_hex = ''.join(header_row.split()[0:4])
        return int.from_bytes(bytes.fromhex(length_hex), byteorder='little')
    except Exception as e:
        print(f"❌ 获取Length出错: {e}, 使用随机值")
        return GetRandomLength()
def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
    """Create one node row in *table_name* for every db_path row not yet imported.

    For each row of db_path the function skips the entry when *table_name*
    already holds a row with the same PathID; otherwise it computes the
    derived columns (name hash, directory layer, extension ID, placeholder
    size and hash, extent count), fills up to four extent slots and inserts
    the row. All inserts are committed together at the end, and a summary
    line is printed.

    Args:
        db_path: Path of the SQLite database file to open.
        table_name: Destination table. It is used for both the duplicate
            check and the INSERT. The name is interpolated into the SQL text
            (identifiers cannot be bound as parameters), so it must come from
            trusted code, never from user input.

    Raises:
        sqlite3.Error: Propagated from any failing statement. The connection
            is still closed, and because the commit has not happened yet no
            partial import is persisted.
    """
    max_extents = 4

    # The column list and the statement are the same for every row, so they
    # are built once up front.
    fields = [
        'PathID', 'ParentID', 'NameHash', 'PathHash',
        'ExtendNameID', 'DirLayer', 'GroupID', 'UserID',
        'FileSize', 'FileMode', 'FileHash', 'ExtentCount',
    ]
    for slot in range(1, max_extents + 1):
        fields += [
            f"extent{slot}_DeviceID",
            f"extent{slot}_Location",
            f"extent{slot}_Length",
        ]
    placeholders = ', '.join('?' * len(fields))
    insert_sql = f"INSERT INTO {table_name} ({', '.join(fields)}) VALUES ({placeholders})"
    # The duplicate check must look at the same table the INSERT writes to.
    exists_sql = f"SELECT COUNT(*) FROM {table_name} WHERE PathID = ?"

    inserted_count = 0  # number of rows actually inserted
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        group_id = GetFirstGroupId(cursor)
        user_id = GetFirstUserId(cursor)
        device_id = GetDeviceId(cursor)

        # PathHash is read together with the other db_path columns instead of
        # being re-queried for every row.
        cursor.execute("SELECT ID, Path, Name, ParentID, PathHash FROM db_path")
        rows = cursor.fetchall()

        for path_id, full_path, name, parent_id, path_hash in rows:
            # Skip entries that were already imported.
            cursor.execute(exists_sql, (path_id,))
            if cursor.fetchone()[0] > 0:
                print(f"⚠️ PathID {path_id} 已存在,跳过插入")
                continue

            # Derived columns.
            name_hash = hashlib.sha256(name.encode()).hexdigest()
            dir_layer = GetDirLayer(full_path)
            extend_name_id = GetExtendNameId(name, cursor)
            file_size = GetFileSize(full_path)
            file_hash = GetFileHash(full_path)
            extent_count = GetExtentCount(full_path)

            values = [
                path_id, parent_id, name_hash, path_hash,
                extend_name_id, dir_layer, group_id, user_id,
                file_size, 'default', file_hash, extent_count,
            ]

            # Used extent slots get the device ID plus a location and length;
            # unused slots are stored as NULL.
            # NOTE(review): location/length are random placeholders here;
            # GetFileLocation/GetFileLength exist in this module but are not
            # wired in - confirm whether that is intentional.
            for slot in range(1, max_extents + 1):
                if slot <= extent_count:
                    location = GetRandomLocation()
                    length = GetRandomLength()
                    values += [device_id, location, length]
                else:
                    values += [None, None, None]

            cursor.execute(insert_sql, values)
            inserted_count += 1

        conn.commit()
    finally:
        # Always release the connection, including when a statement failed.
        conn.close()

    # Report the outcome of the import.
    if inserted_count > 0:
        print(f"✅ 成功插入 {inserted_count} 条数据到 {table_name}")
    else:
        print(" 没有新的数据被插入数据库(可能所有条目已存在或没有可处理的数据)")
# Script entry point: import db_path rows into db_node using the default
# database path and table name.
if __name__ == "__main__":
    InsertNodeDataToDB()