235 lines
7.8 KiB
Python
235 lines
7.8 KiB
Python
import hashlib
|
||
import random
|
||
import sqlite3
|
||
|
||
from mft_analyze import GetFile80hPattern
|
||
|
||
|
||
# 工具函数:获取文件扩展名
|
||
def GetFileExtension(name: str) -> str:
    """Return the lower-cased text after the last dot in *name*.

    An empty string is returned when *name* contains no dot at all. A name
    that ends with a dot also yields an empty string, because nothing
    follows the final dot.
    """
    # rpartition yields an empty separator when no dot is present.
    _stem, dot, suffix = name.rpartition('.')
    if not dot:
        return ""
    return suffix.lower()
|
||
|
||
|
||
# 获取 ExtendNameID(基于文件名后缀)
|
||
def GetExtendNameId(name: str, cursor: sqlite3.Cursor) -> int:
    """Look up the ``db_extend_name.ID`` matching the extension of *name*.

    Args:
        name: File name whose extension is resolved via ``GetFileExtension``.
        cursor: Open cursor used to query the ``db_extend_name`` table.

    Returns:
        The ID of the matching row, or 0 when the name has no extension or
        no row matches it.
    """
    suffix = GetFileExtension(name)
    if suffix:
        cursor.execute("SELECT ID FROM db_extend_name WHERE ExtendName = ?", (suffix,))
        match = cursor.fetchone()
        if match:
            return match[0]
    # No extension, or the extension is not registered in the table.
    return 0
|
||
|
||
|
||
# 获取 DirLayer(路径层级)
|
||
def GetDirLayer(path: str) -> int:
    r"""Return the directory depth of *path*, counted from the volume root.

    The depth is the number of backslashes in the stripped path minus one,
    so a file directly under the root (``Z:\demo.jpg``) is layer 0 and a
    file one directory down (``Z:\pictures\RHCE.jpg``) is layer 1.

    Args:
        path: Backslash-separated path; surrounding whitespace is ignored.

    Returns:
        A non-negative layer number. Empty input, a lone backslash, and
        paths without any backslash (for example a bare file name or
        ``Z:``) all map to layer 0.
    """
    path = path.strip()
    if not path or path == "\\":
        return 0
    # Subtract the root separator; clamp so that a path with no backslash
    # does not produce a negative layer.
    return max(0, path.count("\\") - 1)
|
||
|
||
|
||
# 获取 GroupID(默认第一个)
|
||
def GetFirstGroupId(cursor: sqlite3.Cursor) -> int:
    """Return the smallest ``ID`` in ``db_group``, or 0 if the table is empty.

    Args:
        cursor: Open cursor on a database that contains ``db_group``.
    """
    query = "SELECT ID FROM db_group ORDER BY ID LIMIT 1"
    cursor.execute(query)
    first_row = cursor.fetchone()
    if not first_row:
        return 0
    return first_row[0]
|
||
|
||
|
||
# 获取 UserID(默认第一个)
|
||
def GetFirstUserId(cursor: sqlite3.Cursor) -> int:
    """Return the smallest ``ID`` in ``db_user``, or 0 if the table is empty.

    Args:
        cursor: Open cursor on a database that contains ``db_user``.
    """
    query = "SELECT ID FROM db_user ORDER BY ID LIMIT 1"
    cursor.execute(query)
    first_row = cursor.fetchone()
    if not first_row:
        return 0
    return first_row[0]
|
||
|
||
|
||
# 获取设备ID(db_device第一条记录)
|
||
def GetDeviceId(cursor: sqlite3.Cursor) -> int:
    """Return the smallest ``ID`` in ``db_device``, or 0 if the table is empty.

    Args:
        cursor: Open cursor on a database that contains ``db_device``.
    """
    query = "SELECT ID FROM db_device ORDER BY ID LIMIT 1"
    cursor.execute(query)
    first_row = cursor.fetchone()
    if not first_row:
        return 0
    return first_row[0]
|
||
|
||
|
||
# 获取文件大小(伪数据)
|
||
def GetFileSize(full_path: str) -> int:
    """Return a placeholder file size: a random integer in [100, 999].

    *full_path* is accepted for interface compatibility but is not read;
    no file system access takes place.
    """
    smallest, largest = 100, 999
    fake_size = random.randint(smallest, largest)
    return fake_size
|
||
|
||
|
||
# 获取文件内容哈希(伪数据)
|
||
def GetFileHash(full_path: str) -> str:
    """Return a placeholder content hash for *full_path*.

    The value is the SHA-256 hex digest of the path string itself; the
    file's contents are never read.
    """
    hasher = hashlib.sha256()
    hasher.update(full_path.encode())
    return hasher.hexdigest()
|
||
|
||
|
||
# 获取分片数(1~4)
|
||
def GetExtentCount(full_path: str) -> int:
    """Return the number of extents recorded for *full_path*.

    The first entry returned by ``GetFile80hPattern`` is inspected, but the
    implementation is still a simplification: resident and non-resident
    attributes both report a single extent, and so do a missing pattern and
    any error raised during the lookup.

    Returns:
        Always 1 at present.
    """
    fallback = 1
    try:
        attrs = GetFile80hPattern(full_path)
        if attrs:
            # Only the first 0x80 attribute is considered.
            first_attr = attrs[0]
            if not first_attr['is_resident']:
                # Non-resident data would need its extents parsed; that is
                # not implemented, so a single extent is reported.
                return fallback
            # Resident data is reported as exactly one extent.
            return fallback
        return fallback
    except Exception as e:
        print(f"❌ 获取ExtentCount出错: {e}, 使用默认值1")
        return fallback
|
||
|
||
|
||
# 获取随机位置
|
||
def GetRandomLocation() -> int:
    """Return a random placeholder extent location in [1000, 9999]."""
    lower, upper = 1000, 9999
    location = random.randint(lower, upper)
    return location
|
||
|
||
|
||
# 获取随机长度
|
||
def GetRandomLength() -> int:
    """Return a random placeholder extent length in [1000, 9999]."""
    lower, upper = 1000, 9999
    length = random.randint(lower, upper)
    return length
|
||
|
||
|
||
def GetFileLocation(full_path: str) -> int:
    """Return the data location for *full_path* from its first 0x80 attribute.

    For a resident attribute the result is ``start_byte + offset`` plus a
    content offset decoded from ``sequence[2]``: whitespace-separated hex
    tokens 4..7 are joined and read as a little-endian integer. For a
    non-resident attribute the run list is not parsed and ``start_byte`` is
    returned unchanged.

    Returns:
        The computed location, or a value from ``GetRandomLocation`` when
        no pattern is available or any error occurs during the lookup.
    """
    try:
        attrs = GetFile80hPattern(full_path)
        if attrs:
            first_attr = attrs[0]
            if not first_attr['is_resident']:
                # Simplification: the run list is not decoded here.
                return first_attr['start_byte']
            # Hex byte tokens 4..7 of the third sequence line.
            offset_tokens = first_attr['sequence'][2].split()[4:8]
            offset_raw = bytes.fromhex(''.join(offset_tokens))
            content_offset = int.from_bytes(offset_raw, byteorder='little')
            return first_attr['start_byte'] + first_attr['offset'] + content_offset
        # No pattern found: fall back to a random placeholder.
        return GetRandomLocation()
    except Exception as e:
        print(f"❌ 获取Location出错: {e}, 使用随机值")
        return GetRandomLocation()
|
||
|
||
|
||
def GetFileLength(full_path: str) -> int:
    """Return the data length for *full_path* from its first 0x80 attribute.

    For a resident attribute the length is decoded from ``sequence[2]``:
    the first four whitespace-separated hex tokens are joined and read as a
    little-endian integer. For a non-resident attribute the attribute's
    ``attribute_length`` value is returned as a simplification.

    Returns:
        The computed length, or a value from ``GetRandomLength`` when no
        pattern is available or any error occurs during the lookup.
    """
    try:
        attrs = GetFile80hPattern(full_path)
        if attrs:
            first_attr = attrs[0]
            if not first_attr['is_resident']:
                # Simplification: the real size is not parsed from the header.
                return first_attr['attribute_length']
            # Hex byte tokens 0..3 of the third sequence line.
            length_tokens = first_attr['sequence'][2].split()[0:4]
            length_raw = bytes.fromhex(''.join(length_tokens))
            return int.from_bytes(length_raw, byteorder='little')
        # No pattern found: fall back to a random placeholder.
        return GetRandomLength()
    except Exception as e:
        print(f"❌ 获取Length出错: {e}, 使用随机值")
        return GetRandomLength()
|
||
|
||
|
||
# 主函数:将 db_path 数据导入 db_node
|
||
def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node'):
    """Create one node row in *table_name* for every row of ``db_path``.

    For each ``db_path`` record that has no row with the same ``PathID`` in
    *table_name*, the derived columns (name hash, directory layer, extension
    ID, placeholder size/hash, extent count and up to four extents) are
    computed and inserted. All inserts are committed together at the end;
    if anything fails, nothing is committed and the connection is still
    closed.

    Args:
        db_path: Path of the SQLite database file.
        table_name: Target table, used for both the duplicate check and the
            insert. It is interpolated into the SQL text because SQLite
            cannot bind identifiers, so it must come from trusted code and
            never from user input.
    """
    extent_slots = 4  # the table has extent1_* .. extent4_* column groups

    fields = [
        'PathID', 'ParentID', 'NameHash', 'PathHash',
        'ExtendNameID', 'DirLayer', 'GroupID', 'UserID',
        'FileSize', 'FileMode', 'FileHash', 'ExtentCount',
    ]
    fields += [
        f"extent{slot}_{column}"
        for slot in range(1, extent_slots + 1)
        for column in ("DeviceID", "Location", "Length")
    ]

    # The statements do not change per row, so build them once.
    placeholders = ', '.join(['?'] * len(fields))
    insert_sql = f"INSERT INTO {table_name} ({', '.join(fields)}) VALUES ({placeholders})"
    # Check the same table that is inserted into, so a custom table_name is
    # not compared against db_node by mistake.
    exists_sql = f"SELECT 1 FROM {table_name} WHERE PathID = ? LIMIT 1"

    inserted_count = 0  # number of rows actually inserted

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        group_id = GetFirstGroupId(cursor)
        user_id = GetFirstUserId(cursor)
        device_id = GetDeviceId(cursor)

        # PathHash is fetched here to avoid a second query per row.
        cursor.execute("SELECT ID, Path, Name, ParentID, PathHash FROM db_path")
        rows = cursor.fetchall()

        for path_id, full_path, name, parent_id, path_hash in rows:
            # Skip paths that already have a node row.
            cursor.execute(exists_sql, (path_id,))
            if cursor.fetchone() is not None:
                print(f"⚠️ PathID {path_id} 已存在,跳过插入")
                continue

            # Derived columns.
            name_hash = hashlib.sha256(name.encode()).hexdigest()
            dir_layer = GetDirLayer(full_path)
            extend_name_id = GetExtendNameId(name, cursor)
            file_size = GetFileSize(full_path)
            file_hash = GetFileHash(full_path)
            extent_count = GetExtentCount(full_path)

            values = [
                path_id, parent_id, name_hash, path_hash,
                extend_name_id, dir_layer, group_id, user_id,
                file_size, 'default', file_hash, extent_count,
            ]

            # Fill the used extent slots with placeholder data and the
            # remaining slots with NULLs.
            for slot in range(1, extent_slots + 1):
                if slot <= extent_count:
                    values += [device_id, GetRandomLocation(), GetRandomLength()]
                else:
                    values += [None, None, None]

            cursor.execute(insert_sql, values)
            inserted_count += 1

        conn.commit()
    finally:
        # Always release the connection, including when a query fails.
        conn.close()

    if inserted_count > 0:
        print(f"✅ 成功插入 {inserted_count} 条数据到 {table_name} 表")
    else:
        print("ℹ️ 没有新的数据被插入数据库(可能所有条目已存在或没有可处理的数据)")
|
||
|
||
|
||
# Script entry point: run the import with its default database and table.
if __name__ == "__main__":
    InsertNodeDataToDB()
|