analyze db_node table

This commit is contained in:
Burgess Leo
2025-05-16 17:45:35 +08:00
parent 7d21842287
commit ae777f75d9
8 changed files with 516 additions and 17 deletions

187
ntfs_utils/db_node.py Normal file
View File

@@ -0,0 +1,187 @@
import hashlib
import random
import sqlite3
from mft_analyze import GetFile80hPattern
# Utility: extract a file's extension.
def GetFileExtension(name: str) -> str:
    """Return the lower-cased extension of *name*, without the leading dot.

    The extension is the text after the last ``.`` in *name*. An empty
    string is returned when *name* contains no dot (or ends with one).
    """
    _, dot, suffix = name.rpartition('.')
    return suffix.lower() if dot else ""
# Look up the ExtendNameID for a file name (based on its extension).
def GetExtendNameId(name: str, cursor: sqlite3.Cursor) -> int:
    """Return the ``db_extend_name.ID`` matching the extension of *name*.

    Args:
        name: File name whose extension is looked up.
        cursor: Open cursor on the database holding ``db_extend_name``.

    Returns:
        The ID of the row whose ``ExtendName`` equals the lower-cased
        extension, or 0 when *name* has no extension or no row matches.
    """
    ext = GetFileExtension(name)
    if not ext:
        return 0
    cursor.execute("SELECT ID FROM db_extend_name WHERE ExtendName = ?", (ext,))
    row = cursor.fetchone()
    return row[0] if row else 0
# Get the GroupID (defaults to the first group).
def GetFirstGroupId(cursor: sqlite3.Cursor) -> int:
    """Return the smallest ``ID`` in ``db_group``, or 0 if the table is empty."""
    cursor.execute("SELECT ID FROM db_group ORDER BY ID LIMIT 1")
    row = cursor.fetchone()
    return row[0] if row else 0
# Get the UserID (defaults to the first user).
def GetFirstUserId(cursor: sqlite3.Cursor) -> int:
    """Return the smallest ``ID`` in ``db_user``, or 0 if the table is empty."""
    cursor.execute("SELECT ID FROM db_user ORDER BY ID LIMIT 1")
    row = cursor.fetchone()
    return row[0] if row else 0
# [Placeholder] Get the file size in bytes.
def GetFileSize(full_path: str) -> int:
    """Return the size in bytes of the file at *full_path*.

    Placeholder implementation: *full_path* is not inspected and the
    constant 10 is always returned.
    """
    return 10
# [Placeholder] Get the hash of the file's content.
def GetFileHash(full_path: str) -> str:
    """Return a SHA-256 hex digest standing in for the hash of *full_path*.

    Placeholder implementation: the file is not read; the digest of a
    fixed byte string is returned, so every path yields the same value.
    """
    return hashlib.sha256(b"mocked_file_content").hexdigest()
# [Placeholder] Get the number of extents (fragments).
def GetExtentCount(full_path: str) -> int:
    """Return the number of extents of the file at *full_path*.

    Placeholder implementation: *full_path* is not inspected and 1 is
    always returned.
    """
    return 1
# [Placeholder] Get the device ID (first record of db_device).
def GetDeviceId(cursor: sqlite3.Cursor) -> int:
    """Return the smallest ``ID`` in ``db_device``, or 0 if the table is empty."""
    cursor.execute("SELECT ID FROM db_device ORDER BY ID LIMIT 1")
    row = cursor.fetchone()
    return row[0] if row else 0
# [Placeholder] Get a random Location.
def GetRandomLocation() -> int:
    """Return a random integer in the inclusive range 1000..9999.

    Placeholder standing in for a real extent location.
    """
    return random.randint(1000, 9999)
# [Placeholder] Get a random Length.
def GetRandomLength() -> int:
    """Return a random integer in the inclusive range 1000..9999.

    Placeholder standing in for a real extent length.
    """
    return random.randint(1000, 9999)
# Main entry: walk db_path and insert one node row per path into db_node
# (or a custom table name).
def InsertNodeDataToDb(db_path='../src/filesystem.db', table_name='db_node'):
    """Generate a node row for every ``db_path`` record and insert it.

    Args:
        db_path: Path of the SQLite database file.
        table_name: Name of the target table; created if it does not exist.
            It is interpolated directly into the SQL text (table names
            cannot be bound as parameters), so it must come from trusted
            code, never from user input.

    Any failure is reported on stdout and the transaction is rolled back;
    no exception is propagated to the caller.
    """
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    try:
        # Create the target table on demand. The foreign keys point at the
        # tables this module actually reads (db_path, db_extend_name).
        create_table_sql = f"""
            CREATE TABLE IF NOT EXISTS {table_name} (
                ID INTEGER PRIMARY KEY AUTOINCREMENT,
                PathID INTEGER NOT NULL,
                ExtendNameID INTEGER,
                GroupID INTEGER,
                UserID INTEGER,
                FileSize INTEGER,
                FileHash TEXT,
                ExtentCount INTEGER,
                extent1_DeviceID INTEGER,
                extent1_Location INTEGER,
                extent1_Length INTEGER,
                extent2_DeviceID INTEGER,
                extent2_Location INTEGER,
                extent2_Length INTEGER,
                extent3_DeviceID INTEGER,
                extent3_Location INTEGER,
                extent3_Length INTEGER,
                extent4_DeviceID INTEGER,
                extent4_Location INTEGER,
                extent4_Length INTEGER,
                -- 外键约束
                FOREIGN KEY(PathID) REFERENCES db_path(ID),
                FOREIGN KEY(ExtendNameID) REFERENCES db_extend_name(ID),
                FOREIGN KEY(GroupID) REFERENCES db_group(ID),
                FOREIGN KEY(UserID) REFERENCES db_user(ID)
            );
        """
        cursor.execute(create_table_sql)

        # Fetch every path record up front; the cursor is reused for the
        # per-row lookups below.
        cursor.execute("SELECT ID, Name, Path, IsDir FROM db_path")
        path_records = cursor.fetchall()

        # These lookups do not depend on the row, so run them once.
        device_id = GetDeviceId(cursor)
        group_id = GetFirstGroupId(cursor)
        user_id = GetFirstUserId(cursor)

        batch = []
        for path_id, name, full_path, is_dir in path_records:
            # Directories have no extension entry.
            extend_name_id = 0 if is_dir == 1 else GetExtendNameId(name, cursor)
            file_size = GetFileSize(full_path)
            file_hash = GetFileHash(full_path)
            extent_count = GetExtentCount(full_path)

            # Build the extent triples, then pad with zeros so that exactly
            # four (DeviceID, Location, Length) groups are stored.
            extents = [
                (device_id, GetRandomLocation(), GetRandomLength())
                for _ in range(extent_count)
            ]
            extents.extend([(0, 0, 0)] * (4 - len(extents)))
            extent_columns = [value for extent in extents[:4] for value in extent]

            batch.append((
                path_id,
                extend_name_id,
                group_id,
                user_id,
                file_size,
                file_hash,
                extent_count,
                *extent_columns,
            ))

        # Bulk insert.
        # NOTE(review): OR IGNORE only skips rows that violate a constraint;
        # the table has no UNIQUE constraint on PathID, so re-running this
        # function appears to insert duplicates - confirm whether intended.
        insert_sql = f"""
            INSERT OR IGNORE INTO {table_name} (
                PathID, ExtendNameID, GroupID, UserID, FileSize, FileHash, ExtentCount,
                extent1_DeviceID, extent1_Location, extent1_Length,
                extent2_DeviceID, extent2_Location, extent2_Length,
                extent3_DeviceID, extent3_Location, extent3_Length,
                extent4_DeviceID, extent4_Location, extent4_Length
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """
        cursor.executemany(insert_sql, batch)
        conn.commit()
        print(f"✅ 成功插入 {cursor.rowcount} 条 {table_name} 记录")
    except Exception as e:
        # Best-effort script entry point: report and undo, do not re-raise.
        print(f"❌ 插入失败: {e}")
        conn.rollback()
    finally:
        conn.close()
# Example invocation.
if __name__ == "__main__":
    InsertNodeDataToDb(db_path='../src/db_ntfs_info.db', table_name='db_node')