optimize db_path memory
This commit is contained in:
@@ -25,10 +25,10 @@ def ClearTableRecordsWithReset(db_path, table_name):
|
||||
if __name__ == '__main__':
    # Script entry point: reset every table of the NTFS info database.
    # ClearTableRecordsWithReset is defined earlier in this file; judging by
    # its name it empties the table and resets its counter (TODO confirm).
    # The tables are processed in exactly the order listed below.
    database_file = '../src/db_ntfs_info.db'
    tables_to_clear = (
        'db_path',
        'db_node',
        'db_device',
        'db_config',
        'db_user',
        'db_group',
        'db_extend_extent',
        'db_extend_name',
    )
    for table in tables_to_clear:
        ClearTableRecordsWithReset(db_path=database_file, table_name=table)
|
||||
|
||||
|
@@ -0,0 +1,14 @@
|
||||
import sqlite3
|
||||
|
||||
db_path = "../src/db_ntfs_info.db"
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.cursor()
|
||||
|
||||
|
||||
def GetDeviceId(cursor: sqlite3.Cursor) -> int:
    """Return the lowest ``ID`` stored in the ``db_device`` table.

    Args:
        cursor: An open SQLite cursor on a database that contains a
            ``db_device`` table with an ``ID`` column.

    Returns:
        The ``ID`` of the first row when rows are ordered ascending by
        ``ID``, or ``0`` when the query yields no row.

    Raises:
        sqlite3.Error: Propagated unchanged from ``cursor.execute`` (for
            example when the table is missing).
    """
    cursor.execute("SELECT ID FROM db_device ORDER BY ID LIMIT 1")
    row = cursor.fetchone()
    # fetchone() gives None when the result set is empty; fall back to 0
    # so callers always receive an integer.
    if not row:
        return 0
    return row[0]
|
||||
|
||||
|
||||
print(GetDeviceId(cursor))
|
||||
|
@@ -4,6 +4,7 @@ import sqlite3
|
||||
from datetime import datetime
|
||||
|
||||
from ntfs_utils.mft_analyze import GetFile80hPattern, GetFragmentData, ExtractSequenceHexValues, hex_list_to_int
|
||||
from ntfs_utils.main import volume_letter
|
||||
|
||||
|
||||
# 工具函数:获取文件扩展名
|
||||
@@ -131,11 +132,10 @@ def GetFragmentLength(fragment):
|
||||
|
||||
|
||||
# 主函数:将 db_path 数据导入 db_node
|
||||
def InsertNodeDataToDB(volume_letter: str, db_path='../src/db_ntfs_info.db', table_name='db_node', batch_size=20):
|
||||
def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node', batch_size=20):
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.cursor()
|
||||
|
||||
volume_letter = volume_letter.upper().strip()
|
||||
if len(volume_letter) == 1:
|
||||
volume_root = f"{volume_letter}:\\"
|
||||
elif volume_letter.endswith(':'):
|
||||
@@ -254,5 +254,4 @@ def InsertNodeDataToDB(volume_letter: str, db_path='../src/db_ntfs_info.db', tab
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
volume_letter_test = "Y"
|
||||
InsertNodeDataToDB(volume_letter=volume_letter_test)
|
||||
InsertNodeDataToDB()
|
||||
|
@@ -23,7 +23,7 @@ def ShouldSkipPath(path: str) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def ScanVolume(volume_letter: str):
|
||||
def ScanVolume(volume_letter: str) -> list:
|
||||
"""
|
||||
完整扫描指定磁盘的所有文件和目录,忽略 NTFS 元文件和系统文件夹,
|
||||
并为每个节点分配 ParentID。
|
||||
@@ -33,7 +33,6 @@ def ScanVolume(volume_letter: str):
|
||||
if not os.path.exists(root_path):
|
||||
raise ValueError(f"磁盘 {root_path} 不存在")
|
||||
|
||||
result = []
|
||||
path_to_id = {} # 路径 -> ID 映射
|
||||
counter = 1
|
||||
|
||||
@@ -61,15 +60,12 @@ def ScanVolume(volume_letter: str):
|
||||
|
||||
name = entry
|
||||
|
||||
# 分离盘符并去除开头和结尾的 \
|
||||
# 分离盘符并处理路径格式
|
||||
_, relative_path = os.path.splitdrive(full_path)
|
||||
relative_path = relative_path.lstrip("\\").rstrip("\\")
|
||||
|
||||
# 如果是目录,结尾加 /
|
||||
if os.path.isdir(full_path) and not relative_path.endswith("/"):
|
||||
relative_path += "/"
|
||||
|
||||
# 替换所有 \ -> /
|
||||
relative_path = relative_path.replace("\\", "/")
|
||||
|
||||
path_hash = GenerateHash(relative_path)
|
||||
@@ -97,25 +93,27 @@ def ScanVolume(volume_letter: str):
|
||||
"ContentSize": content_size
|
||||
}
|
||||
|
||||
result.append(item)
|
||||
yield item # 使用 yield 返回每条记录
|
||||
path_to_id[relative_path] = counter
|
||||
counter += 1
|
||||
|
||||
except Exception as e:
|
||||
print(f"⚠️ 跳过路径 {full_path},错误: {e}")
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def InsertPathDataToDB(data, db_path='../src/db_ntfs_info.db', table_name='db_path', batch_size=20):
|
||||
def InsertPathDataToDB(data_generator, db_path='../src/db_ntfs_info.db', table_name='db_path', batch_size=20):
|
||||
"""
|
||||
批量将扫描结果写入数据库。
|
||||
流式写入数据库,边扫描边入库。
|
||||
|
||||
:param data_generator: 可迭代对象(如生成器)
|
||||
:param db_path: 数据库路径
|
||||
:param table_name: 表名
|
||||
:param batch_size: 每多少条记录提交一次
|
||||
"""
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.cursor()
|
||||
|
||||
try:
|
||||
# 创建表(如果不存在)
|
||||
create_table_sql = f"""
|
||||
CREATE TABLE IF NOT EXISTS {table_name} (
|
||||
ID INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
@@ -125,23 +123,20 @@ def InsertPathDataToDB(data, db_path='../src/db_ntfs_info.db', table_name='db_pa
|
||||
IsDir INTEGER NOT NULL CHECK(IsDir IN (0, 1)),
|
||||
ParentID INTEGER,
|
||||
ContentSize INTEGER,
|
||||
|
||||
FOREIGN KEY(ParentID) REFERENCES {table_name}(ID)
|
||||
);
|
||||
"""
|
||||
cursor.execute(create_table_sql)
|
||||
|
||||
# 插入语句(忽略重复 PathHash)
|
||||
insert_sql = f"""
|
||||
INSERT OR IGNORE INTO {table_name}
|
||||
(Path, Name, PathHash, IsDir, ParentID, ContentSize)
|
||||
VALUES (?, ?, ?, ?, ?, ?)
|
||||
"""
|
||||
|
||||
total_inserted = 0
|
||||
batch = []
|
||||
|
||||
for item in data:
|
||||
for item in data_generator:
|
||||
batch.append((
|
||||
item['Path'],
|
||||
item['Name'],
|
||||
@@ -154,39 +149,34 @@ def InsertPathDataToDB(data, db_path='../src/db_ntfs_info.db', table_name='db_pa
|
||||
if len(batch) >= batch_size:
|
||||
cursor.executemany(insert_sql, batch)
|
||||
conn.commit()
|
||||
total_inserted += cursor.rowcount
|
||||
print(f"✅ 提交一批 {len(batch)} 条数据")
|
||||
batch.clear()
|
||||
|
||||
# 插入剩余数据
|
||||
# 提交剩余不足一批的数据
|
||||
if batch:
|
||||
cursor.executemany(insert_sql, batch)
|
||||
conn.commit()
|
||||
total_inserted += cursor.rowcount
|
||||
print(f"✅ 提交最后一批 {len(batch)} 条数据")
|
||||
|
||||
print(f"✅ 总共插入 {total_inserted} 条记录到数据库。")
|
||||
|
||||
except Exception as e:
|
||||
print(f"❌ 插入失败: {e}")
|
||||
conn.rollback()
|
||||
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
# 示例主函数
|
||||
def main():
|
||||
volume_letter = "Y"
|
||||
|
||||
def DBPathMain(volume_letter: str):
|
||||
print(f"🔍 开始全盘扫描磁盘 {volume_letter}:\\ ...")
|
||||
scanned_data = ScanVolume(volume_letter)
|
||||
|
||||
print(f"📊 共扫描到 {len(scanned_data)} 条有效记录,开始入库...")
|
||||
InsertPathDataToDB(scanned_data)
|
||||
# 获取生成器对象
|
||||
generator = ScanVolume(volume_letter)
|
||||
|
||||
print(f"📊 开始逐批入库...")
|
||||
InsertPathDataToDB(generator)
|
||||
|
||||
print("✅ 全盘扫描与入库完成")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
DBPathMain(volume_letter="Y")
|
||||
|
@@ -2,8 +2,8 @@ from ntfs_utils.db_config import GetNTFSBootInfo, InsertInfoToDBConfig
|
||||
from ntfs_utils.db_device import ScanSpecialVolumes, InsertVolumesToDB
|
||||
from ntfs_utils.db_extend_name import InsertExtensionsToDB
|
||||
from ntfs_utils.db_group import InsertGroupToDB
|
||||
from ntfs_utils.db_node import InsertNodeDataToDB
|
||||
from ntfs_utils.db_path import ScanVolume, InsertPathDataToDB
|
||||
# from ntfs_utils.db_node import InsertNodeDataToDB
|
||||
from ntfs_utils.db_path import DBPathMain
|
||||
from ntfs_utils.db_user import InsertUserToDB
|
||||
|
||||
volume_letter = 'Y'
|
||||
@@ -26,10 +26,6 @@ def main():
|
||||
group_name_list = ["Copier"]
|
||||
InsertGroupToDB(group_name_list)
|
||||
|
||||
# 初始化 db_path 表
|
||||
scanned_data = ScanVolume(volume_letter)
|
||||
InsertPathDataToDB(scanned_data)
|
||||
|
||||
# 初始化 db_extend_name 表
|
||||
common_extensions = [
|
||||
"txt", "log", "csv", "xls", "xlsx", "doc", "docx",
|
||||
@@ -41,8 +37,11 @@ def main():
|
||||
count = InsertExtensionsToDB(common_extensions)
|
||||
print(f"共插入 {count} 个新扩展名。")
|
||||
|
||||
# 初始化 db_path 表
|
||||
DBPathMain(volume_letter=volume_letter)
|
||||
|
||||
# 初始化 db_node 表
|
||||
InsertNodeDataToDB(volume_letter)
|
||||
# InsertNodeDataToDB(volume_letter)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
Reference in New Issue
Block a user