Optimize db_path memory usage: stream scan results into the database via a generator instead of building a full list
This commit is contained in:
@@ -25,10 +25,10 @@ def ClearTableRecordsWithReset(db_path, table_name):
|
|||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_path')
|
ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_path')
|
||||||
ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_node')
|
ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_node')
|
||||||
# ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_device')
|
ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_device')
|
||||||
# ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_config')
|
ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_config')
|
||||||
# ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_user')
|
ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_user')
|
||||||
# ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_group')
|
ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_group')
|
||||||
# ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_extend_extent')
|
ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_extend_extent')
|
||||||
# ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_extend_name')
|
ClearTableRecordsWithReset(db_path='../src/db_ntfs_info.db', table_name='db_extend_name')
|
||||||
|
|
||||||
|
@@ -0,0 +1,14 @@
|
|||||||
|
import sqlite3
|
||||||
|
|
||||||
|
db_path = "../src/db_ntfs_info.db"
|
||||||
|
conn = sqlite3.connect(db_path)
|
||||||
|
cursor = conn.cursor()
|
||||||
|
|
||||||
|
|
||||||
|
def GetDeviceId(cursor: sqlite3.Cursor) -> int:
    """Return the lowest ID found in the db_device table.

    Args:
        cursor: Cursor on a database that contains a db_device table.

    Returns:
        The ID of the first row when rows are ordered by ID, or 0 when
        the query yields no row.
    """
    cursor.execute("SELECT ID FROM db_device ORDER BY ID LIMIT 1")
    first_row = cursor.fetchone()

    # fetchone() gives None when the table is empty; fall back to 0.
    if not first_row:
        return 0
    return first_row[0]
|
||||||
|
|
||||||
|
|
||||||
|
print(GetDeviceId(cursor))
|
||||||
|
@@ -4,6 +4,7 @@ import sqlite3
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from ntfs_utils.mft_analyze import GetFile80hPattern, GetFragmentData, ExtractSequenceHexValues, hex_list_to_int
|
from ntfs_utils.mft_analyze import GetFile80hPattern, GetFragmentData, ExtractSequenceHexValues, hex_list_to_int
|
||||||
|
from ntfs_utils.main import volume_letter
|
||||||
|
|
||||||
|
|
||||||
# 工具函数:获取文件扩展名
|
# 工具函数:获取文件扩展名
|
||||||
@@ -131,11 +132,10 @@ def GetFragmentLength(fragment):
|
|||||||
|
|
||||||
|
|
||||||
# 主函数:将 db_path 数据导入 db_node
|
# 主函数:将 db_path 数据导入 db_node
|
||||||
def InsertNodeDataToDB(volume_letter: str, db_path='../src/db_ntfs_info.db', table_name='db_node', batch_size=20):
|
def InsertNodeDataToDB(db_path='../src/db_ntfs_info.db', table_name='db_node', batch_size=20):
|
||||||
conn = sqlite3.connect(db_path)
|
conn = sqlite3.connect(db_path)
|
||||||
cursor = conn.cursor()
|
cursor = conn.cursor()
|
||||||
|
|
||||||
volume_letter = volume_letter.upper().strip()
|
|
||||||
if len(volume_letter) == 1:
|
if len(volume_letter) == 1:
|
||||||
volume_root = f"{volume_letter}:\\"
|
volume_root = f"{volume_letter}:\\"
|
||||||
elif volume_letter.endswith(':'):
|
elif volume_letter.endswith(':'):
|
||||||
@@ -254,5 +254,4 @@ def InsertNodeDataToDB(volume_letter: str, db_path='../src/db_ntfs_info.db', tab
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
volume_letter_test = "Y"
|
InsertNodeDataToDB()
|
||||||
InsertNodeDataToDB(volume_letter=volume_letter_test)
|
|
||||||
|
@@ -23,7 +23,7 @@ def ShouldSkipPath(path: str) -> bool:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def ScanVolume(volume_letter: str):
|
def ScanVolume(volume_letter: str) -> list:
|
||||||
"""
|
"""
|
||||||
完整扫描指定磁盘的所有文件和目录,忽略 NTFS 元文件和系统文件夹,
|
完整扫描指定磁盘的所有文件和目录,忽略 NTFS 元文件和系统文件夹,
|
||||||
并为每个节点分配 ParentID。
|
并为每个节点分配 ParentID。
|
||||||
@@ -33,7 +33,6 @@ def ScanVolume(volume_letter: str):
|
|||||||
if not os.path.exists(root_path):
|
if not os.path.exists(root_path):
|
||||||
raise ValueError(f"磁盘 {root_path} 不存在")
|
raise ValueError(f"磁盘 {root_path} 不存在")
|
||||||
|
|
||||||
result = []
|
|
||||||
path_to_id = {} # 路径 -> ID 映射
|
path_to_id = {} # 路径 -> ID 映射
|
||||||
counter = 1
|
counter = 1
|
||||||
|
|
||||||
@@ -61,15 +60,12 @@ def ScanVolume(volume_letter: str):
|
|||||||
|
|
||||||
name = entry
|
name = entry
|
||||||
|
|
||||||
# 分离盘符并去除开头和结尾的 \
|
# 分离盘符并处理路径格式
|
||||||
_, relative_path = os.path.splitdrive(full_path)
|
_, relative_path = os.path.splitdrive(full_path)
|
||||||
relative_path = relative_path.lstrip("\\").rstrip("\\")
|
relative_path = relative_path.lstrip("\\").rstrip("\\")
|
||||||
|
|
||||||
# 如果是目录,结尾加 /
|
|
||||||
if os.path.isdir(full_path) and not relative_path.endswith("/"):
|
if os.path.isdir(full_path) and not relative_path.endswith("/"):
|
||||||
relative_path += "/"
|
relative_path += "/"
|
||||||
|
|
||||||
# 替换所有 \ -> /
|
|
||||||
relative_path = relative_path.replace("\\", "/")
|
relative_path = relative_path.replace("\\", "/")
|
||||||
|
|
||||||
path_hash = GenerateHash(relative_path)
|
path_hash = GenerateHash(relative_path)
|
||||||
@@ -97,25 +93,27 @@ def ScanVolume(volume_letter: str):
|
|||||||
"ContentSize": content_size
|
"ContentSize": content_size
|
||||||
}
|
}
|
||||||
|
|
||||||
result.append(item)
|
yield item # 使用 yield 返回每条记录
|
||||||
path_to_id[relative_path] = counter
|
path_to_id[relative_path] = counter
|
||||||
counter += 1
|
counter += 1
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"⚠️ 跳过路径 {full_path},错误: {e}")
|
print(f"⚠️ 跳过路径 {full_path},错误: {e}")
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
def InsertPathDataToDB(data_generator, db_path='../src/db_ntfs_info.db', table_name='db_path', batch_size=20):
|
||||||
def InsertPathDataToDB(data, db_path='../src/db_ntfs_info.db', table_name='db_path', batch_size=20):
|
|
||||||
"""
|
"""
|
||||||
批量将扫描结果写入数据库。
|
流式写入数据库,边扫描边入库。
|
||||||
|
|
||||||
|
:param data_generator: 可迭代对象(如生成器)
|
||||||
|
:param db_path: 数据库路径
|
||||||
|
:param table_name: 表名
|
||||||
|
:param batch_size: 每多少条记录提交一次
|
||||||
"""
|
"""
|
||||||
conn = sqlite3.connect(db_path)
|
conn = sqlite3.connect(db_path)
|
||||||
cursor = conn.cursor()
|
cursor = conn.cursor()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 创建表(如果不存在)
|
|
||||||
create_table_sql = f"""
|
create_table_sql = f"""
|
||||||
CREATE TABLE IF NOT EXISTS {table_name} (
|
CREATE TABLE IF NOT EXISTS {table_name} (
|
||||||
ID INTEGER PRIMARY KEY AUTOINCREMENT,
|
ID INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
@@ -125,23 +123,20 @@ def InsertPathDataToDB(data, db_path='../src/db_ntfs_info.db', table_name='db_pa
|
|||||||
IsDir INTEGER NOT NULL CHECK(IsDir IN (0, 1)),
|
IsDir INTEGER NOT NULL CHECK(IsDir IN (0, 1)),
|
||||||
ParentID INTEGER,
|
ParentID INTEGER,
|
||||||
ContentSize INTEGER,
|
ContentSize INTEGER,
|
||||||
|
|
||||||
FOREIGN KEY(ParentID) REFERENCES {table_name}(ID)
|
FOREIGN KEY(ParentID) REFERENCES {table_name}(ID)
|
||||||
);
|
);
|
||||||
"""
|
"""
|
||||||
cursor.execute(create_table_sql)
|
cursor.execute(create_table_sql)
|
||||||
|
|
||||||
# 插入语句(忽略重复 PathHash)
|
|
||||||
insert_sql = f"""
|
insert_sql = f"""
|
||||||
INSERT OR IGNORE INTO {table_name}
|
INSERT OR IGNORE INTO {table_name}
|
||||||
(Path, Name, PathHash, IsDir, ParentID, ContentSize)
|
(Path, Name, PathHash, IsDir, ParentID, ContentSize)
|
||||||
VALUES (?, ?, ?, ?, ?, ?)
|
VALUES (?, ?, ?, ?, ?, ?)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
total_inserted = 0
|
|
||||||
batch = []
|
batch = []
|
||||||
|
|
||||||
for item in data:
|
for item in data_generator:
|
||||||
batch.append((
|
batch.append((
|
||||||
item['Path'],
|
item['Path'],
|
||||||
item['Name'],
|
item['Name'],
|
||||||
@@ -154,39 +149,34 @@ def InsertPathDataToDB(data, db_path='../src/db_ntfs_info.db', table_name='db_pa
|
|||||||
if len(batch) >= batch_size:
|
if len(batch) >= batch_size:
|
||||||
cursor.executemany(insert_sql, batch)
|
cursor.executemany(insert_sql, batch)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
total_inserted += cursor.rowcount
|
|
||||||
print(f"✅ 提交一批 {len(batch)} 条数据")
|
print(f"✅ 提交一批 {len(batch)} 条数据")
|
||||||
batch.clear()
|
batch.clear()
|
||||||
|
|
||||||
# 插入剩余数据
|
# 提交剩余不足一批的数据
|
||||||
if batch:
|
if batch:
|
||||||
cursor.executemany(insert_sql, batch)
|
cursor.executemany(insert_sql, batch)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
total_inserted += cursor.rowcount
|
|
||||||
print(f"✅ 提交最后一批 {len(batch)} 条数据")
|
print(f"✅ 提交最后一批 {len(batch)} 条数据")
|
||||||
|
|
||||||
print(f"✅ 总共插入 {total_inserted} 条记录到数据库。")
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"❌ 插入失败: {e}")
|
print(f"❌ 插入失败: {e}")
|
||||||
conn.rollback()
|
conn.rollback()
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
# 示例主函数
|
# 示例主函数
|
||||||
def main():
|
def DBPathMain(volume_letter: str):
|
||||||
volume_letter = "Y"
|
|
||||||
|
|
||||||
print(f"🔍 开始全盘扫描磁盘 {volume_letter}:\\ ...")
|
print(f"🔍 开始全盘扫描磁盘 {volume_letter}:\\ ...")
|
||||||
scanned_data = ScanVolume(volume_letter)
|
|
||||||
|
|
||||||
print(f"📊 共扫描到 {len(scanned_data)} 条有效记录,开始入库...")
|
# 获取生成器对象
|
||||||
InsertPathDataToDB(scanned_data)
|
generator = ScanVolume(volume_letter)
|
||||||
|
|
||||||
|
print(f"📊 开始逐批入库...")
|
||||||
|
InsertPathDataToDB(generator)
|
||||||
|
|
||||||
print("✅ 全盘扫描与入库完成")
|
print("✅ 全盘扫描与入库完成")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
DBPathMain(volume_letter="Y")
|
||||||
|
@@ -2,8 +2,8 @@ from ntfs_utils.db_config import GetNTFSBootInfo, InsertInfoToDBConfig
|
|||||||
from ntfs_utils.db_device import ScanSpecialVolumes, InsertVolumesToDB
|
from ntfs_utils.db_device import ScanSpecialVolumes, InsertVolumesToDB
|
||||||
from ntfs_utils.db_extend_name import InsertExtensionsToDB
|
from ntfs_utils.db_extend_name import InsertExtensionsToDB
|
||||||
from ntfs_utils.db_group import InsertGroupToDB
|
from ntfs_utils.db_group import InsertGroupToDB
|
||||||
from ntfs_utils.db_node import InsertNodeDataToDB
|
# from ntfs_utils.db_node import InsertNodeDataToDB
|
||||||
from ntfs_utils.db_path import ScanVolume, InsertPathDataToDB
|
from ntfs_utils.db_path import DBPathMain
|
||||||
from ntfs_utils.db_user import InsertUserToDB
|
from ntfs_utils.db_user import InsertUserToDB
|
||||||
|
|
||||||
volume_letter = 'Y'
|
volume_letter = 'Y'
|
||||||
@@ -26,10 +26,6 @@ def main():
|
|||||||
group_name_list = ["Copier"]
|
group_name_list = ["Copier"]
|
||||||
InsertGroupToDB(group_name_list)
|
InsertGroupToDB(group_name_list)
|
||||||
|
|
||||||
# 初始化 db_path 表
|
|
||||||
scanned_data = ScanVolume(volume_letter)
|
|
||||||
InsertPathDataToDB(scanned_data)
|
|
||||||
|
|
||||||
# 初始化 db_extend_name 表
|
# 初始化 db_extend_name 表
|
||||||
common_extensions = [
|
common_extensions = [
|
||||||
"txt", "log", "csv", "xls", "xlsx", "doc", "docx",
|
"txt", "log", "csv", "xls", "xlsx", "doc", "docx",
|
||||||
@@ -41,8 +37,11 @@ def main():
|
|||||||
count = InsertExtensionsToDB(common_extensions)
|
count = InsertExtensionsToDB(common_extensions)
|
||||||
print(f"共插入 {count} 个新扩展名。")
|
print(f"共插入 {count} 个新扩展名。")
|
||||||
|
|
||||||
|
# 初始化 db_path 表
|
||||||
|
DBPathMain(volume_letter=volume_letter)
|
||||||
|
|
||||||
# 初始化 db_node 表
|
# 初始化 db_node 表
|
||||||
InsertNodeDataToDB(volume_letter)
|
# InsertNodeDataToDB(volume_letter)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
Reference in New Issue
Block a user