reanalyze db_path schema
This commit is contained in:
@@ -86,9 +86,9 @@ def CreateDBDeviceTable(db_path='../src/db_ntfs_info.db', table_name='db_device'
|
|||||||
|
|
||||||
def CreateDBNodeTable(db_path='../src/db_ntfs_info.db', table_name='db_node'):
|
def CreateDBNodeTable(db_path='../src/db_ntfs_info.db', table_name='db_node'):
|
||||||
"""
|
"""
|
||||||
在指定路径下创建 SQLite 数据库,并在其中创建节点信息表。
|
创建 NewDBNode 表,用于存储文件的具体属性和物理分布信息。
|
||||||
|
|
||||||
:param db_path: str, 数据库文件的路径
|
:param db_path: str, 数据库文件路径
|
||||||
:param table_name: str, 要创建的表名
|
:param table_name: str, 要创建的表名
|
||||||
:return: None
|
:return: None
|
||||||
"""
|
"""
|
||||||
@@ -100,28 +100,18 @@ def CreateDBNodeTable(db_path='../src/db_ntfs_info.db', table_name='db_node'):
|
|||||||
|
|
||||||
# 连接到SQLite数据库(如果文件不存在会自动创建)
|
# 连接到SQLite数据库(如果文件不存在会自动创建)
|
||||||
conn = sqlite3.connect(db_path)
|
conn = sqlite3.connect(db_path)
|
||||||
|
conn.execute("PRAGMA foreign_keys = ON;") # 开启外键支持
|
||||||
# 创建一个游标对象
|
|
||||||
cursor = conn.cursor()
|
cursor = conn.cursor()
|
||||||
|
|
||||||
# 动态构建创建表的SQL语句
|
# 动态构建创建表的SQL语句
|
||||||
create_table_sql = f"""
|
create_table_sql = f"""
|
||||||
CREATE TABLE IF NOT EXISTS {table_name} (
|
CREATE TABLE IF NOT EXISTS {table_name} (
|
||||||
ID INTEGER PRIMARY KEY AUTOINCREMENT,
|
ID INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
PathID INTEGER,
|
PathID INTEGER NOT NULL,
|
||||||
ParentID INTEGER,
|
|
||||||
NameHash TEXT,
|
|
||||||
PathHash TEXT,
|
|
||||||
ExtendNameID INTEGER,
|
ExtendNameID INTEGER,
|
||||||
DirLayer INTEGER,
|
|
||||||
GroupID INTEGER,
|
GroupID INTEGER,
|
||||||
UserID INTEGER,
|
UserID INTEGER,
|
||||||
FileCreateTime TEXT,
|
|
||||||
FileModifyTime TEXT,
|
|
||||||
FileAccessTime TEXT,
|
|
||||||
FileAuthTime TEXT,
|
|
||||||
FileSize INTEGER,
|
FileSize INTEGER,
|
||||||
FileMode INTEGER,
|
|
||||||
FileHash TEXT,
|
FileHash TEXT,
|
||||||
ExtentCount INTEGER,
|
ExtentCount INTEGER,
|
||||||
extent1_DeviceID INTEGER,
|
extent1_DeviceID INTEGER,
|
||||||
@@ -137,21 +127,17 @@ def CreateDBNodeTable(db_path='../src/db_ntfs_info.db', table_name='db_node'):
|
|||||||
extent4_Location INTEGER,
|
extent4_Location INTEGER,
|
||||||
extent4_Length INTEGER,
|
extent4_Length INTEGER,
|
||||||
|
|
||||||
-- 外键约束(可选)
|
-- 外键约束
|
||||||
FOREIGN KEY(PathID) REFERENCES path_table(ID),
|
FOREIGN KEY(PathID) REFERENCES NewDBPath(ID),
|
||||||
FOREIGN KEY(ExtendNameID) REFERENCES extname_table(ID),
|
FOREIGN KEY(ExtendNameID) REFERENCES db_extend(ID),
|
||||||
FOREIGN KEY(GroupID) REFERENCES groups(ID),
|
FOREIGN KEY(GroupID) REFERENCES db_group(ID),
|
||||||
FOREIGN KEY(UserID) REFERENCES users(ID)
|
FOREIGN KEY(UserID) REFERENCES db_user(ID)
|
||||||
);
|
);
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# 执行SQL语句
|
# 执行SQL语句
|
||||||
cursor.execute(create_table_sql)
|
cursor.execute(create_table_sql)
|
||||||
|
|
||||||
# 提交更改
|
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
||||||
# 关闭连接
|
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
print(f"表 [{table_name}] 已在数据库 [{db_path}] 中创建成功")
|
print(f"表 [{table_name}] 已在数据库 [{db_path}] 中创建成功")
|
||||||
@@ -280,12 +266,11 @@ def CreateDBExtendSnippetTable(db_path='../src/db_ntfs_info.db', table_name='db_
|
|||||||
print(f"表 [{table_name}] 已在数据库 [{db_path}] 中创建成功")
|
print(f"表 [{table_name}] 已在数据库 [{db_path}] 中创建成功")
|
||||||
|
|
||||||
|
|
||||||
def CreateDBPathTable(db_path='../src/db_path.db', table_name='db_path'):
|
def CreateDBPathTable(db_path='../src/db_ntfs_info.db', table_name='db_path'):
|
||||||
"""
|
"""
|
||||||
在指定路径下创建 SQLite 数据库,并在其中创建路径信息表,
|
创建 NewDBPath 表,用于存储文件/目录的路径信息。
|
||||||
包含 DeviceID 字段,用于标记文件所属设备(磁盘)。
|
|
||||||
|
|
||||||
:param db_path: str, 数据库文件的路径
|
:param db_path: str, 数据库文件路径
|
||||||
:param table_name: str, 要创建的表名
|
:param table_name: str, 要创建的表名
|
||||||
:return: None
|
:return: None
|
||||||
"""
|
"""
|
||||||
@@ -295,39 +280,42 @@ def CreateDBPathTable(db_path='../src/db_path.db', table_name='db_path'):
|
|||||||
if directory and not os.path.exists(directory):
|
if directory and not os.path.exists(directory):
|
||||||
os.makedirs(directory)
|
os.makedirs(directory)
|
||||||
|
|
||||||
# 连接到SQLite数据库(如果文件不存在会自动创建)
|
# 连接到SQLite数据库(如果不存在会自动创建)
|
||||||
conn = sqlite3.connect(db_path)
|
conn = sqlite3.connect(db_path)
|
||||||
|
conn.execute("PRAGMA foreign_keys = ON;") # 开启外键支持
|
||||||
cursor = conn.cursor()
|
cursor = conn.cursor()
|
||||||
|
|
||||||
# 动态构建创建表的SQL语句(包含 DeviceID 外键)
|
# 动态构建创建表的SQL语句
|
||||||
create_table_sql = f"""
|
create_table_sql = f"""
|
||||||
CREATE TABLE IF NOT EXISTS {table_name} (
|
CREATE TABLE IF NOT EXISTS {table_name} (
|
||||||
ID INTEGER PRIMARY KEY AUTOINCREMENT,
|
ID INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
-- DeviceID TEXT NOT NULL,
|
|
||||||
Path TEXT NOT NULL,
|
Path TEXT NOT NULL,
|
||||||
Name TEXT NOT NULL,
|
Name TEXT NOT NULL,
|
||||||
|
DirLayer INTEGER NOT NULL,
|
||||||
PathHash TEXT UNIQUE NOT NULL,
|
PathHash TEXT UNIQUE NOT NULL,
|
||||||
IsDir INTEGER NOT NULL CHECK(IsDir IN (0, 1)),
|
IsDir INTEGER NOT NULL CHECK(IsDir IN (0, 1)),
|
||||||
ParentID INTEGER,
|
ParentID INTEGER,
|
||||||
ContentSize INTEGER,
|
ContentSize INTEGER,
|
||||||
|
FileCreateTime TEXT,
|
||||||
|
FileModifyTime TEXT,
|
||||||
|
FileAccessTime TEXT,
|
||||||
|
FileAuthTime TEXT,
|
||||||
|
FileMode TEXT,
|
||||||
|
|
||||||
-- 外键约束
|
-- 外键约束(可选)
|
||||||
-- FOREIGN KEY(DeviceID) REFERENCES db_device(ID),
|
|
||||||
FOREIGN KEY(ParentID) REFERENCES {table_name}(ID)
|
FOREIGN KEY(ParentID) REFERENCES {table_name}(ID)
|
||||||
);
|
);
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# 执行SQL语句
|
# 执行SQL语句
|
||||||
cursor.execute(create_table_sql)
|
cursor.execute(create_table_sql)
|
||||||
|
|
||||||
# 提交更改
|
|
||||||
conn.commit()
|
conn.commit()
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
print(f"表 [{table_name}] 已在数据库 [{db_path}] 中创建成功")
|
print(f"表 [{table_name}] 已在数据库 [{db_path}] 中创建成功")
|
||||||
|
|
||||||
|
|
||||||
def CreateDBExtendNameTable(db_path='../src/db_extend_name.db', table_name='db_extend_name'):
|
def CreateDBExtendNameTable(db_path='../src/db_ntfs_info.db', table_name='db_extend_name'):
|
||||||
"""
|
"""
|
||||||
在指定路径下创建 SQLite 数据库,并在其中创建扩展名表。
|
在指定路径下创建 SQLite 数据库,并在其中创建扩展名表。
|
||||||
|
|
||||||
|
@@ -23,10 +23,37 @@ def ShouldSkipPath(path: str) -> bool:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def GetDirLayer(full_path: str, volume_letter: str) -> int:
    r"""
    Compute the directory depth of a Windows path relative to its drive root.

    The depth is the number of backslash separators that follow the
    ``<letter>:\`` drive prefix.

    Examples:
        Z:\demo.txt                   -> 0
        Z:\folder\test.txt            -> 1
        Z:\folder\subfolder\file.txt  -> 2

    :param full_path: str, full path including the drive prefix
    :param volume_letter: str, drive letter without colon (e.g. 'Z')
    :return: int, the depth; 0 for the drive root itself and for entries
             directly under it; -1 when ``full_path`` is not located on
             the given drive
    """
    root_prefix = f"{volume_letter.upper()}:\\"

    # Drive letters are case-insensitive on Windows, so normalise the
    # leading part of the path before comparing it with the upper-cased
    # prefix; otherwise 'z:\...' would be rejected for volume 'z'.
    if full_path[:len(root_prefix)].upper() != root_prefix:
        return -1  # path is not on the requested drive

    relative_path = full_path[len(root_prefix):]
    if not relative_path:
        return 0  # the drive root itself is layer 0

    # Count backslashes explicitly instead of splitting on os.sep: the
    # prefix above is already hard-wired to Windows syntax, and os.sep
    # would make the result depend on the host platform.
    return relative_path.count("\\")
|
||||||
|
|
||||||
|
|
||||||
def ScanVolume(volume_letter: str):
|
def ScanVolume(volume_letter: str):
|
||||||
"""
|
"""
|
||||||
完整扫描指定磁盘的所有文件和目录,忽略 NTFS 元文件和系统文件夹,
|
完整扫描指定磁盘的所有文件和目录,忽略 NTFS 元文件和系统文件夹,
|
||||||
并为每个节点分配 ParentID。
|
并为每个节点分配 ParentID 和 DirLayer。
|
||||||
|
|
||||||
返回:
|
返回:
|
||||||
list of dict:包含文件/目录信息的字典列表
|
list of dict:包含文件/目录信息的字典列表
|
||||||
@@ -36,7 +63,7 @@ def ScanVolume(volume_letter: str):
|
|||||||
raise ValueError(f"磁盘 {root_path} 不存在")
|
raise ValueError(f"磁盘 {root_path} 不存在")
|
||||||
|
|
||||||
result = []
|
result = []
|
||||||
path_to_id = {} # 用于记录路径到数据库 ID 的映射
|
path_to_id = {} # 用于记录路径到 ID 的映射
|
||||||
counter = 1 # 模拟数据库自增 ID
|
counter = 1 # 模拟数据库自增 ID
|
||||||
|
|
||||||
for root, dirs, files in os.walk(root_path, topdown=True, onerror=None, followlinks=False):
|
for root, dirs, files in os.walk(root_path, topdown=True, onerror=None, followlinks=False):
|
||||||
@@ -61,17 +88,20 @@ def ScanVolume(volume_letter: str):
|
|||||||
|
|
||||||
name = entry
|
name = entry
|
||||||
|
|
||||||
# ✅ 修正点:对 Path 字段进行哈希
|
# ✅ 对 Path 字段进行哈希
|
||||||
path_hash = GenerateHash(full_path)
|
path_hash = GenerateHash(full_path)
|
||||||
|
|
||||||
# 计算 ContentSize(KB),小文件至少显示为 1 KB
|
# ✅ 计算 ContentSize(KB),小文件至少显示为 1 KB
|
||||||
content_size = bytes_size // 1024
|
content_size = bytes_size // 1024
|
||||||
if content_size == 0 and bytes_size > 0:
|
if content_size == 0 and bytes_size > 0:
|
||||||
content_size = 1
|
content_size = 1
|
||||||
|
|
||||||
# 获取父目录路径
|
# ✅ 获取父目录路径
|
||||||
parent_path = os.path.dirname(full_path)
|
parent_path = os.path.dirname(full_path)
|
||||||
parent_id = path_to_id.get(parent_path, 0) # 默认为 0(根目录可能未录入)
|
parent_id = path_to_id.get(parent_path, 0)
|
||||||
|
|
||||||
|
# ✅ 计算 DirLayer(目录层级)
|
||||||
|
dir_layer = GetDirLayer(full_path, volume_letter)
|
||||||
|
|
||||||
item = {
|
item = {
|
||||||
"ID": counter,
|
"ID": counter,
|
||||||
@@ -80,7 +110,13 @@ def ScanVolume(volume_letter: str):
|
|||||||
"PathHash": path_hash,
|
"PathHash": path_hash,
|
||||||
"IsDir": is_dir,
|
"IsDir": is_dir,
|
||||||
"ParentID": parent_id,
|
"ParentID": parent_id,
|
||||||
"ContentSize": content_size
|
"ContentSize": content_size,
|
||||||
|
"DirLayer": dir_layer,
|
||||||
|
"FileCreateTime": "default",
|
||||||
|
"FileModifyTime": "default",
|
||||||
|
"FileAccessTime": "default",
|
||||||
|
"FileAuthTime": "default",
|
||||||
|
"FileMode": "default"
|
||||||
}
|
}
|
||||||
|
|
||||||
result.append(item)
|
result.append(item)
|
||||||
@@ -93,9 +129,15 @@ def ScanVolume(volume_letter: str):
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
def InsertPathDataToDB(data, db_path='../src/db_ntfs_info.db', table_name='db_path', batch_size=20):
|
def InsertNewDBPathToDB(data, db_path='../src/db_ntfs_info.db', table_name='db_path', batch_size=20):
|
||||||
"""
|
"""
|
||||||
批量将扫描结果写入数据库。
|
批量将扫描结果写入 NewDBPath 表中,支持新字段。
|
||||||
|
|
||||||
|
参数:
|
||||||
|
data: list of dict,扫描结果数据
|
||||||
|
db_path: str,SQLite 数据库路径
|
||||||
|
table_name: str,目标表名
|
||||||
|
batch_size: int,每多少条提交一次
|
||||||
"""
|
"""
|
||||||
conn = sqlite3.connect(db_path)
|
conn = sqlite3.connect(db_path)
|
||||||
cursor = conn.cursor()
|
cursor = conn.cursor()
|
||||||
@@ -111,6 +153,12 @@ def InsertPathDataToDB(data, db_path='../src/db_ntfs_info.db', table_name='db_pa
|
|||||||
IsDir INTEGER NOT NULL CHECK(IsDir IN (0, 1)),
|
IsDir INTEGER NOT NULL CHECK(IsDir IN (0, 1)),
|
||||||
ParentID INTEGER,
|
ParentID INTEGER,
|
||||||
ContentSize INTEGER,
|
ContentSize INTEGER,
|
||||||
|
DirLayer INTEGER NOT NULL,
|
||||||
|
FileCreateTime TEXT,
|
||||||
|
FileModifyTime TEXT,
|
||||||
|
FileAccessTime TEXT,
|
||||||
|
FileAuthTime TEXT,
|
||||||
|
FileMode TEXT,
|
||||||
|
|
||||||
FOREIGN KEY(ParentID) REFERENCES {table_name}(ID)
|
FOREIGN KEY(ParentID) REFERENCES {table_name}(ID)
|
||||||
);
|
);
|
||||||
@@ -120,8 +168,8 @@ def InsertPathDataToDB(data, db_path='../src/db_ntfs_info.db', table_name='db_pa
|
|||||||
# 插入语句(忽略重复 PathHash)
|
# 插入语句(忽略重复 PathHash)
|
||||||
insert_sql = f"""
|
insert_sql = f"""
|
||||||
INSERT OR IGNORE INTO {table_name}
|
INSERT OR IGNORE INTO {table_name}
|
||||||
(Path, Name, PathHash, IsDir, ParentID, ContentSize)
|
(Path, Name, PathHash, IsDir, ParentID, ContentSize, DirLayer, FileCreateTime, FileModifyTime, FileAccessTime, FileAuthTime, FileMode)
|
||||||
VALUES (?, ?, ?, ?, ?, ?)
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
total_inserted = 0
|
total_inserted = 0
|
||||||
@@ -134,7 +182,13 @@ def InsertPathDataToDB(data, db_path='../src/db_ntfs_info.db', table_name='db_pa
|
|||||||
item['PathHash'],
|
item['PathHash'],
|
||||||
item['IsDir'],
|
item['IsDir'],
|
||||||
item['ParentID'] or 0,
|
item['ParentID'] or 0,
|
||||||
item['ContentSize']
|
item['ContentSize'],
|
||||||
|
item['DirLayer'],
|
||||||
|
item['FileCreateTime'],
|
||||||
|
item['FileModifyTime'],
|
||||||
|
item['FileAccessTime'],
|
||||||
|
item['FileAuthTime'],
|
||||||
|
item['FileMode']
|
||||||
))
|
))
|
||||||
|
|
||||||
if len(batch) >= batch_size:
|
if len(batch) >= batch_size:
|
||||||
@@ -151,7 +205,7 @@ def InsertPathDataToDB(data, db_path='../src/db_ntfs_info.db', table_name='db_pa
|
|||||||
total_inserted += cursor.rowcount
|
total_inserted += cursor.rowcount
|
||||||
print(f"✅ 提交最后一批 {len(batch)} 条数据")
|
print(f"✅ 提交最后一批 {len(batch)} 条数据")
|
||||||
|
|
||||||
print(f"✅ 总共插入 {total_inserted} 条记录到数据库。")
|
print(f"✅ 总共插入 {total_inserted} 条记录到 NewDBPath 表。")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"❌ 插入失败: {e}")
|
print(f"❌ 插入失败: {e}")
|
||||||
@@ -169,9 +223,9 @@ def main():
|
|||||||
scanned_data = ScanVolume(volume_letter)
|
scanned_data = ScanVolume(volume_letter)
|
||||||
|
|
||||||
print(f"📊 共扫描到 {len(scanned_data)} 条有效记录,开始入库...")
|
print(f"📊 共扫描到 {len(scanned_data)} 条有效记录,开始入库...")
|
||||||
InsertPathDataToDB(scanned_data)
|
InsertNewDBPathToDB(scanned_data)
|
||||||
|
|
||||||
print("✅ 全盘扫描与入库完成")
|
print("✅ 全盘扫描与 NewDBPath 表入库完成")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
Binary file not shown.
Reference in New Issue
Block a user