analyze db_node table

This commit is contained in:
Burgess Leo
2025-05-16 17:45:35 +08:00
parent 7d21842287
commit ae777f75d9
8 changed files with 516 additions and 17 deletions

View File

@@ -1,6 +1,49 @@
import hashlib
import os
import sqlite3
import time
def get_file_times(full_path):
    """Return a path's creation, modification, access and change times.

    Each timestamp is rendered in local time as ``YYYY-MM-DD HH:MM:SS``.

    Args:
        full_path: Path of the file or directory to inspect.

    Returns:
        tuple: ``(create_time, modify_time, access_time, auth_time)``.
        If the path cannot be stat'ed, or a timestamp cannot be converted,
        a warning is printed and all four elements are ``"unknown"``.
    """
    def format_time(timestamp):
        # Convert an epoch timestamp to a readable local-time string.
        return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(timestamp))

    try:
        info = os.stat(full_path)
        # st_ctime is only the creation time on Windows (and that use is
        # deprecated since Python 3.12); on POSIX it is the metadata-change
        # time. Prefer st_birthtime when the platform provides it and fall
        # back to st_ctime otherwise.
        birth_timestamp = getattr(info, 'st_birthtime', info.st_ctime)
        create_time = format_time(birth_timestamp)
        modify_time = format_time(info.st_mtime)
        access_time = format_time(info.st_atime)
        # st_ctime serves as an approximation of the permission-change time.
        auth_time = format_time(info.st_ctime)
        return create_time, modify_time, access_time, auth_time
    except (OSError, ValueError, TypeError, OverflowError) as e:
        # Best effort: an unreadable path or an out-of-range timestamp must
        # not abort the caller, so report placeholders instead.
        print(f"⚠️ 获取时间失败: {e}")
        return "unknown", "unknown", "unknown", "unknown"
def get_file_mode(full_path):
    """Return a placeholder permission mode for a path.

    This is a stand-in for real permission handling (simulated for
    Windows); it could later be extended to inspect attributes such as
    read-only or hidden.

    Args:
        full_path: Path of the file or directory to inspect.

    Returns:
        str: ``"default"`` when the path can be stat'ed, ``"unknown"``
        otherwise.
    """
    try:
        # The stat result is not inspected yet; the call only verifies that
        # the path is reachable.
        os.stat(full_path)
    except (OSError, ValueError, TypeError):
        return "unknown"
    return "default"
def GenerateHash(s: str) -> str:
@@ -63,7 +106,7 @@ def ScanVolume(volume_letter: str):
raise ValueError(f"磁盘 {root_path} 不存在")
result = []
path_to_id = {} # 用于记录路径到 ID 的映射
path_to_id = {} # 用于记录路径到数据库 ID 的映射
counter = 1 # 模拟数据库自增 ID
for root, dirs, files in os.walk(root_path, topdown=True, onerror=None, followlinks=False):
@@ -87,22 +130,21 @@ def ScanVolume(volume_letter: str):
continue
name = entry
# ✅ 对 Path 字段进行哈希
path_hash = GenerateHash(full_path)
# 计算 ContentSizeKB小文件至少显示为 1 KB
# 计算 ContentSizeKB小文件至少显示为 1 KB
content_size = bytes_size // 1024
if content_size == 0 and bytes_size > 0:
content_size = 1
# ✅ 获取父目录路径
parent_path = os.path.dirname(full_path)
parent_id = path_to_id.get(parent_path, 0)
# ✅ 计算 DirLayer目录层级
dir_layer = GetDirLayer(full_path, volume_letter)
# ✅ 获取文件时间属性
ctime, mtime, atime, chgtime = get_file_times(full_path)
mode = get_file_mode(full_path)
item = {
"ID": counter,
"Path": full_path,
@@ -112,11 +154,11 @@ def ScanVolume(volume_letter: str):
"ParentID": parent_id,
"ContentSize": content_size,
"DirLayer": dir_layer,
"FileCreateTime": "default",
"FileModifyTime": "default",
"FileAccessTime": "default",
"FileAuthTime": "default",
"FileMode": "default"
"FileCreateTime": ctime,
"FileModifyTime": mtime,
"FileAccessTime": atime,
"FileAuthTime": chgtime,
"FileMode": mode
}
result.append(item)
@@ -129,7 +171,7 @@ def ScanVolume(volume_letter: str):
return result
def InsertNewDBPathToDB(data, db_path='../src/db_ntfs_info.db', table_name='db_path', batch_size=20):
def InsertPathDataToDB(data, db_path='../src/db_ntfs_info.db', table_name='db_path', batch_size=20):
"""
批量将扫描结果写入 NewDBPath 表中,支持新字段。
@@ -223,7 +265,7 @@ def main():
scanned_data = ScanVolume(volume_letter)
print(f"📊 共扫描到 {len(scanned_data)} 条有效记录,开始入库...")
InsertNewDBPathToDB(scanned_data)
InsertPathDataToDB(scanned_data)
print("✅ 全盘扫描与 NewDBPath 表入库完成")