analyze db_node table
This commit is contained in:
@@ -1,6 +1,49 @@
|
||||
import hashlib
|
||||
import os
|
||||
import sqlite3
|
||||
import time
|
||||
|
||||
|
||||
def get_file_times(full_path):
    """
    Return a file's creation, modification, access and change times as strings.

    Every timestamp is rendered in local time as ``YYYY-MM-DD HH:MM:SS``.

    Args:
        full_path: str, path of the file to inspect.

    Returns:
        tuple: ``(create_time, modify_time, access_time, auth_time)``.
        ``create_time`` uses ``st_birthtime`` when the platform exposes it and
        falls back to ``st_ctime`` otherwise; ``auth_time`` always uses
        ``st_ctime``. All four entries are ``"unknown"`` when the file cannot
        be inspected (a warning is printed in that case).
    """
    def format_time(timestamp):
        # Human-readable local time: YYYY-MM-DD HH:MM:SS
        return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(timestamp))

    try:
        stat = os.stat(full_path)

        # st_ctime is the metadata-change time on Unix and only a deprecated
        # alias for the creation time on Windows, so prefer the dedicated
        # creation-time field whenever the platform provides one.
        birth_timestamp = getattr(stat, 'st_birthtime', stat.st_ctime)

        create_time = format_time(birth_timestamp)
        modify_time = format_time(stat.st_mtime)
        access_time = format_time(stat.st_atime)
        # Approximation of the "permission changed" time via st_ctime.
        auth_time = format_time(stat.st_ctime)

        return create_time, modify_time, access_time, auth_time

    except Exception as e:
        # Deliberately broad: one unreadable entry must not abort the caller;
        # the failure is reported and placeholder values are returned instead.
        print(f"⚠️ 获取时间失败: {e}")
        return "unknown", "unknown", "unknown", "unknown"
|
||||
|
||||
|
||||
def get_file_mode(full_path):
    """
    Return a placeholder permission mode for a file.

    The path is only probed with ``os.stat``; no attribute bits are decoded.
    The result could later be extended to reflect attributes such as
    read-only or hidden.

    Args:
        full_path: path of the file to probe.

    Returns:
        str: ``"default"`` when ``os.stat`` succeeds, ``"unknown"`` when it
        raises.
    """
    try:
        # The stat result itself is not used; the call only checks that the
        # path can be inspected.
        os.stat(full_path)
    except Exception:
        mode = "unknown"
    else:
        mode = "default"
    return mode
|
||||
|
||||
|
||||
def GenerateHash(s: str) -> str:
|
||||
@@ -63,7 +106,7 @@ def ScanVolume(volume_letter: str):
|
||||
raise ValueError(f"磁盘 {root_path} 不存在")
|
||||
|
||||
result = []
|
||||
path_to_id = {} # 用于记录路径到 ID 的映射
|
||||
path_to_id = {} # 用于记录路径到数据库 ID 的映射
|
||||
counter = 1 # 模拟数据库自增 ID
|
||||
|
||||
for root, dirs, files in os.walk(root_path, topdown=True, onerror=None, followlinks=False):
|
||||
@@ -87,22 +130,21 @@ def ScanVolume(volume_letter: str):
|
||||
continue
|
||||
|
||||
name = entry
|
||||
|
||||
# ✅ 对 Path 字段进行哈希
|
||||
path_hash = GenerateHash(full_path)
|
||||
|
||||
# ✅ 计算 ContentSize(KB),小文件至少显示为 1 KB
|
||||
# 计算 ContentSize(KB),小文件至少显示为 1 KB
|
||||
content_size = bytes_size // 1024
|
||||
if content_size == 0 and bytes_size > 0:
|
||||
content_size = 1
|
||||
|
||||
# ✅ 获取父目录路径
|
||||
parent_path = os.path.dirname(full_path)
|
||||
parent_id = path_to_id.get(parent_path, 0)
|
||||
|
||||
# ✅ 计算 DirLayer(目录层级)
|
||||
dir_layer = GetDirLayer(full_path, volume_letter)
|
||||
|
||||
# ✅ 获取文件时间属性
|
||||
ctime, mtime, atime, chgtime = get_file_times(full_path)
|
||||
mode = get_file_mode(full_path)
|
||||
|
||||
item = {
|
||||
"ID": counter,
|
||||
"Path": full_path,
|
||||
@@ -112,11 +154,11 @@ def ScanVolume(volume_letter: str):
|
||||
"ParentID": parent_id,
|
||||
"ContentSize": content_size,
|
||||
"DirLayer": dir_layer,
|
||||
"FileCreateTime": "default",
|
||||
"FileModifyTime": "default",
|
||||
"FileAccessTime": "default",
|
||||
"FileAuthTime": "default",
|
||||
"FileMode": "default"
|
||||
"FileCreateTime": ctime,
|
||||
"FileModifyTime": mtime,
|
||||
"FileAccessTime": atime,
|
||||
"FileAuthTime": chgtime,
|
||||
"FileMode": mode
|
||||
}
|
||||
|
||||
result.append(item)
|
||||
@@ -129,7 +171,7 @@ def ScanVolume(volume_letter: str):
|
||||
return result
|
||||
|
||||
|
||||
def InsertNewDBPathToDB(data, db_path='../src/db_ntfs_info.db', table_name='db_path', batch_size=20):
|
||||
def InsertPathDataToDB(data, db_path='../src/db_ntfs_info.db', table_name='db_path', batch_size=20):
|
||||
"""
|
||||
批量将扫描结果写入 NewDBPath 表中,支持新字段。
|
||||
|
||||
@@ -223,7 +265,7 @@ def main():
|
||||
scanned_data = ScanVolume(volume_letter)
|
||||
|
||||
print(f"📊 共扫描到 {len(scanned_data)} 条有效记录,开始入库...")
|
||||
InsertNewDBPathToDB(scanned_data)
|
||||
InsertPathDataToDB(scanned_data)
|
||||
|
||||
print("✅ 全盘扫描与 NewDBPath 表入库完成")
|
||||
|
||||
|
Reference in New Issue
Block a user