Files
full-stack-doc/backend/check_files_table.py
2025-10-14 20:05:29 +08:00

219 lines
7.8 KiB
Python

#!/usr/bin/env python3
"""
检查数据库files表和实际文件存储情况
"""
import mysql.connector
import os
import hashlib
def check_database_files():
    """Print every row of the ``files`` table and verify each against disk.

    For each database record, checks that the stored file exists under
    ``uploads/`` and that its on-disk size and SHA-256 hash match the
    recorded metadata.  Results are printed; nothing is returned.
    """
    conn = None
    cursor = None
    try:
        # NOTE(review): credentials are hard-coded; move them to env vars
        # or a config file before this leaves a dev environment.
        conn = mysql.connector.connect(
            host="101.126.85.76",
            user="mytest_db",
            password="mytest_db",
            database="mytest_db"
        )
        cursor = conn.cursor()
        # Fetch all file records, newest first.
        cursor.execute("""
            SELECT id, user_id, original_filename, filename, file_path, file_size,
                   file_hash, mime_type, created_at
            FROM files
            ORDER BY created_at DESC
        """)
        files = cursor.fetchall()
        print("=== 数据库 files 表中的记录 ===")
        if files:
            for row in files:
                (file_id, user_id, original_filename, filename, file_path,
                 file_size, file_hash, mime_type, created_at) = row
                print(f"ID: {file_id}")
                print(f" 用户ID: {user_id}")
                print(f" 原始文件名: {original_filename}")
                # BUG FIX: previously printed a literal placeholder instead
                # of the stored (on-disk) filename.
                print(f" 存储文件名: {filename}")
                print(f" 文件路径: {file_path}")
                print(f" 文件大小: {file_size} bytes")
                print(f" 文件哈希: {file_hash}")
                print(f" MIME类型: {mime_type}")
                print(f" 创建时间: {created_at}")
                print("-" * 50)
        else:
            print("files 表中没有记录")
        print(f"\n总记录数: {len(files)}")

        # Cross-check each database record against the uploads directory.
        print("\n=== 文件存在性检查 ===")
        existing_count = 0
        missing_files = []
        for row in files:
            (file_id, user_id, original_filename, filename, file_path,
             file_size, file_hash, mime_type, created_at) = row
            full_path = os.path.join("uploads", filename)
            if os.path.exists(full_path):
                existing_count += 1
                print(f"✅ ID {file_id}: {original_filename} - 文件存在")
                # Compare recorded size with the actual on-disk size.
                actual_size = os.path.getsize(full_path)
                if actual_size != file_size:
                    print(f" ⚠️ 文件大小不匹配! 数据库: {file_size}, 实际: {actual_size}")
                # Compare recorded SHA-256 with the actual file content.
                try:
                    with open(full_path, 'rb') as f:
                        content = f.read()
                    actual_hash = hashlib.sha256(content).hexdigest()
                    if actual_hash != file_hash:
                        print(f" ❌ 文件哈希不匹配! 数据库: {file_hash}")
                        print(f" 实际: {actual_hash}")
                except Exception as e:
                    print(f" ❌ 无法读取文件或计算哈希: {e}")
            else:
                missing_files.append((file_id, original_filename, filename))
                print(f"❌ ID {file_id}: {original_filename} - 文件不存在!")
        print(f"\n实际存在的文件: {existing_count}")
        print(f"缺失的文件: {len(missing_files)}")
        if missing_files:
            print("\n缺失文件详情:")
            for (file_id, original_filename, filename) in missing_files:
                # BUG FIX: previously printed a literal placeholder instead
                # of the expected stored filename.
                print(f" ID {file_id}: {original_filename} (应存储为: {filename})")
    except Exception as e:
        print(f"数据库查询出错: {e}")
    finally:
        # Close resources defensively: cursor creation may have failed after
        # the connection was opened, so each handle is checked separately.
        if cursor is not None:
            cursor.close()
        if conn is not None and conn.is_connected():
            conn.close()
def check_uploads_directory():
"""检查uploads目录中的实际文件"""
print("\n=== uploads 目录中的实际文件 ===")
uploads_dir = "uploads"
if os.path.exists(uploads_dir):
files = os.listdir(uploads_dir)
if files:
print(f"目录: {uploads_dir}")
print(f"文件数量: {len(files)}")
for file in files:
file_path = os.path.join(uploads_dir, file)
file_size = os.path.getsize(file_path)
# 计算文件哈希
try:
with open(file_path, 'rb') as f:
content = f.read()
file_hash = hashlib.sha256(content).hexdigest()
# 尝试读取文本内容
try:
with open(file_path, 'r', encoding='utf-8') as f:
text_content = f.read()
content_preview = text_content[:100] + "..." if len(text_content) > 100 else text_content
content_preview = repr(content_preview) # 显示引号和特殊字符
except:
content_preview = "(二进制文件)"
except Exception as e:
file_hash = f"无法计算哈希: {e}"
content_preview = f"无法读取: {e}"
print(f"\n📄 {file}")
print(f" 大小: {file_size} bytes")
print(f" 哈希: {file_hash}")
print(f" 内容预览: {content_preview}")
else:
print(f"目录 {uploads_dir} 为空")
else:
print(f"目录 {uploads_dir} 不存在")
def check_file_integrity():
    """Compare every ``files`` row against its on-disk file and report issues.

    For each record checks existence, then size, then SHA-256 hash; all
    discrepancies are collected and printed as a single report.  Results
    are printed; nothing is returned.
    """
    print("\n=== 文件完整性检查 ===")
    conn = None
    cursor = None
    try:
        # NOTE(review): same hard-coded credentials as check_database_files;
        # factor out a shared connection helper / config.
        conn = mysql.connector.connect(
            host="101.126.85.76",
            user="mytest_db",
            password="mytest_db",
            database="mytest_db"
        )
        cursor = conn.cursor()
        # Only the columns needed for the integrity comparison.
        cursor.execute("SELECT id, filename, file_hash, file_size FROM files")
        db_files = cursor.fetchall()
        integrity_issues = []
        for (file_id, filename, expected_hash, expected_size) in db_files:
            full_path = os.path.join("uploads", filename)
            if not os.path.exists(full_path):
                integrity_issues.append(f"ID {file_id}: 文件不存在")
                continue
            # Size check first: a size mismatch makes the hash check moot.
            actual_size = os.path.getsize(full_path)
            if actual_size != expected_size:
                integrity_issues.append(f"ID {file_id}: 文件大小不匹配 (期望: {expected_size}, 实际: {actual_size})")
                continue
            # Hash check against the recorded SHA-256.
            try:
                with open(full_path, 'rb') as f:
                    content = f.read()
                actual_hash = hashlib.sha256(content).hexdigest()
                if actual_hash != expected_hash:
                    integrity_issues.append(f"ID {file_id}: 文件哈希不匹配")
                    print(f" 期望哈希: {expected_hash}")
                    print(f" 实际哈希: {actual_hash}")
            except Exception as e:
                integrity_issues.append(f"ID {file_id}: 无法计算文件哈希 - {e}")
        if integrity_issues:
            print(f"❌ 发现 {len(integrity_issues)} 个完整性问题:")
            for issue in integrity_issues:
                print(f" - {issue}")
        else:
            print("✅ 所有文件完整性检查通过!")
    except Exception as e:
        print(f"完整性检查出错: {e}")
    finally:
        # BUG FIX: ``cursor`` could be unbound here if connecting succeeded
        # but cursor creation raised; check each handle independently.
        if cursor is not None:
            cursor.close()
        if conn is not None and conn.is_connected():
            conn.close()
if __name__ == "__main__":
check_database_files()
check_uploads_directory()
check_file_integrity()
print("\n=== 总结 ===")
print("文件存储情况:")
print("1. 数据库存储文件的元数据信息")
print("2. 实际文件存储在 backend/uploads/ 目录")
print("3. 文件名使用UUID格式确保唯一性")
print("4. 通过file_hash确保文件完整性")
print("5. 支持文件去重功能")