#!/usr/bin/env python3
"""
Check the database `files` table against the files actually stored on disk.
"""

import mysql.connector
import os
import hashlib
def check_database_files():
    """Print every row of the `files` table and verify each file exists on disk.

    For files that do exist under ``uploads/``, the on-disk size and SHA-256
    hash are compared against the values recorded in the database and any
    mismatch is reported. All results go to stdout; returns None.
    """
    try:
        # NOTE(review): credentials are hardcoded — move to env vars/config.
        conn = mysql.connector.connect(
            host="101.126.85.76",
            user="mytest_db",
            password="mytest_db",
            database="mytest_db"
        )
        cursor = conn.cursor()

        # Fetch every file record, newest first.
        cursor.execute("""
            SELECT id, user_id, original_filename, filename, file_path, file_size,
                   file_hash, mime_type, created_at
            FROM files
            ORDER BY created_at DESC
        """)
        files = cursor.fetchall()

        print("=== 数据库 files 表中的记录 ===")
        if files:
            for row in files:
                # `file_id` instead of `id` to avoid shadowing the builtin.
                (file_id, user_id, original_filename, filename, file_path,
                 file_size, file_hash, mime_type, created_at) = row

                print(f"ID: {file_id}")
                print(f" 用户ID: {user_id}")
                print(f" 原始文件名: {original_filename}")
                # BUG FIX: previously printed a literal placeholder instead of
                # the stored filename that was unpacked above.
                print(f" 存储文件名: {filename}")
                print(f" 文件路径: {file_path}")
                print(f" 文件大小: {file_size} bytes")
                print(f" 文件哈希: {file_hash}")
                print(f" MIME类型: {mime_type}")
                print(f" 创建时间: {created_at}")
                print("-" * 50)
        else:
            print("files 表中没有记录")

        print(f"\n总记录数: {len(files)}")

        # Verify each recorded file actually exists under uploads/.
        print("\n=== 文件存在性检查 ===")
        existing_count = 0
        missing_files = []

        for row in files:
            (file_id, user_id, original_filename, filename, file_path,
             file_size, file_hash, mime_type, created_at) = row

            full_path = os.path.join("uploads", filename)
            if os.path.exists(full_path):
                existing_count += 1
                print(f"✅ ID {file_id}: {original_filename} - 文件存在")

                # Compare the on-disk size with the recorded size.
                actual_size = os.path.getsize(full_path)
                if actual_size != file_size:
                    print(f" ⚠️ 文件大小不匹配! 数据库: {file_size}, 实际: {actual_size}")

                # Compare the on-disk SHA-256 with the recorded hash.
                try:
                    with open(full_path, 'rb') as f:
                        content = f.read()
                    actual_hash = hashlib.sha256(content).hexdigest()
                    if actual_hash != file_hash:
                        print(f" ❌ 文件哈希不匹配! 数据库: {file_hash}")
                        print(f" 实际: {actual_hash}")
                except OSError as e:
                    # Narrowed from Exception: only I/O errors are expected here.
                    print(f" ❌ 无法读取文件或计算哈希: {e}")
            else:
                missing_files.append((file_id, original_filename, filename))
                print(f"❌ ID {file_id}: {original_filename} - 文件不存在!")

        print(f"\n实际存在的文件: {existing_count}")
        print(f"缺失的文件: {len(missing_files)}")

        if missing_files:
            print("\n缺失文件详情:")
            for (file_id, original_filename, filename) in missing_files:
                # BUG FIX: same placeholder issue — show the stored filename.
                print(f" ID {file_id}: {original_filename} (应存储为: {filename})")

    except Exception as e:
        # Top-level boundary for this check: report and fall through to cleanup.
        print(f"数据库查询出错: {e}")
    finally:
        # Close cursor/connection only if the connection was established.
        if 'conn' in locals() and conn.is_connected():
            cursor.close()
            conn.close()
def check_uploads_directory():
    """List the files in the local `uploads` directory.

    For each file, print its size, SHA-256 hash, and a short preview of the
    content (UTF-8 text only; binary files get a placeholder). Prints to
    stdout; returns None.
    """
    print("\n=== uploads 目录中的实际文件 ===")

    uploads_dir = "uploads"
    if os.path.exists(uploads_dir):
        files = os.listdir(uploads_dir)
        if files:
            print(f"目录: {uploads_dir}")
            print(f"文件数量: {len(files)}")

            for file in files:
                file_path = os.path.join(uploads_dir, file)
                file_size = os.path.getsize(file_path)

                # Hash the raw bytes; on failure record the error strings.
                try:
                    with open(file_path, 'rb') as f:
                        content = f.read()
                    file_hash = hashlib.sha256(content).hexdigest()

                    # Try to decode as UTF-8 text for a short preview.
                    try:
                        with open(file_path, 'r', encoding='utf-8') as f:
                            text_content = f.read()
                        content_preview = text_content[:100] + "..." if len(text_content) > 100 else text_content
                        content_preview = repr(content_preview)  # show quotes/escapes
                    # BUG FIX: was a bare `except:` which also swallowed
                    # KeyboardInterrupt/SystemExit; only a failed UTF-8 decode
                    # is the expected "this is binary" signal here.
                    except UnicodeDecodeError:
                        content_preview = "(二进制文件)"

                except Exception as e:
                    file_hash = f"无法计算哈希: {e}"
                    content_preview = f"无法读取: {e}"

                print(f"\n📄 {file}")
                print(f" 大小: {file_size} bytes")
                print(f" 哈希: {file_hash}")
                print(f" 内容预览: {content_preview}")
        else:
            print(f"目录 {uploads_dir} 为空")
    else:
        print(f"目录 {uploads_dir} 不存在")
def check_file_integrity():
    """Cross-check every `files` row against the file on disk.

    A file passes when it exists under ``uploads/``, its size matches the
    recorded size, and its SHA-256 digest matches the recorded hash. All
    findings are printed to stdout; returns None.
    """
    print("\n=== 文件完整性检查 ===")

    try:
        conn = mysql.connector.connect(
            host="101.126.85.76",
            user="mytest_db",
            password="mytest_db",
            database="mytest_db"
        )
        cursor = conn.cursor()

        cursor.execute("SELECT id, filename, file_hash, file_size FROM files")

        integrity_issues = []

        for row_id, stored_name, want_hash, want_size in cursor.fetchall():
            disk_path = os.path.join("uploads", stored_name)

            # Guard clause: a missing file short-circuits all other checks.
            if not os.path.exists(disk_path):
                integrity_issues.append(f"ID {row_id}: 文件不存在")
                continue

            # Size mismatch makes the hash comparison pointless; skip it.
            have_size = os.path.getsize(disk_path)
            if have_size != want_size:
                integrity_issues.append(f"ID {row_id}: 文件大小不匹配 (期望: {want_size}, 实际: {have_size})")
                continue

            try:
                with open(disk_path, 'rb') as fh:
                    have_hash = hashlib.sha256(fh.read()).hexdigest()
            except Exception as e:
                integrity_issues.append(f"ID {row_id}: 无法计算文件哈希 - {e}")
                continue

            if have_hash != want_hash:
                integrity_issues.append(f"ID {row_id}: 文件哈希不匹配")
                print(f" 期望哈希: {want_hash}")
                print(f" 实际哈希: {have_hash}")

        if integrity_issues:
            print(f"❌ 发现 {len(integrity_issues)} 个完整性问题:")
            for issue in integrity_issues:
                print(f" - {issue}")
        else:
            print("✅ 所有文件完整性检查通过!")

    except Exception as e:
        print(f"完整性检查出错: {e}")
    finally:
        # Release DB resources only when the connection actually came up.
        if 'conn' in locals() and conn.is_connected():
            cursor.close()
            conn.close()
if __name__ == "__main__":
    # Run the three checks in order: DB records, disk contents, cross-check.
    check_database_files()
    check_uploads_directory()
    check_file_integrity()

    # Fixed summary of the storage scheme the checks above verify.
    summary_lines = (
        "\n=== 总结 ===",
        "文件存储情况:",
        "1. 数据库存储文件的元数据信息",
        "2. 实际文件存储在 backend/uploads/ 目录",
        "3. 文件名使用UUID格式确保唯一性",
        "4. 通过file_hash确保文件完整性",
        "5. 支持文件去重功能",
    )
    print("\n".join(summary_lines))