#!/usr/bin/env python3
"""Check the database ``files`` table against the files stored on disk.

The script runs three independent reports and prints them to stdout:

1. every row of the ``files`` table, followed by an existence / size / hash
   check of the corresponding file under ``uploads/``;
2. a listing of what is actually present in ``uploads/``;
3. a compact integrity summary (size and SHA-256 per database row).
"""
import hashlib
import os

try:
    import mysql.connector
except ImportError:
    # The uploads-directory listing is pure filesystem work; keep it usable
    # when the driver is missing.  The database checks report the problem.
    mysql = None

# NOTE(review): credentials are hard-coded in source.  Values are kept as-is
# so the script behaves exactly as before, but they should be moved to
# configuration / environment and rotated.
_DB_CONFIG = {
    "host": "101.126.85.76",
    "user": "mytest_db",
    "password": "mytest_db",
    "database": "mytest_db",
}

# Directory (relative to the current working directory) holding stored files.
_UPLOADS_DIR = "uploads"

# Files are hashed in chunks of this many bytes instead of being read whole.
_HASH_CHUNK_SIZE = 1024 * 1024


def _fetch_all(query):
    """Run *query* on a fresh connection and return all rows.

    The connection is opened and closed inside this call, so it is not held
    open while the caller inspects files on disk.

    Raises:
        RuntimeError: if the MySQL driver is not installed.
        Exception: whatever the driver raises for connection/query errors.
    """
    if mysql is None:
        raise RuntimeError("未安装 mysql-connector-python,无法连接数据库")

    conn = mysql.connector.connect(**_DB_CONFIG)
    cursor = None
    try:
        cursor = conn.cursor()
        cursor.execute(query)
        return cursor.fetchall()
    finally:
        # `cursor` stays None when conn.cursor() itself failed; closing it
        # unconditionally would raise and hide the original error.
        if conn.is_connected():
            if cursor is not None:
                cursor.close()
            conn.close()


def _sha256_of_file(path):
    """Return the hex SHA-256 digest of the file at *path*.

    The file is read in fixed-size chunks so large uploads do not have to fit
    in memory.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        for chunk in iter(lambda: stream.read(_HASH_CHUNK_SIZE), b""):
            digest.update(chunk)
    return digest.hexdigest()


def _preview_text(path, limit=100):
    """Return a ``repr`` of the first *limit* characters of a UTF-8 file.

    The whole file is decoded, so a file containing invalid UTF-8 anywhere is
    reported with the binary-file placeholder, as is a file that cannot be
    read.
    """
    try:
        with open(path, "r", encoding="utf-8") as stream:
            text = stream.read()
    except (UnicodeDecodeError, OSError):
        return "(二进制文件)"
    preview = text[:limit] + "..." if len(text) > limit else text
    # repr() makes quotes and special characters visible.
    return repr(preview)


def check_database_files():
    """Print every ``files`` row and verify the matching file on disk.

    For each row the file ``uploads/<filename>`` is checked for existence,
    size and SHA-256 hash; mismatches and missing files are reported.  Any
    error (including database errors) is printed rather than raised.
    """
    try:
        rows = _fetch_all("""
            SELECT id, user_id, original_filename, filename, file_path,
                   file_size, file_hash, mime_type, created_at
            FROM files
            ORDER BY created_at DESC
        """)

        print("=== 数据库 files 表中的记录 ===")
        if rows:
            for row in rows:
                (file_id, user_id, original_filename, filename, file_path,
                 file_size, file_hash, mime_type, created_at) = row
                print(f"ID: {file_id}")
                print(f" 用户ID: {user_id}")
                print(f" 原始文件名: {original_filename}")
                print(f" 存储文件名: {filename}")
                print(f" 文件路径: {file_path}")
                print(f" 文件大小: {file_size} bytes")
                print(f" 文件哈希: {file_hash}")
                print(f" MIME类型: {mime_type}")
                print(f" 创建时间: {created_at}")
                print("-" * 50)
        else:
            print("files 表中没有记录")

        print(f"\n总记录数: {len(rows)}")

        # Check that each recorded file really exists and matches the record.
        print("\n=== 文件存在性检查 ===")
        existing_count = 0
        missing_files = []

        for row in rows:
            (file_id, _user_id, original_filename, filename, _file_path,
             file_size, file_hash, _mime_type, _created_at) = row
            full_path = os.path.join(_UPLOADS_DIR, filename)

            if not os.path.exists(full_path):
                missing_files.append((file_id, original_filename, filename))
                print(f"❌ ID {file_id}: {original_filename} - 文件不存在!")
                continue

            existing_count += 1
            print(f"✅ ID {file_id}: {original_filename} - 文件存在")

            actual_size = os.path.getsize(full_path)
            if actual_size != file_size:
                print(f" ⚠️ 文件大小不匹配! 数据库: {file_size}, 实际: {actual_size}")

            try:
                actual_hash = _sha256_of_file(full_path)
            except OSError as e:
                print(f" ❌ 无法读取文件或计算哈希: {e}")
            else:
                if actual_hash != file_hash:
                    print(f" ❌ 文件哈希不匹配! 数据库: {file_hash}")
                    print(f" 实际: {actual_hash}")

        print(f"\n实际存在的文件: {existing_count}")
        print(f"缺失的文件: {len(missing_files)}")

        if missing_files:
            print("\n缺失文件详情:")
            for file_id, original_filename, filename in missing_files:
                print(f" ID {file_id}: {original_filename} (应存储为: {filename})")

    except Exception as e:
        # Top-level boundary of a diagnostic script: report and carry on with
        # the remaining checks.
        print(f"数据库查询出错: {e}")


def check_uploads_directory():
    """Print size, SHA-256 and a content preview for each entry in uploads/.

    Entries that cannot be read (directories, dangling links, permission
    problems) are reported inline instead of aborting the listing.
    """
    print("\n=== uploads 目录中的实际文件 ===")

    uploads_dir = _UPLOADS_DIR
    # isdir rather than exists: a regular file named "uploads" cannot be
    # listed and is reported the same way as a missing directory.
    if not os.path.isdir(uploads_dir):
        print(f"目录 {uploads_dir} 不存在")
        return

    entries = os.listdir(uploads_dir)
    if not entries:
        print(f"目录 {uploads_dir} 为空")
        return

    print(f"目录: {uploads_dir}")
    print(f"文件数量: {len(entries)}")

    for entry in entries:
        entry_path = os.path.join(uploads_dir, entry)

        try:
            size_text = f"{os.path.getsize(entry_path)} bytes"
        except OSError as e:
            size_text = f"无法获取: {e}"

        try:
            file_hash = _sha256_of_file(entry_path)
        except OSError as e:
            file_hash = f"无法计算哈希: {e}"
            content_preview = f"无法读取: {e}"
        else:
            content_preview = _preview_text(entry_path)

        print(f"\n📄 {entry}")
        print(f" 大小: {size_text}")
        print(f" 哈希: {file_hash}")
        print(f" 内容预览: {content_preview}")


def check_file_integrity():
    """Compare each database row's size and hash with the file on disk.

    Collects one issue per problematic row (missing file, size mismatch, hash
    mismatch, unreadable file) and prints a summary.  A size mismatch skips
    the hash comparison for that row.  Errors are printed rather than raised.
    """
    print("\n=== 文件完整性检查 ===")

    try:
        db_files = _fetch_all(
            "SELECT id, filename, file_hash, file_size FROM files"
        )

        integrity_issues = []

        for file_id, filename, expected_hash, expected_size in db_files:
            full_path = os.path.join(_UPLOADS_DIR, filename)

            if not os.path.exists(full_path):
                integrity_issues.append(f"ID {file_id}: 文件不存在")
                continue

            actual_size = os.path.getsize(full_path)
            if actual_size != expected_size:
                integrity_issues.append(
                    f"ID {file_id}: 文件大小不匹配 "
                    f"(期望: {expected_size}, 实际: {actual_size})"
                )
                continue

            try:
                actual_hash = _sha256_of_file(full_path)
            except OSError as e:
                integrity_issues.append(f"ID {file_id}: 无法计算文件哈希 - {e}")
                continue

            if actual_hash != expected_hash:
                integrity_issues.append(f"ID {file_id}: 文件哈希不匹配")
                print(f" 期望哈希: {expected_hash}")
                print(f" 实际哈希: {actual_hash}")

        if integrity_issues:
            print(f"❌ 发现 {len(integrity_issues)} 个完整性问题:")
            for issue in integrity_issues:
                print(f" - {issue}")
        else:
            print("✅ 所有文件完整性检查通过!")

    except Exception as e:
        # Top-level boundary of a diagnostic script: report, do not crash.
        print(f"完整性检查出错: {e}")


def main():
    """Run all three checks in order and print the closing summary."""
    check_database_files()
    check_uploads_directory()
    check_file_integrity()

    print("\n=== 总结 ===")
    print("文件存储情况:")
    print("1. 数据库存储文件的元数据信息")
    print("2. 实际文件存储在 backend/uploads/ 目录")
    print("3. 文件名使用UUID格式确保唯一性")
    print("4. 通过file_hash确保文件完整性")
    print("5. 支持文件去重功能")


if __name__ == "__main__":
    main()