diff --git a/01_data_exploration_and_visualization.ipynb b/01_data_exploration_and_visualization.ipynb
new file mode 100644
index 0000000..3296599
--- /dev/null
+++ b/01_data_exploration_and_visualization.ipynb
@@ -0,0 +1,368 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 农业数据集探索与可视化分析\n",
+    "\n",
+    "本 Notebook 演示如何加载和分析作物病害、气象、产量等多类数据集，并进行可视化展示。\n",
+    "\n",
+    "**场景：** 数据分析 / 可视化\n",
+    "\n",
+    "**数据集：**\n",
+    "- CSV: 作物病害标注表\n",
+    "- TSV: 农业气象数据\n",
+    "- Parquet: 作物产量统计\n- JSON: 作物病害检测标注（COCO 格式）"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. 环境准备"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import matplotlib\n",
+    "import numpy as np\n",
+    "import json\n",
+    "import os\n",
+    "\n",
+    "# Prefer fonts that ship CJK glyphs so the Chinese labels render, and use the ASCII hyphen for negative ticks.\n",
+    "matplotlib.rcParams.update({'font.sans-serif': ['SimHei', 'WenQuanYi Micro Hei', 'DejaVu Sans'],\n",
+    "                            'axes.unicode_minus': False})\n",
+    "\n",
+    "# Data root = parent of the current working directory. NOTE(review): confirm csv/, tsv/, parquet/ and json/ live there.\n",
+    "DATA_DIR = os.path.dirname(os.path.abspath(''))\n",
+    "print('数据目录:', DATA_DIR)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. 加载 CSV - 作物病害标注表"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read the crop-disease annotation table (CSV), print its shape and preview the first 10 rows\n",
+    "df_csv = pd.read_csv(f'{DATA_DIR}/csv/作物病害标注表.csv')\n",
+    "print(f'数据集大小: {df_csv.shape}')\n",
+    "df_csv.head(10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Table overview: column dtypes and per-column missing-value counts; describe() is the cell's displayed result\n",
+    "print('=== 数据类型 ===')\n",
+    "print(df_csv.dtypes)\n",
+    "print('\\n=== 缺失值统计 ===')\n",
+    "print(df_csv.isnull().sum())\n",
+    "print('\\n=== 基本统计 ===')\n",
+    "df_csv.describe()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. 作物病害分布分析"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 2x2 overview of the disease table; the panels are unpacked in row-major order\n",
+    "fig, axes = plt.subplots(2, 2, figsize=(14, 10))\n",
+    "ax_crop, ax_severity, ax_region, ax_climate = axes.ravel()\n",
+    "\n",
+    "# 3.1 Number of disease records per crop\n",
+    "crop_counts = df_csv['作物'].value_counts()\n",
+    "ax_crop.bar(crop_counts.index, crop_counts.values, color=plt.cm.Set3.colors[:len(crop_counts)])\n",
+    "ax_crop.set(title='各作物病害记录数', xlabel='作物', ylabel='记录数')\n",
+    "ax_crop.tick_params(axis='x', rotation=45)\n",
+    "\n",
+    "# 3.2 Share of each severity level (pie chart)\n",
+    "severity_counts = df_csv['严重程度'].value_counts()\n",
+    "severity_palette = plt.cm.RdYlGn_r(np.linspace(0.1, 0.9, len(severity_counts)))\n",
+    "ax_severity.pie(severity_counts.values, labels=severity_counts.index, autopct='%1.1f%%',\n",
+    "                colors=severity_palette)\n",
+    "ax_severity.set_title('病害严重程度分布')\n",
+    "\n",
+    "# 3.3 Number of disease records per region\n",
+    "region_counts = df_csv['地区'].value_counts()\n",
+    "ax_region.barh(region_counts.index, region_counts.values, color=plt.cm.Paired.colors[:len(region_counts)])\n",
+    "ax_region.set(title='各地区病害记录分布', xlabel='记录数')\n",
+    "\n",
+    "# 3.4 Temperature vs. humidity, one scatter series per severity level.\n",
+    "# Only the levels listed in `colors` are drawn; rows carrying any other label are not plotted here.\n",
+    "colors = {'轻微': 'green', '轻度': 'yellowgreen', '中度': 'orange', '重度': 'red', '极重度': 'darkred'}\n",
+    "for sev, color in colors.items():\n",
+    "    rows = df_csv[df_csv['严重程度'] == sev]\n",
+    "    ax_climate.scatter(rows['温度_℃'], rows['湿度_%'],\n",
+    "                       c=color, label=sev, alpha=0.6, s=20)\n",
+    "ax_climate.set(title='温度与湿度关系（按严重程度着色）', xlabel='温度 (℃)', ylabel='湿度 (%)')\n",
+    "ax_climate.legend(fontsize=8)\n",
+    "\n",
+    "# Write a PNG copy to the working directory, then render the figure inline\n",
+    "plt.tight_layout()\n",
+    "plt.savefig('作物病害分析图表.png', dpi=150, bbox_inches='tight')\n",
+    "plt.show()\n",
+    "print('图表已保存')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. 加载 TSV - 农业气象数据分析"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read the tab-separated agro-weather table and convert the '日期' column to datetimes\n",
+    "df_weather = pd.read_csv(f'{DATA_DIR}/tsv/农业气象数据.tsv', sep='\\t')\n",
+    "df_weather['日期'] = pd.to_datetime(df_weather['日期'])\n",
+    "print(f'气象数据集大小: {df_weather.shape}')\n",
+    "df_weather.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, axes = plt.subplots(2, 2, figsize=(14, 10))\n",
+    "\n",
+    "# Monthly temperature trend. The month is kept as a standalone grouping key rather than a new\n",
+    "# df_weather column, so the loaded table (and the column count printed in the final report) stays unchanged.\n",
+    "month = df_weather['日期'].dt.month.rename('月份')\n",
+    "monthly_temp = df_weather.groupby(month)['平均温度_℃'].agg(['mean', 'min', 'max'])\n",
+    "axes[0, 0].plot(monthly_temp.index, monthly_temp['mean'], 'r-o', label='平均温度')\n",
+    "axes[0, 0].fill_between(monthly_temp.index, monthly_temp['min'], monthly_temp['max'], alpha=0.2, color='red')\n",
+    "axes[0, 0].set(title='月度温度趋势', xlabel='月份', ylabel='温度 (℃)')\n",
+    "axes[0, 0].legend()\n",
+    "axes[0, 0].grid(True, alpha=0.3)\n",
+    "\n",
+    "# Total precipitation per calendar month\n",
+    "monthly_rain = df_weather.groupby(month)['降水量_mm'].sum()\n",
+    "axes[0, 1].bar(monthly_rain.index, monthly_rain.values, color='steelblue', alpha=0.8)\n",
+    "axes[0, 1].set_title('月度累计降水量')\n",
+    "axes[0, 1].set_xlabel('月份')\n",
+    "axes[0, 1].set_ylabel('降水量 (mm)')\n",
+    "\n",
+    "# Histogram of relative humidity; the mean is computed once and marked with a dashed line\n",
+    "humidity_mean = df_weather['相对湿度_%'].mean()\n",
+    "axes[1, 0].hist(df_weather['相对湿度_%'], bins=30, color='teal', alpha=0.7, edgecolor='white')\n",
+    "axes[1, 0].axvline(humidity_mean, color='red', linestyle='--', label=f'均值: {humidity_mean:.1f}%')\n",
+    "axes[1, 0].set_title('相对湿度分布')\n",
+    "axes[1, 0].set_xlabel('湿度 (%)')\n",
+    "axes[1, 0].legend()\n",
+    "\n",
+    "# Share of each weather condition\n",
+    "weather_counts = df_weather['天气状况'].value_counts()\n",
+    "axes[1, 1].pie(weather_counts.values, labels=weather_counts.index, autopct='%1.1f%%',\n",
+    "              colors=plt.cm.Set2.colors[:len(weather_counts)])\n",
+    "axes[1, 1].set_title('天气状况分布')\n",
+    "\n",
+    "plt.tight_layout()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5. 加载 Parquet - 作物产量分析"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read the crop-yield statistics table from Parquet, print its shape and preview the first rows\n",
+    "df_yield = pd.read_parquet(f'{DATA_DIR}/parquet/作物产量统计.parquet')\n",
+    "print(f'产量数据集大小: {df_yield.shape}')\n",
+    "df_yield.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, axes = plt.subplots(2, 2, figsize=(14, 10))\n",
+    "(ax_by_crop, ax_by_year), (ax_by_irrigation, ax_by_disaster) = axes\n",
+    "def mean_yield_by(column):\n",
+    "    \"\"\"Return the mean of '亩产_公斤' within each group of df_yield[column].\"\"\"\n",
+    "    return df_yield.groupby(column)['亩产_公斤'].mean()\n",
+    "\n",
+    "# Mean yield for each crop, highest first\n",
+    "crop_yield = mean_yield_by('作物').sort_values(ascending=False)\n",
+    "ax_by_crop.bar(crop_yield.index, crop_yield.values, color=plt.cm.tab10.colors[:len(crop_yield)])\n",
+    "ax_by_crop.set(title='各作物平均亩产', ylabel='亩产 (公斤)')\n",
+    "ax_by_crop.tick_params(axis='x', rotation=45)\n",
+    "\n",
+    "# Mean yield for each year\n",
+    "yearly = mean_yield_by('年份')\n",
+    "ax_by_year.plot(yearly.index, yearly.values, 'b-s', markersize=8)\n",
+    "ax_by_year.set(title='年度平均亩产趋势', ylabel='亩产 (公斤)')\n",
+    "ax_by_year.grid(True, alpha=0.3)\n",
+    "\n",
+    "# Mean yield for each irrigation method, lowest first\n",
+    "irrigation = mean_yield_by('灌溉方式').sort_values()\n",
+    "ax_by_irrigation.barh(irrigation.index, irrigation.values, color=['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4'])\n",
+    "ax_by_irrigation.set(title='不同灌溉方式平均亩产', xlabel='亩产 (公斤)')\n",
+    "\n",
+    "# Mean yield for each disaster category; the '无' bar is green, every other bar is red\n",
+    "disaster = mean_yield_by('自然灾害').sort_values()\n",
+    "ax_by_disaster.bar(disaster.index, disaster.values, color=['#2ecc71' if x == '无' else '#e74c3c' for x in disaster.index])\n",
+    "ax_by_disaster.set(title='自然灾害对亩产影响', ylabel='亩产 (公斤)')\n",
+    "\n",
+    "plt.tight_layout()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 6. 加载 COCO JSON - 标注数据分析"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Parse the COCO-format detection annotations (read mode is open()'s default)\n",
+    "with open(f'{DATA_DIR}/json/作物病害检测_COCO格式.json', encoding='utf-8') as f:\n",
+    "    coco = json.load(f)\n",
+    "\n",
+    "print(f'图片数量: {len(coco[\"images\"])}')\n",
+    "print(f'标注数量: {len(coco[\"annotations\"])}')\n",
+    "print(f'类别数量: {len(coco[\"categories\"])}')\n",
+    "print('\\n类别列表:')\n",
+    "for cat in coco['categories']:\n",
+    "    count = sum(a['category_id'] == cat['id'] for a in coco['annotations'])  # each True adds 1\n",
+    "    print(f'  {cat[\"name\"]}: {count} 个标注')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Visual summary of the COCO annotations\n",
+    "fig, axes = plt.subplots(1, 2, figsize=(14, 5))\n",
+    "\n",
+    "# Annotations per category, keyed by category name\n",
+    "cat_names = {c['id']: c['name'] for c in coco['categories']}\n",
+    "cat_counts = {}\n",
+    "for a in coco['annotations']:\n",
+    "    name = cat_names[a['category_id']]\n",
+    "    cat_counts[name] = cat_counts.get(name, 0) + 1\n",
+    "axes[0].bar(cat_counts.keys(), cat_counts.values(), color=plt.cm.Paired.colors[:len(cat_counts)])\n",
+    "axes[0].set_title('各类别标注数量')\n",
+    "axes[0].tick_params(axis='x', rotation=45)\n",
+    "\n",
+    "# Annotations per image (only images that have at least one annotation are counted)\n",
+    "img_ann_counts = {}\n",
+    "for a in coco['annotations']:\n",
+    "    img_ann_counts[a['image_id']] = img_ann_counts.get(a['image_id'], 0) + 1\n",
+    "counts = list(img_ann_counts.values())\n",
+    "# default=1 yields a single [1, 2] bin when there are no annotations; a bare max() would raise ValueError\n",
+    "axes[1].hist(counts, bins=range(1, max(counts, default=1) + 2), color='steelblue', alpha=0.7, edgecolor='white')\n",
+    "axes[1].set_title('每张图标注数量分布')\n",
+    "axes[1].set_xlabel('标注数量')\n",
+    "axes[1].set_ylabel('图片数')\n",
+    "\n",
+    "plt.tight_layout()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 7. 数据质量报告"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Plain-text summary of the four datasets loaded above\n",
+    "print('=' * 60, '           农业数据集质量报告', '=' * 60, sep='\\n')\n",
+    "# CSV: crop-disease annotation table\n",
+    "print('\\n📋 CSV 作物病害标注表:')\n",
+    "print(f'   记录数: {len(df_csv)}')\n",
+    "print(f'   字段数: {len(df_csv.columns)}')\n",
+    "print(f'   缺失值: {df_csv.isnull().sum().sum()}')\n",
+    "print(f'   作物种类: {df_csv[\"作物\"].nunique()} 种')\n",
+    "print(f'   病害类型: {df_csv[\"病害名称\"].nunique()} 种')\n",
+    "# TSV: agro-weather records\n",
+    "print('\\n📋 TSV 农业气象数据:')\n",
+    "print(f'   记录数: {len(df_weather)}')\n",
+    "print(f'   字段数: {len(df_weather.columns)}')\n",
+    "print(f'   时间范围: {df_weather[\"日期\"].min()} ~ {df_weather[\"日期\"].max()}')\n",
+    "# Parquet: crop-yield statistics\n",
+    "print('\\n📋 Parquet 作物产量统计:')\n",
+    "print(f'   记录数: {len(df_yield)}')\n",
+    "print(f'   字段数: {len(df_yield.columns)}')\n",
+    "print(f'   年份范围: {df_yield[\"年份\"].min()} ~ {df_yield[\"年份\"].max()}')\n",
+    "# COCO: detection annotations\n",
+    "print('\\n📋 COCO 作物病害检测标注:')\n",
+    "print(f'   图片数: {len(coco[\"images\"])}')\n",
+    "print(f'   标注数: {len(coco[\"annotations\"])}')\n",
+    "print(f'   类别数: {len(coco[\"categories\"])}')\n",
+    "print('=' * 60)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.10.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
\ No newline at end of file