Files
pest-image-search/app.py
zhenghu ec7c9f8dbe feat: 初始化病虫害以图搜图应用
- 基于 CLIP 模型实现图片相似度搜索(app.py / main.py)
  - 新增 Streamlit 可视化交互界面
  - 新增 pyproject.toml、justfile、Dockerfile 项目配置
  - 补充完整 README 文档(功能介绍、快速开始、Docker 部署)
  - 新增 .gitignore
2026-04-14 16:24:04 +08:00

600 lines
24 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
病虫害以图搜图
基于 CLIP 本地模型的图片 Embedding 相似度搜索
"""
from __future__ import annotations
import io
import os
from dataclasses import dataclass
from typing import Literal
import numpy as np
import plotly.graph_objects as go
import requests
import streamlit as st
from PIL import Image
from transformers import CLIPModel, CLIPProcessor
# ─── Page Config ────────────────────────────────────────────────────────────
# Configure the browser tab (title and icon), use the wide page layout, and
# start with the sidebar expanded.
st.set_page_config(
    page_title="病虫害以图搜图",
    page_icon="🌿",
    layout="wide",
    initial_sidebar_state="expanded",
)
# ─── Custom CSS ──────────────────────────────────────────────────────────────
# Inject the app-wide stylesheet. It defines the colour palette as CSS
# variables on :root and styles the custom classes used by the HTML snippets
# rendered elsewhere in this file (metric/result cards, tags, hero and sidebar
# titles, info panel) plus a few Streamlit widget overrides.
# unsafe_allow_html=True is required for the <style> tag to be emitted as HTML.
st.markdown("""
<style>
:root {
--soil: #7c5e42;
--leaf: #4a7c59;
--leaf-light: #6b9e75;
--wheat: #d4a574;
--cream: #faf8f3;
--paper: #ffffff;
--ink: #2c2c2c;
--ink-muted: #5a5a5a;
--border: #e5e0d5;
--shadow: rgba(0,0,0,0.04);
--danger: #c45c4a;
--danger-light: #f5eae8;
}
html, body, [class*="css"] {
font-family: "PingFang SC", "Microsoft YaHei", "Noto Sans SC", sans-serif;
color: var(--ink);
}
.stApp {
background: var(--cream);
}
/* Sidebar */
[data-testid="stSidebar"] {
background: #f5f2eb;
border-right: 1px solid var(--border);
}
[data-testid="stSidebar"] .stSlider label,
[data-testid="stSidebar"] .stNumberInput label,
[data-testid="stSidebar"] .stSelectbox label,
[data-testid="stSidebar"] .stTextInput label,
[data-testid="stSidebar"] .stFileUploader label {
color: var(--soil) !important;
font-size: 0.85rem;
font-weight: 500;
}
/* Metric / hero cards */
.metric-card {
background: var(--paper);
border: 1px solid var(--border);
border-radius: 14px;
padding: 20px 18px;
text-align: center;
box-shadow: 0 2px 10px var(--shadow);
}
.metric-value {
font-size: 1.9rem;
font-weight: 700;
color: var(--leaf);
line-height: 1.1;
}
.metric-unit {
font-size: 0.8rem;
color: var(--ink-muted);
margin-top: 4px;
}
.metric-label {
font-size: 0.9rem;
color: var(--ink);
margin-top: 8px;
font-weight: 500;
}
/* Section headers */
.section-header {
font-size: 0.95rem;
font-weight: 600;
color: var(--soil);
padding-bottom: 8px;
margin-bottom: 14px;
margin-top: 22px;
border-bottom: 1px solid var(--border);
}
/* Result cards */
.result-card {
background: var(--paper);
border: 1px solid var(--border);
border-radius: 14px;
padding: 16px;
margin-bottom: 14px;
box-shadow: 0 1px 6px var(--shadow);
}
.result-rank {
width: 28px;
height: 28px;
border-radius: 50%;
background: var(--wheat);
color: #fff;
font-size: 0.85rem;
font-weight: 700;
display: inline-flex;
align-items: center;
justify-content: center;
margin-right: 10px;
}
.result-name {
font-size: 1.05rem;
font-weight: 600;
color: var(--ink);
}
.result-score {
font-size: 0.9rem;
color: var(--leaf);
font-weight: 600;
}
/* Tags */
.tag {
display: inline-block;
background: #f3f6f3;
border: 1px solid var(--leaf-light);
border-radius: 999px;
padding: 3px 10px;
font-size: 0.78rem;
color: var(--leaf);
margin: 3px 3px 3px 0;
}
.tag-warn {
background: var(--danger-light);
border-color: var(--danger);
color: var(--danger);
}
/* Hero */
.hero-title {
font-size: 1.6rem;
font-weight: 700;
color: var(--soil);
line-height: 1.2;
}
.hero-sub {
font-size: 0.85rem;
color: var(--ink-muted);
margin-top: 4px;
}
/* Sidebar title */
.sidebar-title {
font-size: 1.15rem;
font-weight: 700;
color: var(--soil);
margin-bottom: 2px;
}
.sidebar-sub {
font-size: 0.75rem;
color: var(--ink-muted);
margin-bottom: 12px;
}
/* Info panel */
.info-panel {
background: var(--paper);
border: 1px solid var(--border);
border-radius: 12px;
padding: 14px 16px;
font-size: 0.88rem;
color: var(--ink-muted);
line-height: 1.7;
}
/* Streamlit overrides */
.stButton > button {
border-radius: 10px !important;
background: var(--leaf) !important;
border: none !important;
color: #fff !important;
}
.stButton > button:hover {
background: var(--leaf-light) !important;
}
/* Radio horizontal */
.stRadio [role="radiogroup"] {
gap: 8px;
}
</style>
""", unsafe_allow_html=True)
# ─── Knowledge Base ──────────────────────────────────────────────────────────
@dataclass(frozen=True)
class PestItem:
    """Immutable knowledge-base record describing one crop disease or insect pest."""

    name: str  # display name of the disease / pest
    url: str  # URL of the reference photo that is downloaded and embedded for the index
    symptoms: str  # symptom description shown on the result card
    treatment: str  # treatment advice shown on the result card and in the summary
    crop: str  # affected crop, rendered as a tag
    category: Literal["病害", "虫害"]  # "病害" = disease, "虫害" = insect pest
# Built-in knowledge base: one reference photo per disease / pest.
# build_index() downloads each `url`, embeds the image and copies the remaining
# fields into the search index, so every entry here becomes one searchable item.
PEST_KNOWLEDGE: list[PestItem] = [
    PestItem(
        name="水稻稻瘟病",
        url="https://minio.dev.maimaiag.com/crop-prod-bucket/field_photo/20260410_151854_dc9667cf_%E6%B0%B4%E7%A8%BB%E7%A8%BB%E7%98%9F%E7%97%851.jpeg",
        symptoms="叶片出现梭形或纺锤形病斑,中央灰白色,边缘褐色,严重时病斑连片导致叶片枯死",
        treatment="选用抗病品种,合理施肥避免偏施氮肥,发病初期喷施三环唑或稻瘟灵",
        crop="水稻",
        category="病害",
    ),
    PestItem(
        name="水稻纹枯病",
        url="https://minio.dev.maimaiag.com/crop-prod-bucket/field_photo/20260410_152022_9f3124ab_%E6%B0%B4%E7%A8%BB%E7%BA%B9%E6%9E%AF%E7%97%851.jpeg",
        symptoms="叶鞘和叶片上出现云纹状灰绿色至灰褐色病斑,后期病斑边缘褐色、中央灰白色",
        treatment="合理密植,科学管水,发病初期喷施井冈霉素或噻呋酰胺",
        crop="水稻",
        category="病害",
    ),
    PestItem(
        name="水稻胡麻叶斑病",
        url="https://minio.dev.maimaiag.com/crop-prod-bucket/field_photo/20260410_151936_41fdb1dc_%E6%B0%B4%E7%A8%BB%E8%83%A1%E9%BA%BB%E5%8F%B6%E6%96%91%E7%97%851.jpeg",
        symptoms="叶片上出现暗褐色芝麻粒大小的椭圆形病斑,病斑周围有黄色晕圈",
        treatment="增施硅肥和钾肥提高抗病力,喷施丙环唑或咪鲜胺防治",
        crop="水稻",
        category="病害",
    ),
    PestItem(
        name="小麦锈病",
        url="https://minio.dev.maimaiag.com/crop-prod-bucket/field_photo/20260410_153814_3e175ca3_%E5%B0%8F%E9%BA%A6%E9%94%88%E7%97%851.jpeg",
        symptoms="叶片和叶鞘上出现铁锈色粉状疱疹(夏孢子堆),后期变为黑色冬孢子堆",
        treatment="种植抗锈品种,发病初期喷施三唑酮或烯唑醇,注意轮作",
        crop="小麦",
        category="病害",
    ),
    PestItem(
        name="小麦赤霉病",
        url="https://minio.dev.maimaiag.com/crop-prod-bucket/field_photo/20260410_152112_2e1f530e_%E5%B0%8F%E9%BA%A6%E8%B5%A4%E9%9C%89%E7%97%851.jpeg",
        symptoms="穗部小穗发病,颖壳上出现水浸状褐色斑,后期产生粉红色霉层",
        treatment="选用抗病品种,齐穗至扬花初期喷施多菌灵或戊唑醇",
        crop="小麦",
        category="病害",
    ),
    PestItem(
        name="玉米大斑病",
        url="https://minio.dev.maimaiag.com/crop-prod-bucket/field_photo/20260410_153911_ee5a72be_%E7%8E%89%E7%B1%B3%E5%A4%A7%E6%96%91%E7%97%851.jpeg",
        symptoms="叶片上出现灰绿色水浸状斑点,扩展为长梭形灰褐色大型病斑",
        treatment="种植抗病品种,适时早播,发病初期喷施多菌灵或代森锰锌",
        crop="玉米",
        category="病害",
    ),
    PestItem(
        name="玉米小斑病",
        url="https://minio.dev.maimaiag.com/crop-prod-bucket/field_photo/20260410_154001_e31a0103_%E7%8E%89%E7%B1%B3%E5%B0%8F%E6%96%91%E7%97%851.jpeg",
        # NOTE(review): unlike the other entries this text has no clause
        # separators; it looks like punctuation was lost — confirm and restore.
        symptoms="叶片上出现椭圆形黄褐色小病斑有2-3圈同心轮纹边缘紫褐色",
        treatment="轮作倒茬,清除病残体,喷施百菌清或甲基托布津",
        crop="玉米",
        category="病害",
    ),
    PestItem(
        name="玉米螟",
        url="https://minio.dev.maimaiag.com/crop-prod-bucket/field_photo/20260410_153938_8be05006_%E7%8E%89%E7%B1%B3%E8%9E%9F1.jpeg",
        symptoms="幼虫蛀食茎秆和穗轴,茎秆上有蛀孔,孔口有虫粪,造成茎秆折断",
        treatment="心叶期撒施白僵菌颗粒剂,释放赤眼蜂生物防治,大喇叭口期灌心",
        crop="玉米",
        category="虫害",
    ),
    PestItem(
        name="稻飞虱",
        url="https://minio.dev.maimaiag.com/crop-prod-bucket/field_photo/20260410_151643_db5e1d36_%E7%A8%BB%E9%A3%9E%E8%99%AB1.jpeg",
        symptoms="稻株基部聚集大量褐色或白色小型飞虫,受害稻株发黄矮缩,严重时枯死倒伏",
        treatment="合理施肥避免贪青晚熟,选用吡蚜酮或烯啶虫胺防治,保护利用天敌",
        crop="水稻",
        category="虫害",
    ),
    PestItem(
        name="大豆蚜虫",
        url="https://minio.dev.maimaiag.com/crop-prod-bucket/field_photo/20260410_151549_d9cf327b_%E5%A4%A7%E8%B1%86%E8%9A%9C%E8%99%AB1.jpeg",
        symptoms="嫩叶和茎尖聚集大量绿色或黄色蚜虫,叶片卷缩变形,植株矮化",
        # NOTE(review): no separator between the two clauses here, unlike the
        # other entries — punctuation appears to be missing; confirm and restore.
        treatment="保护瓢虫等天敌百株蚜量达1000头时喷施吡虫啉或啶虫脒",
        crop="大豆",
        category="虫害",
    ),
    PestItem(
        name="番茄晚疫病",
        url="https://minio.dev.maimaiag.com/crop-prod-bucket/field_photo/20260410_151705_3dd8baab_%E7%95%AA%E8%8C%84%E6%99%9A%E7%96%AB%E7%97%851.jpeg",
        symptoms="叶片出现水浸状暗绿色不规则病斑,潮湿时叶背面产生白色霉层,果实变褐硬化",
        treatment="控制温湿度,及时通风降湿,发病初期喷施甲霜灵锰锌或霜脲氰",
        crop="番茄",
        category="病害",
    ),
    PestItem(
        name="黄瓜霜霉病",
        url="https://minio.dev.maimaiag.com/crop-prod-bucket/field_photo/20260410_151804_7be515fa_%E9%BB%84%E7%93%9C%E9%9C%9C%E9%9C%89%E7%97%851.jpeg",
        symptoms="叶片正面出现黄色多角形病斑,叶背面潮湿时产生灰黑色霉层",
        treatment="选用抗病品种,膜下滴灌降低湿度,喷施百菌清或霜霉威盐酸盐",
        crop="黄瓜",
        category="病害",
    ),
]
# Sample query images offered in the sidebar's example mode, as
# (button label, image URL) pairs. The URLs differ from the reference photos
# listed in PEST_KNOWLEDGE.
EXAMPLE_IMAGES: list[tuple[str, str]] = [
    (
        "水稻稻瘟病",
        "https://minio.dev.maimaiag.com/crop-prod-bucket/field_photo/20260410_151914_4f5b8fef_%E6%B0%B4%E7%A8%BB%E7%A8%BB%E7%98%9F%E7%97%852.jpeg",
    ),
    (
        "番茄晚疫病",
        "https://minio.dev.maimaiag.com/crop-prod-bucket/field_photo/20260410_151726_a8f31320_%E7%95%AA%E8%8C%84%E6%99%9A%E7%96%AB%E7%97%852.jpeg",
    ),
    (
        "小麦锈病",
        "https://minio.dev.maimaiag.com/crop-prod-bucket/field_photo/20260410_153837_e8ae9f43_%E5%B0%8F%E9%BA%A6%E9%94%88%E7%97%852.jpeg",
    ),
    (
        "水稻纹枯病",
        "https://minio.dev.maimaiag.com/crop-prod-bucket/field_photo/20260410_152050_77d568b1_%E6%B0%B4%E7%A8%BB%E7%BA%B9%E6%9E%AF%E7%97%852.jpeg",
    ),
]
# ─── CLIP Embedder ───────────────────────────────────────────────────────────
class CLIPEmbedder:
    """Lazily loaded CLIP image encoder that produces L2-normalised embeddings."""

    MODEL_NAME = "openai/clip-vit-base-patch32"

    def __init__(self) -> None:
        # The processor and model are loaded on first use, not at construction.
        self._processor: CLIPProcessor | None = None
        self._model: CLIPModel | None = None

    def _load(self) -> tuple[CLIPProcessor, CLIPModel]:
        """Return the (processor, model) pair, loading both on the first call."""
        if self._processor is None or self._model is None:
            with st.spinner("首次启动正在加载 CLIP 模型,请稍候..."):
                self._processor = CLIPProcessor.from_pretrained(self.MODEL_NAME)
                self._model = CLIPModel.from_pretrained(self.MODEL_NAME)
        return self._processor, self._model

    def embed(self, image: Image.Image) -> np.ndarray:
        """Embed one image and return the feature vector as a flat array.

        Args:
            image: The image to encode.

        Returns:
            The flattened image features scaled to unit L2 norm. A vector whose
            norm is zero is returned unscaled to avoid dividing by zero.
        """
        # Local import keeps this block self-contained; the "pt" tensors
        # requested below already require PyTorch to be installed.
        import torch

        processor, model = self._load()
        inputs = processor(images=image, return_tensors="pt")
        # Inference only: skip autograd bookkeeping so no graph is built and
        # immediately thrown away for every embedded image.
        with torch.no_grad():
            image_features = model.get_image_features(**inputs)
        vec = image_features.detach().cpu().numpy().flatten()
        norm = np.linalg.norm(vec)
        if norm == 0:
            return vec
        return vec / norm
@st.cache_resource(show_spinner=False)
def get_embedder() -> CLIPEmbedder:
    """Return a CLIPEmbedder; st.cache_resource caches the created instance."""
    return CLIPEmbedder()
# ─── Utilities ───────────────────────────────────────────────────────────────
def load_image(source: str | io.BytesIO) -> Image.Image | None:
    """Load an image from a URL or an in-memory buffer as an RGB PIL image.

    Args:
        source: A URL string (fetched over HTTP with a 30 s timeout) or a
            file-like object that PIL can open directly.

    Returns:
        The decoded RGB image, or None when fetching or decoding fails; the
        failure is reported to the user through st.error.
    """
    try:
        # Anything that is not a string is handed straight to PIL.
        if not isinstance(source, str):
            return Image.open(source).convert("RGB")
        response = requests.get(source, timeout=30)
        response.raise_for_status()
        payload = io.BytesIO(response.content)
        return Image.open(payload).convert("RGB")
    except Exception as exc:
        st.error(f"图片加载失败: {exc}")
        return None
def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine of the angle between vectors *a* and *b*.

    The dot product is divided by both norms, so the result is a true cosine
    even when the inputs are not unit length. For unit-length inputs this
    equals the plain dot product up to floating-point rounding.

    Returns:
        The cosine similarity, or 0.0 when either vector has zero norm (the
        cosine is undefined there and the dot product is 0 anyway).
    """
    denom = float(np.linalg.norm(a)) * float(np.linalg.norm(b))
    if denom == 0.0:
        return 0.0
    return float(np.dot(a, b)) / denom
@st.cache_data(show_spinner=False)
def _build_index_cached() -> tuple[list[dict], list[str], list[str]]:
    """Download and embed every knowledge-base photo (memoised by Streamlit)."""
    embedder = get_embedder()
    items: list[dict] = []
    succeeded: list[str] = []
    failed: list[str] = []
    progress = st.progress(0, text="正在构建病虫害图片索引...")
    total = len(PEST_KNOWLEDGE)
    for done, pest in enumerate(PEST_KNOWLEDGE, start=1):
        embedding = None
        img = load_image(pest.url)
        if img is not None:
            try:
                embedding = embedder.embed(img)
            except Exception:
                # Best effort: an image that cannot be embedded is recorded as
                # failed below instead of aborting the whole index build.
                embedding = None
        if embedding is None:
            failed.append(pest.name)
        else:
            items.append({
                "name": pest.name,
                "url": pest.url,
                "embedding": embedding,
                "symptoms": pest.symptoms,
                "treatment": pest.treatment,
                "crop": pest.crop,
                "category": pest.category,
            })
            succeeded.append(pest.name)
        progress.progress(done / total, text=f"索引构建中 ({done}/{total})...")
    progress.empty()
    return items, succeeded, failed


def build_index() -> tuple[list[dict], list[str], list[str]]:
    """Return the reference-image index built from PEST_KNOWLEDGE.

    Returns:
        A tuple ``(items, succeeded, failed)``: ``items`` holds one dict per
        indexed entry (the PestItem fields plus its ``embedding``), and
        ``succeeded`` / ``failed`` hold the names of entries that were or were
        not indexed.

    The build is memoised with st.cache_data. If no image at all could be
    indexed (for example because the network was down), the cached result is
    discarded so the next rerun retries; otherwise a page refresh would keep
    serving the empty index. Partial failures stay cached to avoid
    re-downloading every image on each interaction.
    """
    items, succeeded, failed = _build_index_cached()
    if not items:
        _build_index_cached.clear()
    return items, succeeded, failed


# Keep the `.clear()` handle that the cached function used to expose.
build_index.clear = _build_index_cached.clear
# ─── Sidebar ─────────────────────────────────────────────────────────────────
# Sidebar: choose the query image (upload / URL / example), set the number of
# results and trigger the search. Defines `query_source`, `query_url`, `top_k`
# and `search_clicked` for the search section further down.
with st.sidebar:
    st.markdown('<div class="sidebar-title">🌿 病虫害以图搜图</div>', unsafe_allow_html=True)
    st.markdown('<div class="sidebar-sub">上传图片,智能识别相似病虫害</div>', unsafe_allow_html=True)
    st.markdown("<hr style='border:none;border-top:1px solid var(--border);margin:12px 0;'>", unsafe_allow_html=True)

    st.markdown('<div class="section-header" style="margin-top:0">🖼️ 输入方式</div>', unsafe_allow_html=True)
    # The label is hidden via label_visibility but must not be empty:
    # Streamlit discourages empty widget labels for accessibility reasons.
    input_mode = st.radio(
        "输入方式",
        ["上传本地图片", "输入图片 URL", "选择示例图片"],
        label_visibility="collapsed",
    )

    # `query_source` is either an in-memory buffer (upload) or a URL string;
    # it stays None until the user has actually provided an image.
    query_source = None
    query_url = ""
    if input_mode == "上传本地图片":
        uploaded = st.file_uploader("选择图片", type=["jpg", "jpeg", "png", "webp"])
        if uploaded is not None:
            query_source = io.BytesIO(uploaded.getvalue())
            query_url = ""
    elif input_mode == "输入图片 URL":
        query_url = st.text_input("图片 URL", placeholder="https://example.com/image.jpg")
        if query_url.strip():
            query_source = query_url.strip()
    else:
        st.markdown('<div style="font-size:0.8rem;color:#7a7a7a;margin-bottom:6px;">点击选择示例</div>', unsafe_allow_html=True)
        cols = st.columns(2)
        for idx, (name, url) in enumerate(EXAMPLE_IMAGES):
            with cols[idx % 2]:
                if st.button(name, key=f"ex_{name}"):
                    # Remember the choice across reruns; the button itself is
                    # only True on the run triggered by the click.
                    st.session_state.query_url = url
        if "query_url" in st.session_state:
            query_url = st.session_state.query_url
            query_source = query_url
            st.image(query_url, use_container_width=True)

    st.markdown('<div class="section-header">⚙️ 搜索设置</div>', unsafe_allow_html=True)
    # Clamp the default so it can never exceed the slider maximum if the
    # knowledge base ever holds fewer than five entries.
    max_top_k = min(12, len(PEST_KNOWLEDGE))
    top_k = st.slider("返回条数", 1, max_top_k, min(5, max_top_k))
    st.markdown("<br>", unsafe_allow_html=True)
    search_clicked = st.button("开始搜索", type="primary", use_container_width=True)

    st.markdown("<hr style='border:none;border-top:1px solid var(--border);margin:12px 0;'>", unsafe_allow_html=True)
    st.markdown("""
<div class="info-panel">
<b>使用说明</b><br>
1. 上传病虫害患处图片<br>
2. 系统自动提取图像特征<br>
3. 与知识库比对返回相似结果<br>
4. 参考症状与防治建议
</div>
""", unsafe_allow_html=True)
# ─── Build Index ─────────────────────────────────────────────────────────────
# Build the reference index (or reuse the cached one) before rendering the
# page body; `index_items` is what the search section compares against.
index_items, succeeded, failed = build_index()

# ─── Main Layout ─────────────────────────────────────────────────────────────
# Hero title and subtitle.
st.markdown("""
<div style="display:flex; align-items:baseline; gap:12px; margin-bottom:4px;">
<div class="hero-title">病虫害以图搜图</div>
</div>
<div class="hero-sub">基于 CLIP 视觉模型的病虫害相似度检索与防治建议</div>
""", unsafe_allow_html=True)

# Status badges: one for the number of indexed entries and one (warning
# style) for entries that failed; a badge is shown only if its list is
# non-empty.
indexed_badge = f'<span class="tag">📚 知识库 {len(succeeded)} 种</span>'
failure_badge = f'<span class="tag tag-warn">⚠️ 索引失败 {len(failed)} 种</span>'
badges = [
    html
    for entries, html in ((succeeded, indexed_badge), (failed, failure_badge))
    if entries
]
if badges:
    st.markdown(f"<div style='margin-top:8px;'>{''.join(badges)}</div>", unsafe_allow_html=True)
st.markdown("<br>", unsafe_allow_html=True)
# ─── Search Logic ────────────────────────────────────────────────────────────
# Run a search when the button was clicked, a query image is available and the
# index is non-empty: embed the query, rank every indexed item by similarity,
# then render a bar chart, per-result cards and a short advisory.
# NOTE(review): the nesting below (chart beside the query image, cards
# underneath the two columns) was reconstructed — confirm the intended layout.
if search_clicked and query_source is not None and index_items:
    query_img = load_image(query_source)
    if query_img is not None:
        col_query, col_preview = st.columns([1, 3])
        with col_query:
            st.markdown('<div class="section-header" style="margin-top:0">🔍 查询图片</div>', unsafe_allow_html=True)
            st.image(query_img, use_container_width=True)
        with col_preview:
            # Render the "analysing" header into a placeholder so it can be
            # replaced by the results header instead of staying on screen
            # after the search has finished.
            status_header = st.empty()
            status_header.markdown('<div class="section-header" style="margin-top:0">⏳ 正在分析...</div>', unsafe_allow_html=True)
            progress = st.progress(0, text="提取图像特征...")
            embedder = get_embedder()
            query_embedding = embedder.embed(query_img)
            progress.progress(50, text="比对知识库...")
            scores = [
                (cosine_similarity(query_embedding, item["embedding"]), item)
                for item in index_items
            ]
            # Sort on the score only; the item dicts are not comparable.
            scores.sort(key=lambda x: x[0], reverse=True)
            results = scores[:top_k]
            progress.progress(100, text="搜索完成")
            progress.empty()
            status_header.markdown(f'<div class="section-header" style="margin-top:0">🏆 搜索结果Top-{len(results)}</div>', unsafe_allow_html=True)
            # Similarity bar chart
            names = [f"{r[1]['name']}" for r in results]
            sims = [r[0] * 100 for r in results]
            # Insect pests are drawn in the danger colour, diseases in green.
            colors = ["#c45c4a" if r[1]["category"] == "虫害" else "#4a7c59" for r in results]
            fig_bar = go.Figure()
            fig_bar.add_trace(go.Bar(
                x=sims,
                y=names,
                orientation="h",
                marker=dict(color=colors, opacity=0.85, line=dict(color="rgba(0,0,0,0.08)", width=1)),
                text=[f"{s:.1f}%" for s in sims],
                textposition="outside",
                textfont=dict(color="#5a5a5a", size=10),
            ))
            fig_bar.update_layout(
                xaxis=dict(title="相似度 (%)", color="#5a5a5a", gridcolor="rgba(0,0,0,0.06)", range=[0, 105]),
                # Reversed so the best match is the top bar.
                yaxis=dict(color="#5a5a5a", gridcolor="rgba(0,0,0,0.04)", autorange="reversed"),
                paper_bgcolor="rgba(0,0,0,0)",
                plot_bgcolor="rgba(0,0,0,0)",
                font=dict(color="#2c2c2c", size=11),
                margin=dict(t=10, b=30, l=80, r=50),
                height=160 + len(results) * 34,
                showlegend=False,
            )
            st.plotly_chart(fig_bar, use_container_width=True)
        # Result cards below
        st.markdown('<div class="section-header">📋 详细结果</div>', unsafe_allow_html=True)
        for rank, (sim, item) in enumerate(results, 1):
            with st.container():
                st.markdown(f"""
<div class="result-card">
<div style="display:flex; gap:14px; align-items:flex-start;">
<div style="flex:0 0 140px;">
<img src="{item['url']}" style="width:100%; border-radius:10px; border:1px solid var(--border);">
</div>
<div style="flex:1;">
<div style="display:flex; align-items:center; margin-bottom:8px;">
<span class="result-rank">{rank}</span>
<span class="result-name">{item['name']}</span>
<span style="margin-left:auto;" class="result-score">相似度 {sim*100:.1f}%</span>
</div>
<div style="margin-bottom:8px;">
<span class="tag">{item['crop']}</span>
<span class="tag{' tag-warn' if item['category'] == '虫害' else ''}">{item['category']}</span>
</div>
<div style="font-size:0.88rem; color:var(--ink); line-height:1.6;">
<b>症状:</b>{item['symptoms']}<br>
<b>防治:</b>{item['treatment']}
</div>
</div>
</div>
</div>
""", unsafe_allow_html=True)
        # Advisory summary
        if results:
            best = results[0][1]
            st.markdown('<div class="section-header">💡 初步建议</div>', unsafe_allow_html=True)
            # The crop/category aside now has its opening parenthesis and a
            # separator; previously only the closing ")" was emitted.
            st.markdown(f"""
<div class="info-panel" style="border-left:3px solid var(--leaf-light); border-radius:0 12px 12px 0;">
系统判断该图片与 <b>{best['name']}</b>({best['crop']} · {best['category']})最为相似,相似度 <b>{results[0][0]*100:.1f}%</b>。<br>
建议结合田间实际情况进一步确认,参考防治方案:<b>{best['treatment']}</b>
</div>
""", unsafe_allow_html=True)
elif search_clicked and not index_items:
    st.warning("知识库索引为空,请检查网络连接后刷新页面重试。")
elif search_clicked and query_source is None:
    # Previously a click without any query image did nothing at all.
    st.warning("请先上传图片、输入图片 URL 或选择示例图片,然后再开始搜索。")
# ─── Footer ───────────────────────────────────────────────────────────────────
# Footer: a spacer followed by a centred disclaimer that results are for
# reference only.
st.markdown("<br>", unsafe_allow_html=True)
st.markdown("""
<div style="text-align:center; font-size:0.78rem; color:#aaa; padding:14px; border-top:1px solid #e5e0d5;">
病虫害以图搜图 · 基于 CLIP 视觉模型 · 结果仅供参考,请结合田间实际情况判断
</div>
""", unsafe_allow_html=True)