Spaces:

ramadn
/

allergen_detector

Sleeping

App Files Files Community

rdsarjito committed 15 days ago

Commit

ec9ce14

1 Parent(s): aaa3549

first commit

Browse files

Files changed (3) hide show

app.py +101 -0
model/alergen_model.pt +3 -0
requirements.txt +5 -0

app.py ADDED Viewed

	@@ -0,0 +1,101 @@

+import streamlit as st
+import torch
+import re
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import requests
+from bs4 import BeautifulSoup
+# === Konfigurasi Umum ===
+MODEL_PATH = 'model/alergen_model.pt'  # Pastikan path dan nama file model benar
+LABELS = ['susu', 'kacang', 'telur', 'makanan_laut', 'gandum']
+MAX_LEN = 128
+DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+# === Load Model & Tokenizer ===
+@st.cache_resource
+def load_model():
+    tokenizer = AutoTokenizer.from_pretrained("indobenchmark/indobert-base-p2")
+    model = AutoModelForSequenceClassification.from_pretrained(
+        "indobenchmark/indobert-base-p2",
+        num_labels=len(LABELS),
+        problem_type="multi_label_classification"
+    )
+    state = torch.load(MODEL_PATH, map_location=DEVICE)
+    model.load_state_dict(state['model_state_dict'])
+    model.to(DEVICE)
+    model.eval()
+    return tokenizer, model
+# === Cleaning Teks ===
+def clean_text(text):
+    text = text.replace('--', ' ')
+    text = re.sub(r"http\S+", "", text)
+    text = re.sub('\n', ' ', text)
+    text = re.sub("[^a-zA-Z0-9\s]", " ", text)
+    text = re.sub(" {2,}", " ", text)
+    return text.lower().strip()
+# === Scrape dari Cookpad ===
+def scrape_ingredients(url):
+    try:
+        headers = {'User-Agent': 'Mozilla/5.0'}
+        r = requests.get(url, headers=headers)
+        soup = BeautifulSoup(r.content, 'html.parser')
+        ingredients_div = soup.find('div', id='ingredients')
+        if ingredients_div:
+            return ingredients_div.get_text(separator=' ')
+    except:
+        return None
+# === Prediksi Alergen ===
+def predict_alergen(text, tokenizer, model, threshold):
+    text = clean_text(text)
+    encoding = tokenizer.encode_plus(
+        text,
+        add_special_tokens=True,
+        max_length=MAX_LEN,
+        truncation=True,
+        padding='max_length',
+        return_tensors='pt'
+    )
+    input_ids = encoding['input_ids'].to(DEVICE)
+    attention_mask = encoding['attention_mask'].to(DEVICE)
+    with torch.no_grad():
+        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
+        probs = torch.sigmoid(outputs.logits).cpu().numpy()[0]
+    return {label: float(prob) for label, prob in zip(LABELS, probs)}
+# === Streamlit UI ===
+# Top-level script: builds the page, collects the ingredient text either from
+# a text box or from a scraped URL, and shows per-label predictions.
+st.set_page_config(page_title="Deteksi Alergen IndoBERT", page_icon="🍲")
+st.title("🍲 Deteksi Alergen dari Resep Cookpad (IndoBERT)")
+# load_model() is decorated with st.cache_resource.
+tokenizer, model = load_model()
+# Input source: free text typed by the user, or a recipe URL to scrape.
+input_mode = st.radio("Pilih input:", ["Teks Manual", "URL Cookpad"])
+if input_mode == "Teks Manual":
+    user_input = st.text_area("📝 Masukkan bahan makanan:")
+else:
+    url = st.text_input("🔗 Masukkan URL Cookpad:")
+    # Stays empty until scraping succeeds, which triggers the warning below.
+    user_input = ""
+    if url:
+        scraped = scrape_ingredients(url)
+        if scraped:
+            user_input = scraped
+            st.success("✅ Berhasil mengambil bahan dari URL")
+            # Display only: the widget's return value is discarded, so edits
+            # made in this box are not fed into the prediction.
+            st.text_area("📋 Bahan dari URL:", value=user_input, height=200)
+        else:
+            st.error("❌ Gagal mengambil data dari URL.")
+# Probability cut-off for reporting a label as present (range 0.0-1.0, default 0.5).
+threshold = st.slider("🎚 Threshold (default 0.5):", 0.0, 1.0, 0.5)
+if st.button("🚀 Prediksi"):
+    if user_input.strip():
+        result = predict_alergen(user_input, tokenizer, model, threshold)
+        st.subheader("📊 Hasil Prediksi Alergen:")
+        # One line per label: present when its probability reaches the threshold.
+        for label, prob in result.items():
+            status = "✅ Ada" if prob >= threshold else "❌ Tidak Ada"
+            st.write(f"- **{label}**: {status} ({prob:.2f})")
+    else:
+        st.warning("⚠️ Masukkan teks bahan atau URL terlebih dahulu.")

model/alergen_model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:28df831b272894c11265ef5f4cf1ac2a2ca89e765b26bff928f34c388ff015d5
+size 497868974

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+streamlit
+torch
+transformers
+requests
+beautifulsoup4