Spaces:

ramadn
/

allergen_detector

Sleeping

App Files Files Community

rdsarjito committed 15 days ago

Commit

5e92542

1 Parent(s): 0c9c0e2

6 commit

Browse files

Files changed (2) hide show

app.py +244 -42
requirements.txt +8 -6

app.py CHANGED Viewed

@@ -1,62 +1,264 @@
-# app.py
 import streamlit as st
-import torch
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import numpy as np
 import re
-# Load model dan tokenizer
-@st.cache_resource
-def load_model():
-    checkpoint = torch.load("model/alergen_model.pt", map_location=torch.device("cpu"))
-    target_columns = checkpoint['target_columns']
-    model = AutoModelForSequenceClassification.from_pretrained(
-        "indobenchmark/indobert-base-p2",
-        num_labels=len(target_columns)
-    )
-    model.load_state_dict(checkpoint['model_state_dict'])
-    model.eval()
-    return model, target_columns
-model, target_columns = load_model()
-tokenizer = AutoTokenizer.from_pretrained("indobenchmark/indobert-base-p2")
-max_length = 128
-# Text cleaning
 def clean_text(text):
     text = text.replace('--', ' ')
     text = re.sub(r"http\S+", "", text)
     text = re.sub('\n', ' ', text)
     text = re.sub("[^a-zA-Z0-9\s]", " ", text)
     text = re.sub(" {2,}", " ", text)
-    return text.strip().lower()
-# Inference
-def predict_alergens(text):
-    cleaned = clean_text(text)
-    inputs = tokenizer.encode_plus(
-        cleaned,
         add_special_tokens=True,
         max_length=max_length,
         truncation=True,
         return_tensors='pt',
         padding='max_length'
     )
     with torch.no_grad():
-        outputs = model(**inputs)
-        probs = torch.sigmoid(outputs.logits)
-        preds = (probs > 0.5).float().numpy()[0]
-    return dict(zip(target_columns, preds.astype(bool)))
-# Streamlit UI
-st.title("Prediksi Alergen Makanan")
-ingredients = st.text_area("Masukkan daftar bahan makanan (ingredients):", height=200)
-if st.button("Prediksi"):
-    if ingredients.strip():
-        results = predict_alergens(ingredients)
-        st.subheader("Hasil Prediksi:")
-        for allergen, is_present in results.items():
-            st.write(f"✅ {allergen}" if is_present else f"❌ {allergen}")
-    else:
-        st.warning("Mohon masukkan teks bahan makanan.")

 import streamlit as st
+import os
 import numpy as np
+import pandas as pd
 import re
+import torch
+import torch.nn as nn
+from torch.utils.data import Dataset
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import matplotlib.pyplot as plt
+import warnings
+warnings.filterwarnings("ignore")
+# Streamlit page settings: page title, icon and wide layout
+st.set_page_config(page_title="Deteksi Alergen dalam Resep", page_icon="🍲", layout="wide")
+# Use CUDA when torch reports it as available, otherwise fall back to the CPU
+device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+# Normalise raw ingredient text before it is handed to the tokenizer
 def clean_text(text):
+    """Return *text* lower-cased with URLs, newlines and punctuation removed.
+
+    Double dashes and every character outside ``[a-zA-Z0-9]`` and whitespace
+    are replaced by a space, runs of spaces are collapsed to one, and the
+    result is stripped of leading/trailing whitespace.
+    """
+    # Treat a double dash as a word separator
     text = text.replace('--', ' ')
+    # Drop URLs, then flatten newlines so the text becomes a single line
     text = re.sub(r"http\S+", "", text)
     text = re.sub('\n', ' ', text)
+    # Raw string: "\s" inside a normal literal is an invalid escape sequence
+    text = re.sub(r"[^a-zA-Z0-9\s]", " ", text)
     text = re.sub(" {2,}", " ", text)
+    return text.strip().lower()
+# Multilabel classifier built on a pretrained sequence-classification checkpoint
+class MultilabelBertClassifier(nn.Module):
+    """Pretrained sequence-classification model whose forward pass returns raw logits."""
+    def __init__(self, model_name, num_labels):
+        """Load ``model_name`` with ``num_labels`` outputs and install a fresh linear head."""
+        super().__init__()
+        backbone = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
+        # Swap in a newly initialised linear layer as the classification head
+        backbone.classifier = nn.Linear(backbone.config.hidden_size, num_labels)
+        self.bert = backbone
+    def forward(self, input_ids, attention_mask):
+        """Return the logits computed for the given token ids and attention mask."""
+        return self.bert(input_ids=input_ids, attention_mask=attention_mask).logits
+# Build the tokenizer and classifier and load the fine-tuned weights (result is cached)
+@st.cache_resource
+def load_model():
+    """Create the tokenizer and classifier and try to load fine-tuned weights.
+
+    Returns:
+        A ``(model, tokenizer, target_columns)`` tuple. The model is moved to
+        ``device`` and put in eval mode. When ``alergen_model.pt`` cannot be
+        loaded, an error and a warning are shown and the model is returned
+        without the fine-tuned weights.
+    """
+    # Tokenizer and encoder are taken from the same pretrained checkpoint.
+    # NOTE(review): the removed revision used -p2 for both; confirm the saved
+    # weights were fine-tuned from this base.
+    base_model_name = 'indobenchmark/indobert-base-p2'
+    # Target columns
+    target_columns = ['susu', 'kacang', 'telur', 'makanan_laut', 'gandum']
+    # Initialize tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
+    # Initialize model
+    model = MultilabelBertClassifier(base_model_name, len(target_columns))
+    # Load model weights if available
+    model_path = "alergen_model.pt"
+    try:
+        # SECURITY: torch.load unpickles the file, and this path can be written
+        # by the sidebar uploader. Consider torch.load(..., weights_only=True)
+        # once the checkpoint contents are confirmed to be compatible with it.
+        checkpoint = torch.load(model_path, map_location=device)
+        model.load_state_dict(checkpoint['model_state_dict'])
+        st.success("Model berhasil dimuat!")
+    except Exception as e:
+        # Best effort: keep the app usable but tell the user the weights are missing
+        st.error(f"Error loading model: {str(e)}")
+        st.warning("Model belum tersedia. Silakan latih model terlebih dahulu atau upload file model.")
+    model.to(device)
+    model.eval()
+    return model, tokenizer, target_columns
+def predict_allergens(ingredients_text, model, tokenizer, target_columns, max_length=128, threshold=0.5):
+    """Predict which allergens are present in a list of ingredients.
+
+    Args:
+        ingredients_text: Raw ingredient text; it is passed through ``clean_text``.
+        model: Callable taking ``input_ids`` and ``attention_mask`` and returning logits.
+        tokenizer: Tokenizer used to encode the cleaned text.
+        target_columns: Allergen names, in the order of the model outputs.
+        max_length: Sequence length the text is truncated/padded to.
+        threshold: Probability above which an allergen counts as present.
+
+    Returns:
+        A dict mapping each target to ``{'present': bool, 'probability': float}``.
+    """
+    # Clean the text
+    cleaned_text = clean_text(ingredients_text)
+    # Calling the tokenizer directly replaces the deprecated encode_plus()
+    encoding = tokenizer(
+        cleaned_text,
         add_special_tokens=True,
         max_length=max_length,
         truncation=True,
         return_tensors='pt',
         padding='max_length'
     )
+    input_ids = encoding['input_ids'].to(device)
+    attention_mask = encoding['attention_mask'].to(device)
     with torch.no_grad():
+        logits = model(input_ids=input_ids, attention_mask=attention_mask)
+        # Independent sigmoid per label (multilabel); [0] selects the single input row
+        probabilities = torch.sigmoid(logits).cpu().numpy()[0]
+    return {
+        target: {
+            'present': bool(probability > threshold),
+            'probability': float(probability)
+        }
+        for target, probability in zip(target_columns, probabilities)
+    }
+# Display name and explanation sentence for each target column the model predicts
+_ALLERGEN_INFO = {
+    'susu': ("Susu", "Resep mungkin mengandung susu atau produk turunannya"),
+    'kacang': ("Kacang", "Resep mungkin mengandung kacang atau produk turunannya"),
+    'telur': ("Telur", "Resep mungkin mengandung telur atau produk turunannya"),
+    'makanan_laut': ("Makanan Laut", "Resep mungkin mengandung ikan, udang, kerang, atau makanan laut lainnya"),
+    'gandum': ("Gandum", "Resep mungkin mengandung gandum atau produk turunannya (termasuk gluten)"),
+}
+def _allergen_label(name):
+    """Return the display name for a target column, e.g. 'makanan_laut' -> 'Makanan Laut'."""
+    info = _ALLERGEN_INFO.get(name.lower())
+    if info is not None:
+        return info[0]
+    return name.replace('_', ' ').title()
+def _render_probability_chart(labels, probabilities, present):
+    """Draw one bar per allergen with the 0.5 threshold marked, then close the figure."""
+    fig, ax = plt.subplots(figsize=(10, 6))
+    try:
+        bars = ax.bar(
+            labels,
+            probabilities,
+            color=['red' if flag else 'green' for flag in present]
+        )
+        # Add threshold line
+        ax.axhline(y=0.5, color='black', linestyle='--', alpha=0.7)
+        ax.text(len(labels)-1, 0.51, 'Threshold (0.5)', ha='right', va='bottom')
+        # Customize the chart
+        ax.set_ylim(0, 1)
+        ax.set_ylabel('Probabilitas')
+        ax.set_title('Probabilitas Deteksi Alergen')
+        # Add values on top of bars
+        for bar in bars:
+            height = bar.get_height()
+            ax.annotate(f'{height:.2f}',
+                       xy=(bar.get_x() + bar.get_width() / 2, height),
+                       xytext=(0, 3),  # 3 points vertical offset
+                       textcoords="offset points",
+                       ha='center', va='bottom')
+        st.pyplot(fig)
+    finally:
+        # pyplot keeps every figure alive until it is closed explicitly
+        plt.close(fig)
+def _render_results(ingredients, model, tokenizer, target_columns, chart_column):
+    """Run the prediction and render the table, the chart (in *chart_column*) and the explanation."""
+    # Clean text for display
+    cleaned_text = clean_text(ingredients)
+    st.markdown("### Bahan yang diproses:")
+    st.text(cleaned_text)
+    # Get predictions
+    results = predict_allergens(ingredients, model, tokenizer, target_columns)
+    st.markdown("### Hasil Deteksi Alergen:")
+    allergens = list(results)
+    labels = [_allergen_label(name) for name in allergens]
+    probabilities = [results[name]['probability'] for name in allergens]
+    present = [results[name]['present'] for name in allergens]
+    result_df = pd.DataFrame({
+        'Alergen': labels,
+        'Terdeteksi': ['✅' if flag else '❌' for flag in present],
+        'Probabilitas': [f"{probability*100:.2f}%" for probability in probabilities]
+    })
+    st.dataframe(result_df, use_container_width=True)
+    # Display chart in the second column
+    with chart_column:
+        _render_probability_chart(labels, probabilities, present)
+    # Show detailed explanation
+    st.markdown("### Penjelasan Hasil:")
+    detected = [name for name, flag in zip(allergens, present) if flag]
+    if detected:
+        detected_labels = ', '.join(_allergen_label(name) for name in detected)
+        st.markdown(f"Resep ini kemungkinan mengandung alergen: **{detected_labels}**")
+        # Only allergens with a known explanation get a bullet
+        for name in detected:
+            info = _ALLERGEN_INFO.get(name.lower())
+            if info is not None:
+                label, note = info
+                st.markdown(f"- **{label}**: {note}")
+    else:
+        st.markdown("Tidak terdeteksi alergen umum dalam resep ini.")
+    st.warning("Catatan: Prediksi ini hanya bersifat indikatif. Selalu verifikasi dengan informasi resmi untuk keamanan konsumsi.")
+# Main application
+def main():
+    """Render the page: intro, model upload sidebar, ingredient input, results and examples."""
+    st.title("Deteksi Alergen dalam Resep")
+    st.markdown("""
+    Aplikasi ini menggunakan model IndoBERT untuk mendeteksi kemungkinan alergen dalam resep berdasarkan daftar bahan.
+    Alergen yang diidentifikasi meliputi:
+    - Susu
+    - Kacang
+    - Telur
+    - Makanan Laut
+    - Gandum
+    """)
+    # Sidebar for model upload
+    st.sidebar.header("Upload Model")
+    uploaded_model = st.sidebar.file_uploader("Upload model allergen (alergen_model.pt)", type=["pt"])
+    if uploaded_model is not None:
+        # The uploader keeps returning the same file on later runs of main();
+        # persist and reload only when the upload changes.
+        # NOTE(review): name + size is a cheap signature; a different file with
+        # the same name and size would not be picked up.
+        upload_signature = (uploaded_model.name, uploaded_model.size)
+        if st.session_state.get("model_upload_signature") != upload_signature:
+            with open("alergen_model.pt", "wb") as f:
+                f.write(uploaded_model.getbuffer())
+            # load_model() is cached; drop the cached entry so the new weights are read
+            load_model.clear()
+            st.session_state["model_upload_signature"] = upload_signature
+        st.sidebar.success("Model telah diupload dan dimuat!")
+    # Load model
+    model, tokenizer, target_columns = load_model()
+    # Input area
+    st.header("Masukkan Daftar Bahan Resep")
+    ingredients = st.text_area("Bahan-bahan:", height=200,
+                              placeholder="Contoh: 1 bungkus Lontong homemade, 2 butir Telur ayam, 2 kotak kecil Tahu coklat...")
+    col1, col2 = st.columns(2)
+    with col1:
+        if st.button("Deteksi Alergen", type="primary"):
+            # Whitespace-only input carries no ingredients, so treat it as empty
+            if ingredients.strip():
+                with st.spinner("Menganalisis bahan-bahan..."):
+                    _render_results(ingredients, model, tokenizer, target_columns, col2)
+            else:
+                st.error("Mohon masukkan daftar bahan terlebih dahulu.")
+    # Examples section
+    with st.expander("Contoh Resep"):
+        st.markdown("""
+        ### Contoh Resep 1 (Mengandung Beberapa Alergen)
+        ```
+        1 bungkus Lontong homemade, 2 butir Telur ayam, 2 kotak kecil Tahu coklat, 4 butir kecil Kentang, 2 buah Tomat merah, 1 buah Ketimun lalap, 4 lembar Selada keriting, 2 lembar Kol putih, 2 porsi Saus kacang homemade, 4 buah Kerupuk udang goreng, Secukupnya emping goreng, 2 sdt Bawang goreng, Secukupnya Kecap manis
+        ```
+        ### Contoh Resep 2 (Mengandung Susu)
+        ```
+        250 ml susu full cream, 2 sdm tepung maizena, 3 sdm gula pasir, 1/2 sdt vanila ekstrak, secukupnya keju cheddar parut
+        ```
+        ### Contoh Resep 3 (Mengandung Makanan Laut)
+        ```
+        250 g udang segar, 150 g cumi-cumi, 2 sdm saus tiram, 3 siung bawang putih, 1 ruas jahe, 2 sdm minyak goreng, garam dan merica secukupnya
+        ```
+        """)
+    # About section
+    st.sidebar.markdown("---")
+    st.sidebar.header("Tentang")
+    st.sidebar.info("""
+    Aplikasi ini menggunakan model deep learning berbasis IndoBERT untuk mendeteksi alergen dalam resep makanan.
+    Model ini dilatih untuk mengidentifikasi 5 jenis alergen umum dalam makanan berdasarkan daftar bahan resep.
+    """)
+    # Model information
+    st.sidebar.markdown("---")
+    st.sidebar.header("Informasi Model")
+    st.sidebar.markdown("""
+    - **Model Dasar**: IndoBERT
+    - **Jenis**: Multilabel Classification
+    - **Alergen yang Dideteksi**: Susu, Kacang, Telur, Makanan Laut, Gandum
+    """)
+if __name__ == "__main__":
+    main()

requirements.txt CHANGED Viewed

@@ -1,6 +1,8 @@
-streamlit
-torch
-transformers
-requests
-beautifulsoup4
-numpy

+streamlit>=1.27.0
+torch>=2.0.0
+transformers>=4.35.0
+pandas>=2.0.0
+numpy>=1.24.0
+matplotlib>=3.7.0
+scikit-learn>=1.3.0
+regex>=20