Spaces:
Sleeping
Sleeping
import streamlit as st | |
import os | |
import numpy as np | |
import pandas as pd | |
import re | |
import torch | |
import torch.nn as nn | |
from torch.utils.data import Dataset | |
from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
import matplotlib.pyplot as plt | |
import warnings | |
# Suppress all Python warnings for this process.
warnings.filterwarnings("ignore")

# Set page config. The icon literal had been corrupted by an encoding
# round-trip ("π²" is what the UTF-8 bytes of the pot-of-food emoji look like
# when decoded with a Greek code page), so the emoji is restored here.
st.set_page_config(
    page_title="Deteksi Alergen dalam Resep",
    page_icon="🍲",
    layout="wide",
)

# Set device: use CUDA when torch reports it as available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Clean text function
def clean_text(text):
    """Normalize raw ingredient text before it is tokenized.

    The steps run in this order:

    1. replace every ``--`` with a single space,
    2. remove URLs (any run of non-whitespace starting with ``http``),
    3. turn newlines into spaces,
    4. replace every character that is not an ASCII letter, a digit or
       whitespace with a space,
    5. collapse runs of two or more spaces into one,
    6. strip leading/trailing whitespace and lowercase the result.

    Args:
        text: The raw input string.

    Returns:
        The cleaned, lowercased string (``""`` for empty input).
    """
    # Only double dashes are handled here; single dashes are turned into
    # spaces by the non-alphanumeric substitution below.
    text = text.replace('--', ' ')
    text = re.sub(r"http\S+", "", text)
    text = text.replace("\n", " ")
    # Raw string on purpose: "\s" inside a plain literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    text = re.sub(r"[^a-zA-Z0-9\s]", " ", text)
    text = re.sub(r" {2,}", " ", text)
    return text.strip().lower()
# Define model for multilabel classification
class MultilabelBertClassifier(nn.Module):
    """Pretrained sequence-classification model with a freshly created head.

    The wrapped model is stored as ``self.bert`` and its ``classifier``
    attribute is replaced by a new ``nn.Linear`` layer mapping the model's
    hidden size to ``num_labels`` outputs.
    """

    def __init__(self, model_name, num_labels):
        """Load ``model_name`` and attach a new linear classification head.

        Args:
            model_name: Identifier passed to
                ``AutoModelForSequenceClassification.from_pretrained``.
            num_labels: Number of output logits.
        """
        super().__init__()
        backbone = AutoModelForSequenceClassification.from_pretrained(
            model_name,
            num_labels=num_labels,
        )
        # Replace the classification head with our own for multilabel.
        backbone.classifier = nn.Linear(backbone.config.hidden_size, num_labels)
        self.bert = backbone

    def forward(self, input_ids, attention_mask):
        """Return the raw logits of the wrapped model (no activation applied)."""
        return self.bert(input_ids=input_ids, attention_mask=attention_mask).logits
# Function to build the model and tokenizer and load the fine-tuned weights
def load_model():
    """Create the tokenizer and classifier and try to load saved weights.

    The checkpoint is looked up at ``model/alergen_model.pt`` first and, when
    that file does not exist, at ``alergen_model.pt`` in the working
    directory. The outcome of the load is reported through Streamlit
    (``st.success`` on success, ``st.error`` + ``st.warning`` on failure).
    On failure the model is still returned, without the fine-tuned weights.

    Returns:
        A tuple ``(model, tokenizer, target_columns)`` where ``model`` has
        been moved to ``device`` and switched to eval mode, and
        ``target_columns`` is the list of label names in output order.
    """
    # Target columns
    target_columns = ['susu', 'kacang', 'telur', 'makanan_laut', 'gandum']

    # NOTE(review): the tokenizer comes from "indobert-base-p2" while the
    # model is built from "indobert-base-p1" -- confirm this matches how the
    # checkpoint was trained.
    tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p2')
    model = MultilabelBertClassifier('indobenchmark/indobert-base-p1', len(target_columns))

    # Fall back to a checkpoint in the working directory when the primary
    # path is missing, so that a file saved as ./alergen_model.pt is still
    # picked up. If neither exists the primary path is used, which keeps the
    # original "file not found" error message.
    candidate_paths = ("model/alergen_model.pt", "alergen_model.pt")
    model_path = next(
        (path for path in candidate_paths if os.path.isfile(path)),
        candidate_paths[0],
    )

    try:
        # SECURITY NOTE: torch.load unpickles the file, which can execute
        # arbitrary code. The checkpoint may originate from a user upload;
        # consider restricting loading to tensors only (see torch.load docs).
        checkpoint = torch.load(model_path, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        st.success("Model berhasil dimuat!")
    except Exception as e:
        # UI boundary: report the problem instead of crashing the app.
        st.error(f"Error loading model: {str(e)}")
        st.warning("Model belum tersedia. Silakan latih model terlebih dahulu atau upload file model.")

    model.to(device)
    model.eval()
    return model, tokenizer, target_columns
def predict_allergens(ingredients_text, model, tokenizer, target_columns, max_length=128, threshold=0.5):
    """Predict which allergens are present in a list of ingredients.

    The text is cleaned with ``clean_text``, tokenized (padded/truncated to
    ``max_length``), passed through ``model``, and the logits are converted to
    per-label probabilities with a sigmoid.

    Args:
        ingredients_text: Raw ingredient text.
        model: Callable taking ``input_ids`` and ``attention_mask`` keyword
            arguments and returning logits with one row per input.
        tokenizer: Tokenizer that is called on the cleaned text and returns
            ``input_ids`` and ``attention_mask`` tensors.
        target_columns: Label names, in the order of the model outputs.
        max_length: Sequence length used for padding and truncation.
        threshold: A label is reported as present when its probability is
            strictly greater than this value. Defaults to 0.5.

    Returns:
        A dict mapping each label name to
        ``{'present': bool, 'probability': float}``.
    """
    cleaned_text = clean_text(ingredients_text)

    # Calling the tokenizer directly is the supported replacement for the
    # deprecated encode_plus(); the arguments are the same.
    encoding = tokenizer(
        cleaned_text,
        add_special_tokens=True,
        max_length=max_length,
        truncation=True,
        padding='max_length',
        return_tensors='pt',
    )
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    with torch.no_grad():
        logits = model(input_ids=input_ids, attention_mask=attention_mask)
        # Single input, so only the first row of the batch is needed.
        probabilities = torch.sigmoid(logits)[0].cpu().tolist()

    return {
        target: {
            'present': probabilities[i] > threshold,
            'probability': probabilities[i],
        }
        for i, target in enumerate(target_columns)
    }
# Where an uploaded checkpoint is stored; this is the path load_model() reads.
_UPLOADED_MODEL_PATH = "model/alergen_model.pt"

# Explanation shown for each detected allergen, keyed by label name.
_ALLERGEN_EXPLANATIONS = {
    'susu': "- **Susu**: Resep mungkin mengandung susu atau produk turunannya",
    'kacang': "- **Kacang**: Resep mungkin mengandung kacang atau produk turunannya",
    'telur': "- **Telur**: Resep mungkin mengandung telur atau produk turunannya",
    'makanan_laut': "- **Makanan Laut**: Resep mungkin mengandung ikan, udang, kerang, atau makanan laut lainnya",
    'gandum': "- **Gandum**: Resep mungkin mengandung gandum atau produk turunannya (termasuk gluten)",
}


def _display_name(allergen):
    """Turn a label key such as 'makanan_laut' into 'Makanan Laut' for display."""
    return allergen.replace('_', ' ').title()


# Main application
def main():
    """Render the Streamlit page: upload, input, prediction and results.

    Layout: an intro text, a sidebar uploader for a model checkpoint, a text
    area for the ingredient list, and two columns. Pressing the button runs
    ``predict_allergens`` and shows a result table and explanation in the
    first column and a probability bar chart in the second. An expander with
    example recipes and two informational sidebar sections follow.
    """
    st.title("Deteksi Alergen dalam Resep")
    st.markdown("""
    Aplikasi ini menggunakan model IndoBERT untuk mendeteksi kemungkinan alergen dalam resep berdasarkan daftar bahan.
    Alergen yang diidentifikasi meliputi:
    - Susu
    - Kacang
    - Telur
    - Makanan Laut
    - Gandum
    """)

    # Sidebar for model upload
    st.sidebar.header("Upload Model")
    uploaded_model = st.sidebar.file_uploader("Upload model allergen (alergen_model.pt)", type=["pt"])
    if uploaded_model is not None:
        # Save to the location load_model() reads; writing to the working
        # directory instead meant the uploaded file was never loaded.
        os.makedirs(os.path.dirname(_UPLOADED_MODEL_PATH), exist_ok=True)
        with open(_UPLOADED_MODEL_PATH, "wb") as f:
            f.write(uploaded_model.getbuffer())
        st.sidebar.success("Model telah diupload dan dimuat!")

    # Load model
    model, tokenizer, target_columns = load_model()

    # Input area
    st.header("Masukkan Daftar Bahan Resep")
    ingredients = st.text_area(
        "Bahan-bahan:",
        height=200,
        placeholder="Contoh: 1 bungkus Lontong homemade, 2 butir Telur ayam, 2 kotak kecil Tahu coklat...",
    )

    col1, col2 = st.columns(2)
    with col1:
        if st.button("Deteksi Alergen", type="primary"):
            # Whitespace-only input is treated like empty input.
            if ingredients and ingredients.strip():
                with st.spinner("Menganalisis bahan-bahan..."):
                    # Clean text for display
                    cleaned_text = clean_text(ingredients)
                    st.markdown("### Bahan yang diproses:")
                    st.text(cleaned_text)

                    # Get predictions
                    results = predict_allergens(ingredients, model, tokenizer, target_columns)

                    # Display results
                    st.markdown("### Hasil Deteksi Alergen:")
                    allergens = list(results.keys())
                    labels = [_display_name(a) for a in allergens]
                    probabilities = [results[a]['probability'] for a in allergens]
                    present = [results[a]['present'] for a in allergens]

                    # Result table. The check/cross marks had been corrupted
                    # by an encoding round-trip and are restored here.
                    result_df = pd.DataFrame({
                        'Alergen': labels,
                        'Terdeteksi': ['✅' if p else '❌' for p in present],
                        'Probabilitas': [f"{prob*100:.2f}%" for prob in probabilities],
                    })
                    st.dataframe(result_df, use_container_width=True)

                    # Display chart in the second column
                    with col2:
                        fig, ax = plt.subplots(figsize=(10, 6))
                        bars = ax.bar(
                            labels,
                            probabilities,
                            color=['red' if p else 'green' for p in present],
                        )

                        # Add threshold line
                        ax.axhline(y=0.5, color='black', linestyle='--', alpha=0.7)
                        ax.text(len(allergens) - 1, 0.51, 'Threshold (0.5)', ha='right', va='bottom')

                        # Customize the chart
                        ax.set_ylim(0, 1)
                        ax.set_ylabel('Probabilitas')
                        ax.set_title('Probabilitas Deteksi Alergen')

                        # Add values on top of bars
                        for bar in bars:
                            height = bar.get_height()
                            ax.annotate(
                                f'{height:.2f}',
                                xy=(bar.get_x() + bar.get_width() / 2, height),
                                xytext=(0, 3),  # 3 points vertical offset
                                textcoords="offset points",
                                ha='center',
                                va='bottom',
                            )

                        st.pyplot(fig)
                        # pyplot keeps a reference to every figure it creates;
                        # close it so figures do not pile up across reruns.
                        plt.close(fig)

                    # Show detailed explanation
                    st.markdown("### Penjelasan Hasil:")
                    detected = [a for a in allergens if results[a]['present']]
                    if detected:
                        detected_labels = ', '.join(_display_name(a) for a in detected)
                        st.markdown(f"Resep ini kemungkinan mengandung alergen: **{detected_labels}**")
                        # Provide specific explanation for each detected
                        # allergen; labels without an entry are skipped.
                        for allergen in detected:
                            explanation = _ALLERGEN_EXPLANATIONS.get(allergen)
                            if explanation is not None:
                                st.markdown(explanation)
                    else:
                        st.markdown("Tidak terdeteksi alergen umum dalam resep ini.")

                    st.warning("Catatan: Prediksi ini hanya bersifat indikatif. Selalu verifikasi dengan informasi resmi untuk keamanan konsumsi.")
            else:
                st.error("Mohon masukkan daftar bahan terlebih dahulu.")

    # Examples section
    with st.expander("Contoh Resep"):
        st.markdown("""
        ### Contoh Resep 1 (Mengandung Beberapa Alergen)
        ```
        1 bungkus Lontong homemade, 2 butir Telur ayam, 2 kotak kecil Tahu coklat, 4 butir kecil Kentang, 2 buah Tomat merah, 1 buah Ketimun lalap, 4 lembar Selada keriting, 2 lembar Kol putih, 2 porsi Saus kacang homemade, 4 buah Kerupuk udang goreng, Secukupnya emping goreng, 2 sdt Bawang goreng, Secukupnya Kecap manis
        ```
        ### Contoh Resep 2 (Mengandung Susu)
        ```
        250 ml susu full cream, 2 sdm tepung maizena, 3 sdm gula pasir, 1/2 sdt vanila ekstrak, secukupnya keju cheddar parut
        ```
        ### Contoh Resep 3 (Mengandung Makanan Laut)
        ```
        250 g udang segar, 150 g cumi-cumi, 2 sdm saus tiram, 3 siung bawang putih, 1 ruas jahe, 2 sdm minyak goreng, garam dan merica secukupnya
        ```
        """)

    # About section
    st.sidebar.markdown("---")
    st.sidebar.header("Tentang")
    st.sidebar.info("""
    Aplikasi ini menggunakan model deep learning berbasis IndoBERT untuk mendeteksi alergen dalam resep makanan.
    Model ini dilatih untuk mengidentifikasi 5 jenis alergen umum dalam makanan berdasarkan daftar bahan resep.
    """)

    # Model information
    st.sidebar.markdown("---")
    st.sidebar.header("Informasi Model")
    st.sidebar.markdown("""
    - **Model Dasar**: IndoBERT
    - **Jenis**: Multilabel Classification
    - **Alergen yang Dideteksi**: Susu, Kacang, Telur, Makanan Laut, Gandum
    """)


# Run the app when the file is executed as a script.
if __name__ == "__main__":
    main()