rdsarjito committed on
Commit
5e92542
·
1 Parent(s): 0c9c0e2
Files changed (2) hide show
  1. app.py +244 -42
  2. requirements.txt +8 -6
app.py CHANGED
@@ -1,62 +1,264 @@
1
- # app.py
2
  import streamlit as st
3
- import torch
4
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
5
  import numpy as np
 
6
  import re
 
 
 
 
 
 
 
7
 
8
# Load the model and tokenizer
@st.cache_resource
def load_model():
    """Restore the fine-tuned allergen classifier from the local checkpoint.

    The checkpoint file is read onto the CPU; its ``target_columns`` entry
    determines the number of output labels, and its ``model_state_dict``
    entry supplies the weights.

    Returns:
        tuple: ``(model, target_columns)`` with the model in eval mode.
    """
    saved = torch.load("model/alergen_model.pt", map_location=torch.device("cpu"))
    labels = saved['target_columns']

    classifier = AutoModelForSequenceClassification.from_pretrained(
        "indobenchmark/indobert-base-p2",
        num_labels=len(labels),
    )
    classifier.load_state_dict(saved['model_state_dict'])
    classifier.eval()
    return classifier, labels


# Module-level objects shared by the inference code below
model, target_columns = load_model()
tokenizer = AutoTokenizer.from_pretrained("indobenchmark/indobert-base-p2")
max_length = 128
24
 
25
# Text cleaning
def clean_text(text):
    """Normalise raw ingredient text before tokenisation.

    Steps, in order: replace ``--`` with a space, drop URLs, turn newlines
    into spaces, replace every character that is not an ASCII letter, digit
    or whitespace with a space, collapse runs of spaces, then strip and
    lowercase.

    Args:
        text: raw ingredient string.

    Returns:
        The cleaned, lowercased string.
    """
    text = text.replace('--', ' ')
    text = re.sub(r"http\S+", "", text)
    text = re.sub('\n', ' ', text)
    # Raw string: "\s" in a plain literal is an invalid escape sequence
    text = re.sub(r"[^a-zA-Z0-9\s]", " ", text)
    text = re.sub(r" {2,}", " ", text)
    return text.strip().lower()
 
 
33
 
34
# Inference
def predict_alergens(text):
    """Predict which allergens are present in an ingredient list.

    The text is cleaned with ``clean_text``, tokenised to ``max_length``
    tokens, and passed through the module-level ``model``. Each sigmoid
    output above 0.5 marks the corresponding allergen as present.

    Args:
        text: raw ingredient string.

    Returns:
        dict: maps each name in ``target_columns`` to a boolean flag.
    """
    encoded = tokenizer.encode_plus(
        clean_text(text),
        add_special_tokens=True,
        max_length=max_length,
        truncation=True,
        return_tensors='pt',
        padding='max_length'
    )

    with torch.no_grad():
        logits = model(**encoded).logits
        probabilities = torch.sigmoid(logits)
        # Single input, so take row 0 of the batch
        flags = (probabilities > 0.5).float().numpy()[0]

    return {name: flag for name, flag in zip(target_columns, flags.astype(bool))}
50
-
51
# Streamlit UI: one text area, one button, and a tick/cross line per allergen
st.title("Prediksi Alergen Makanan")
ingredients = st.text_area("Masukkan daftar bahan makanan (ingredients):", height=200)

if st.button("Prediksi"):
    if not ingredients.strip():
        # Nothing but whitespace was entered
        st.warning("Mohon masukkan teks bahan makanan.")
    else:
        results = predict_alergens(ingredients)
        st.subheader("Hasil Prediksi:")
        for allergen, is_present in results.items():
            marker = "✅" if is_present else "❌"
            st.write(f"{marker} {allergen}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import os
 
3
  import numpy as np
4
+ import pandas as pd
5
  import re
6
+ import torch
7
+ import torch.nn as nn
8
+ from torch.utils.data import Dataset
9
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
10
+ import matplotlib.pyplot as plt
11
+ import warnings
12
+ warnings.filterwarnings("ignore")
13
 
14
# Streamlit page settings: browser-tab title, icon and wide layout
st.set_page_config(page_title="Deteksi Alergen dalam Resep", page_icon="🍲", layout="wide")

# Use CUDA when torch reports it as available, otherwise fall back to the CPU
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
 
23
 
24
# Clean text function
# Patterns are raw strings: "\s" in a plain literal is an invalid escape sequence.
_URL_PATTERN = re.compile(r"http\S+")
_NON_ALNUM_PATTERN = re.compile(r"[^a-zA-Z0-9\s]")
_MULTI_SPACE_PATTERN = re.compile(r" {2,}")


def clean_text(text):
    """Normalise raw ingredient text before tokenisation.

    Steps, in order: replace ``--`` with a space, drop URLs, turn newlines
    into spaces, replace every character that is not an ASCII letter, digit
    or whitespace with a space, collapse runs of spaces, then strip and
    lowercase.

    Args:
        text: raw ingredient string.

    Returns:
        The cleaned, lowercased string.
    """
    # Convert double dashes to spaces for better tokenization
    text = text.replace('--', ' ')
    # Basic cleaning
    text = _URL_PATTERN.sub("", text)
    text = text.replace('\n', ' ')
    text = _NON_ALNUM_PATTERN.sub(" ", text)
    text = _MULTI_SPACE_PATTERN.sub(" ", text)
    return text.strip().lower()
36
 
37
# Model wrapper for multilabel classification
class MultilabelBertClassifier(nn.Module):
    """Pretrained sequence-classification model that returns raw logits.

    Args:
        model_name: checkpoint identifier passed to ``from_pretrained``.
        num_labels: number of output labels.
    """

    def __init__(self, model_name, num_labels):
        super().__init__()
        backbone = AutoModelForSequenceClassification.from_pretrained(
            model_name, num_labels=num_labels
        )
        # Swap the stock classification head for our own linear layer
        backbone.classifier = nn.Linear(backbone.config.hidden_size, num_labels)
        self.bert = backbone

    def forward(self, input_ids, attention_mask):
        """Return the logits for the given token ids and attention mask."""
        return self.bert(input_ids=input_ids, attention_mask=attention_mask).logits
48
+
49
# Build the tokenizer and classifier; the result is cached by st.cache_resource
@st.cache_resource
def load_model():
    """Create the tokenizer and classifier and load fine-tuned weights.

    The weights are read from ``alergen_model.pt`` in the working directory.
    If loading fails, the error is shown in the UI and the model is returned
    without the fine-tuned weights instead of crashing the app.

    Returns:
        tuple: ``(model, tokenizer, target_columns)`` with the model moved to
        ``device`` and set to eval mode.
    """
    # Target columns
    target_columns = ['susu', 'kacang', 'telur', 'makanan_laut', 'gandum']

    # Use one checkpoint name for both tokenizer and model so they cannot drift
    # apart (the tokenizer was already loaded from the p2 checkpoint).
    pretrained_name = 'indobenchmark/indobert-base-p2'
    tokenizer = AutoTokenizer.from_pretrained(pretrained_name)
    model = MultilabelBertClassifier(pretrained_name, len(target_columns))

    # Load model weights if available
    model_path = "alergen_model.pt"
    try:
        # NOTE(security): torch.load unpickles the file, and main() lets users
        # upload it. Only load checkpoints from trusted sources; consider
        # torch.load(..., weights_only=True) if the checkpoint format allows it.
        checkpoint = torch.load(model_path, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
    except Exception as e:
        # Best effort: report the problem and keep the app usable
        st.error(f"Error loading model: {e}")
        st.warning("Model belum tersedia. Silakan latih model terlebih dahulu atau upload file model.")
    else:
        st.success("Model berhasil dimuat!")

    model.to(device)
    model.eval()

    return model, tokenizer, target_columns
77
+
78
def predict_allergens(ingredients_text, model, tokenizer, target_columns,
                      max_length=128, threshold=0.5):
    """Predict allergen probabilities for a single ingredient list.

    Args:
        ingredients_text: raw ingredient string; cleaned with ``clean_text``.
        model: callable taking ``input_ids`` and ``attention_mask`` and
            returning a logits tensor with one row per input.
        tokenizer: tokenizer providing ``encode_plus``.
        target_columns: allergen names, in the order of the model outputs.
        max_length: token length the input is padded/truncated to.
        threshold: probability above which an allergen counts as present.

    Returns:
        dict: maps each allergen name to
        ``{'present': bool, 'probability': float}``.
    """
    # Clean the text
    cleaned_text = clean_text(ingredients_text)

    # Tokenize
    encoding = tokenizer.encode_plus(
        cleaned_text,
        add_special_tokens=True,
        max_length=max_length,
        truncation=True,
        return_tensors='pt',
        padding='max_length'
    )

    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    with torch.no_grad():
        logits = model(input_ids=input_ids, attention_mask=attention_mask)
        # Single input, so take row 0 of the batch
        probabilities = torch.sigmoid(logits)[0]
        detected = probabilities > threshold

    # Move to host once and convert to plain Python floats/bools
    probabilities = probabilities.cpu().tolist()
    detected = detected.cpu().tolist()

    return {
        target: {
            'present': detected[i],
            'probability': probabilities[i],
        }
        for i, target in enumerate(target_columns)
    }
109
+
110
# Main application

# Explanation line shown for each detected allergen, keyed by target column name
_ALLERGEN_EXPLANATIONS = {
    'susu': "- **Susu**: Resep mungkin mengandung susu atau produk turunannya",
    'kacang': "- **Kacang**: Resep mungkin mengandung kacang atau produk turunannya",
    'telur': "- **Telur**: Resep mungkin mengandung telur atau produk turunannya",
    'makanan_laut': "- **Makanan Laut**: Resep mungkin mengandung ikan, udang, kerang, atau makanan laut lainnya",
    'gandum': "- **Gandum**: Resep mungkin mengandung gandum atau produk turunannya (termasuk gluten)",
}


def _install_uploaded_model(uploaded_model):
    """Save an uploaded checkpoint and make the cached model reload from it."""
    # Streamlit reruns the script on every interaction; only rewrite the file
    # and drop the cache when a different upload arrives.
    signature = (uploaded_model.name, uploaded_model.size)
    if st.session_state.get("installed_model_signature") != signature:
        with open("alergen_model.pt", "wb") as f:
            f.write(uploaded_model.getbuffer())
        # Without this, load_model() keeps returning the previously cached model
        load_model.clear()
        st.session_state["installed_model_signature"] = signature
    st.sidebar.success("Model telah diupload dan dimuat!")


def _render_results_table(results):
    """Show one table row per allergen with its flag and probability."""
    allergens = list(results)
    result_df = pd.DataFrame({
        'Alergen': [a.title() for a in allergens],
        'Terdeteksi': ['✅' if results[a]['present'] else '❌' for a in allergens],
        'Probabilitas': [f"{results[a]['probability']*100:.2f}%" for a in allergens]
    })
    st.dataframe(result_df, use_container_width=True)


def _render_probability_chart(results):
    """Draw a bar chart of the probabilities with the 0.5 threshold line."""
    allergens = list(results)
    probabilities = [results[a]['probability'] for a in allergens]
    present = [results[a]['present'] for a in allergens]

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        bars = ax.bar(
            [a.title() for a in allergens],
            probabilities,
            color=['red' if p else 'green' for p in present]
        )

        # Add threshold line
        ax.axhline(y=0.5, color='black', linestyle='--', alpha=0.7)
        ax.text(len(allergens)-1, 0.51, 'Threshold (0.5)', ha='right', va='bottom')

        # Customize the chart
        ax.set_ylim(0, 1)
        ax.set_ylabel('Probabilitas')
        ax.set_title('Probabilitas Deteksi Alergen')

        # Add values on top of bars
        for bar in bars:
            height = bar.get_height()
            ax.annotate(f'{height:.2f}',
                        xy=(bar.get_x() + bar.get_width() / 2, height),
                        xytext=(0, 3),  # 3 points vertical offset
                        textcoords="offset points",
                        ha='center', va='bottom')

        st.pyplot(fig)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly
        plt.close(fig)


def _render_explanation(results):
    """List the detected allergens with a short explanation for each."""
    st.markdown("### Penjelasan Hasil:")
    detected = [name for name, data in results.items() if data['present']]

    if detected:
        titles = [name.title() for name in detected]
        st.markdown(f"Resep ini kemungkinan mengandung alergen: **{', '.join(titles)}**")

        # Provide specific explanation for each detected allergen
        for name in detected:
            explanation = _ALLERGEN_EXPLANATIONS.get(name)
            if explanation is not None:
                st.markdown(explanation)
    else:
        st.markdown("Tidak terdeteksi alergen umum dalam resep ini.")

    st.warning("Catatan: Prediksi ini hanya bersifat indikatif. Selalu verifikasi dengan informasi resmi untuk keamanan konsumsi.")


def _render_examples():
    """Show the collapsible section with sample ingredient lists."""
    with st.expander("Contoh Resep"):
        st.markdown("""
        ### Contoh Resep 1 (Mengandung Beberapa Alergen)
        ```
        1 bungkus Lontong homemade, 2 butir Telur ayam, 2 kotak kecil Tahu coklat, 4 butir kecil Kentang, 2 buah Tomat merah, 1 buah Ketimun lalap, 4 lembar Selada keriting, 2 lembar Kol putih, 2 porsi Saus kacang homemade, 4 buah Kerupuk udang goreng, Secukupnya emping goreng, 2 sdt Bawang goreng, Secukupnya Kecap manis
        ```

        ### Contoh Resep 2 (Mengandung Susu)
        ```
        250 ml susu full cream, 2 sdm tepung maizena, 3 sdm gula pasir, 1/2 sdt vanila ekstrak, secukupnya keju cheddar parut
        ```

        ### Contoh Resep 3 (Mengandung Makanan Laut)
        ```
        250 g udang segar, 150 g cumi-cumi, 2 sdm saus tiram, 3 siung bawang putih, 1 ruas jahe, 2 sdm minyak goreng, garam dan merica secukupnya
        ```
        """)


def _render_sidebar_info():
    """Show the "about" and model-information sections in the sidebar."""
    # About section
    st.sidebar.markdown("---")
    st.sidebar.header("Tentang")
    st.sidebar.info("""
    Aplikasi ini menggunakan model deep learning berbasis IndoBERT untuk mendeteksi alergen dalam resep makanan.

    Model ini dilatih untuk mengidentifikasi 5 jenis alergen umum dalam makanan berdasarkan daftar bahan resep.
    """)

    # Model information
    st.sidebar.markdown("---")
    st.sidebar.header("Informasi Model")
    st.sidebar.markdown("""
    - **Model Dasar**: IndoBERT
    - **Jenis**: Multilabel Classification
    - **Alergen yang Dideteksi**: Susu, Kacang, Telur, Makanan Laut, Gandum
    """)


def main():
    """Render the allergen-detection page and handle one user interaction.

    Shows the introduction, an optional model upload in the sidebar, the
    ingredient input, and, after the button is pressed, the cleaned text,
    a results table, a probability chart and a textual explanation.
    """
    st.title("Deteksi Alergen dalam Resep")
    st.markdown("""
    Aplikasi ini menggunakan model IndoBERT untuk mendeteksi kemungkinan alergen dalam resep berdasarkan daftar bahan.
    Alergen yang diidentifikasi meliputi:
    - Susu
    - Kacang
    - Telur
    - Makanan Laut
    - Gandum
    """)

    # Sidebar for model upload
    st.sidebar.header("Upload Model")
    uploaded_model = st.sidebar.file_uploader("Upload model allergen (alergen_model.pt)", type=["pt"])
    if uploaded_model is not None:
        _install_uploaded_model(uploaded_model)

    # Load model
    model, tokenizer, target_columns = load_model()

    # Input area
    st.header("Masukkan Daftar Bahan Resep")
    ingredients = st.text_area("Bahan-bahan:", height=200,
                               placeholder="Contoh: 1 bungkus Lontong homemade, 2 butir Telur ayam, 2 kotak kecil Tahu coklat...")

    col1, col2 = st.columns(2)

    with col1:
        if st.button("Deteksi Alergen", type="primary"):
            # Whitespace-only input carries no ingredients, so treat it as empty
            if ingredients.strip():
                with st.spinner("Menganalisis bahan-bahan..."):
                    # Clean text for display
                    st.markdown("### Bahan yang diproses:")
                    st.text(clean_text(ingredients))

                    # Get predictions
                    results = predict_allergens(ingredients, model, tokenizer, target_columns)

                    # Display results
                    st.markdown("### Hasil Deteksi Alergen:")
                    _render_results_table(results)

                    # Display chart in the second column
                    with col2:
                        _render_probability_chart(results)

                    # Show detailed explanation
                    _render_explanation(results)
            else:
                st.error("Mohon masukkan daftar bahan terlebih dahulu.")

    # Examples section
    _render_examples()

    # About and model information sections
    _render_sidebar_info()


if __name__ == "__main__":
    main()
requirements.txt CHANGED
@@ -1,6 +1,8 @@
1
- streamlit
2
- torch
3
- transformers
4
- requests
5
- beautifulsoup4
6
- numpy
 
 
 
1
+ streamlit>=1.27.0
2
+ torch>=2.0.0
3
+ transformers>=4.35.0
4
+ pandas>=2.0.0
5
+ numpy>=1.24.0
6
+ matplotlib>=3.7.0
7
+ scikit-learn>=1.3.0
8
+ regex>=20