Spaces:
Sleeping
Sleeping
rdsarjito
committed on
Commit
·
ec9ce14
1
Parent(s):
aaa3549
first commit
Browse files
- app.py +101 -0
- model/alergen_model.pt +3 -0
- requirements.txt +5 -0
app.py
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import torch
|
3 |
+
import re
|
4 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
5 |
+
import requests
|
6 |
+
from bs4 import BeautifulSoup
|
7 |
+
|
8 |
+
# === Konfigurasi Umum ===
|
9 |
+
# Fine-tuned checkpoint read by load_model(); make sure the path and file name are correct.
MODEL_PATH = "model/alergen_model.pt"
# Allergen classes; predict_alergen() pairs them positionally with the model's outputs.
LABELS = [
    "susu",
    "kacang",
    "telur",
    "makanan_laut",
    "gandum",
]
# Passed to the tokenizer as max_length (inputs are truncated/padded to this many tokens).
MAX_LEN = 128
# Run on the GPU when CUDA is available, otherwise on the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
|
13 |
+
|
14 |
+
# === Load Model & Tokenizer ===
|
15 |
+
@st.cache_resource
def load_model():
    """Create the IndoBERT tokenizer and the fine-tuned allergen classifier.

    The base ``indobenchmark/indobert-base-p2`` weights are instantiated with
    one output per entry in ``LABELS`` (multi-label head), then overwritten
    with the ``'model_state_dict'`` entry of the checkpoint at ``MODEL_PATH``.
    The result is cached by Streamlit via ``st.cache_resource``.

    Returns:
        tuple: ``(tokenizer, model)`` with the model moved to ``DEVICE`` and
        switched to evaluation mode.
    """
    base_checkpoint = "indobenchmark/indobert-base-p2"

    tokenizer = AutoTokenizer.from_pretrained(base_checkpoint)
    classifier = AutoModelForSequenceClassification.from_pretrained(
        base_checkpoint,
        num_labels=len(LABELS),
        problem_type="multi_label_classification",
    )

    # NOTE(review): torch.load may unpickle arbitrary objects depending on the
    # installed torch version -- only point MODEL_PATH at a trusted checkpoint.
    checkpoint = torch.load(MODEL_PATH, map_location=DEVICE)
    classifier.load_state_dict(checkpoint['model_state_dict'])

    classifier.to(DEVICE)
    classifier.eval()
    return tokenizer, classifier
|
28 |
+
|
29 |
+
# === Cleaning Teks ===
|
30 |
+
def clean_text(text):
    """Normalize raw ingredient text before it is tokenized.

    Steps, in order:
      1. ``--`` becomes a space (done before URL removal, so a ``--`` inside
         a URL ends the URL at that point).
      2. Anything starting with ``http`` up to the next whitespace is removed.
      3. Every character that is not an ASCII letter, digit or whitespace is
         replaced by a space.
      4. Runs of whitespace (spaces, tabs, newlines, carriage returns) are
         collapsed into a single space.
      5. The result is lower-cased and stripped.

    Args:
        text: Raw text; ``None`` or an empty string yields ``""``.

    Returns:
        str: The cleaned, lower-case text.
    """
    if not text:
        return ""

    text = text.replace("--", " ")
    text = re.sub(r"http\S+", "", text)
    # Raw strings avoid the invalid "\s" escape the non-raw pattern relied on.
    text = re.sub(r"[^a-zA-Z0-9\s]", " ", text)
    # Collapse every kind of whitespace, not just runs of literal spaces.
    text = re.sub(r"\s+", " ", text)
    return text.lower().strip()
|
37 |
+
|
38 |
+
# === Scrape dari Cookpad ===
|
39 |
+
def scrape_ingredients(url, timeout=10):
    """Fetch a recipe page and return the text of its ingredients section.

    The page is requested with a browser-like ``User-Agent`` header and parsed
    with ``html.parser``; the ingredients are taken from the
    ``<div id="ingredients">`` element.

    Args:
        url: Address of the recipe page.
        timeout: Seconds to wait for the server before giving up, so a stalled
            request cannot hang the app indefinitely.

    Returns:
        str | None: The ingredients text joined with spaces, or ``None`` when
        the request fails, the server answers with an HTTP error status, the
        ingredients element is missing, or it contains no text.
    """
    headers = {'User-Agent': 'Mozilla/5.0'}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        # Treat 4xx/5xx answers as failures instead of parsing an error page.
        response.raise_for_status()
    except requests.RequestException:
        # Covers connection errors, timeouts, malformed URLs and HTTP errors.
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    ingredients_div = soup.find('div', id='ingredients')
    if ingredients_div is None:
        return None

    ingredients = ingredients_div.get_text(separator=' ')
    # A whitespace-only section is as useless to the caller as a missing one.
    return ingredients if ingredients.strip() else None
|
49 |
+
|
50 |
+
# === Prediksi Alergen ===
|
51 |
+
def predict_alergen(text, tokenizer, model, threshold):
    """Score a piece of ingredient text against every allergen label.

    The text is normalized with ``clean_text``, tokenized to exactly
    ``MAX_LEN`` tokens (truncated or padded), run through ``model`` without
    gradient tracking, and the logits are squashed with a sigmoid so each
    label gets an independent probability.

    Args:
        text: Raw ingredient text.
        tokenizer: Tokenizer callable returning ``input_ids`` and
            ``attention_mask`` tensors.
        model: Classifier whose output exposes ``.logits``.
        threshold: Not used here; kept so existing callers keep working.
            Callers apply the threshold to the returned probabilities.

    Returns:
        dict[str, float]: Probability per label, keyed in ``LABELS`` order.
    """
    cleaned = clean_text(text)
    # Calling the tokenizer directly replaces the deprecated encode_plus().
    encoding = tokenizer(
        cleaned,
        add_special_tokens=True,
        max_length=MAX_LEN,
        truncation=True,
        padding='max_length',
        return_tensors='pt',
    )
    input_ids = encoding['input_ids'].to(DEVICE)
    attention_mask = encoding['attention_mask'].to(DEVICE)

    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        # Single input, so take row 0; tolist() yields plain Python floats.
        probs = torch.sigmoid(outputs.logits)[0].cpu().tolist()

    return dict(zip(LABELS, probs))
|
69 |
+
|
70 |
+
# === UI Streamlit ===
|
71 |
+
# Streamlit page: choose manual text or a Cookpad URL, pick a threshold, then
# classify the ingredients and list each allergen with its probability.
#
# NOTE(review): the emoji in this section arrived mis-decoded, and two string
# literals were split across lines as a result. Emoji whose byte pattern and
# context make the original clear were restored (🍲, ✅, ❌, ⚠️ -- confirm
# against the deployed app). Labels that still start with "π" could not be
# recovered with confidence and are left exactly as found.
st.set_page_config(page_title="Deteksi Alergen IndoBERT", page_icon="🍲")
st.title("🍲 Deteksi Alergen dari Resep Cookpad (IndoBERT)")

tokenizer, model = load_model()

input_mode = st.radio("Pilih input:", ["Teks Manual", "URL Cookpad"])
if input_mode == "Teks Manual":
    user_input = st.text_area("π Masukkan bahan makanan:")
else:
    url = st.text_input("π Masukkan URL Cookpad:")
    user_input = ""
    if url:
        scraped = scrape_ingredients(url)
        if scraped:
            st.success("✅ Berhasil mengambil bahan dari URL")
            # Keep the widget's return value so edits made to the scraped
            # ingredients are what actually gets classified.
            user_input = st.text_area("π Bahan dari URL:", value=scraped, height=200)
        else:
            st.error("❌ Gagal mengambil data dari URL.")

threshold = st.slider("π Threshold (default 0.5):", 0.0, 1.0, 0.5)

if st.button("π Prediksi"):
    if user_input.strip():
        result = predict_alergen(user_input, tokenizer, model, threshold)
        st.subheader("π Hasil Prediksi Alergen:")
        for label, prob in result.items():
            # A label counts as present when its probability reaches the threshold.
            status = "✅ Ada" if prob >= threshold else "❌ Tidak Ada"
            st.write(f"- **{label}**: {status} ({prob:.2f})")
    else:
        st.warning("⚠️ Masukkan teks bahan atau URL terlebih dahulu.")
|
model/alergen_model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28df831b272894c11265ef5f4cf1ac2a2ca89e765b26bff928f34c388ff015d5
|
3 |
+
size 497868974
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
torch
|
3 |
+
transformers
|
4 |
+
requests
|
5 |
+
beautifulsoup4
|