rdsarjito committed
Commit ec9ce14 · 1 Parent(s): aaa3549

first commit

Files changed (3)
  1. app.py +101 -0
  2. model/alergen_model.pt +3 -0
  3. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,101 @@
+ import streamlit as st
+ import torch
+ import re
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+ import requests
+ from bs4 import BeautifulSoup
+
+ # === General configuration ===
+ MODEL_PATH = 'model/alergen_model.pt'  # Make sure the model path and file name are correct
+ LABELS = ['susu', 'kacang', 'telur', 'makanan_laut', 'gandum']
+ MAX_LEN = 128
+ DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+ # === Load model & tokenizer ===
+ @st.cache_resource
+ def load_model():
+     tokenizer = AutoTokenizer.from_pretrained("indobenchmark/indobert-base-p2")
+     model = AutoModelForSequenceClassification.from_pretrained(
+         "indobenchmark/indobert-base-p2",
+         num_labels=len(LABELS),
+         problem_type="multi_label_classification"
+     )
+     state = torch.load(MODEL_PATH, map_location=DEVICE)
+     model.load_state_dict(state['model_state_dict'])
+     model.to(DEVICE)
+     model.eval()
+     return tokenizer, model
+
+ # === Text cleaning ===
+ def clean_text(text):
+     text = text.replace('--', ' ')
+     text = re.sub(r"http\S+", "", text)
+     text = re.sub(r"\n", " ", text)
+     text = re.sub(r"[^a-zA-Z0-9\s]", " ", text)
+     text = re.sub(r" {2,}", " ", text)
+     return text.lower().strip()
+
+ # === Scrape ingredients from Cookpad ===
+ def scrape_ingredients(url):
+     try:
+         headers = {'User-Agent': 'Mozilla/5.0'}
+         r = requests.get(url, headers=headers, timeout=10)
+         soup = BeautifulSoup(r.content, 'html.parser')
+         ingredients_div = soup.find('div', id='ingredients')
+         if ingredients_div:
+             return ingredients_div.get_text(separator=' ')
+     except Exception:
+         return None
+
+ # === Allergen prediction ===
+ def predict_alergen(text, tokenizer, model):
+     text = clean_text(text)
+     encoding = tokenizer.encode_plus(
+         text,
+         add_special_tokens=True,
+         max_length=MAX_LEN,
+         truncation=True,
+         padding='max_length',
+         return_tensors='pt'
+     )
+     input_ids = encoding['input_ids'].to(DEVICE)
+     attention_mask = encoding['attention_mask'].to(DEVICE)
+
+     with torch.no_grad():
+         outputs = model(input_ids=input_ids, attention_mask=attention_mask)
+         probs = torch.sigmoid(outputs.logits).cpu().numpy()[0]
+
+     return {label: float(prob) for label, prob in zip(LABELS, probs)}
+
+ # === Streamlit UI ===
+ st.set_page_config(page_title="Deteksi Alergen IndoBERT", page_icon="🍲")
+ st.title("🍲 Deteksi Alergen dari Resep Cookpad (IndoBERT)")
+
+ tokenizer, model = load_model()
+
+ input_mode = st.radio("Pilih input:", ["Teks Manual", "URL Cookpad"])
+ if input_mode == "Teks Manual":
+     user_input = st.text_area("📝 Masukkan bahan makanan:")
+ else:
+     url = st.text_input("🔗 Masukkan URL Cookpad:")
+     user_input = ""
+     if url:
+         scraped = scrape_ingredients(url)
+         if scraped:
+             user_input = scraped
+             st.success("✅ Berhasil mengambil bahan dari URL")
+             st.text_area("📋 Bahan dari URL:", value=user_input, height=200)
+         else:
+             st.error("❌ Gagal mengambil data dari URL.")
+
+ threshold = st.slider("🎚 Threshold (default 0.5):", 0.0, 1.0, 0.5)
+
+ if st.button("🚀 Prediksi"):
+     if user_input.strip():
+         result = predict_alergen(user_input, tokenizer, model)
+         st.subheader("📊 Hasil Prediksi Alergen:")
+         for label, prob in result.items():
+             status = "✅ Ada" if prob >= threshold else "❌ Tidak Ada"
+             st.write(f"- **{label}**: {status} ({prob:.2f})")
+     else:
+         st.warning("⚠️ Masukkan teks bahan atau URL terlebih dahulu.")
model/alergen_model.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:28df831b272894c11265ef5f4cf1ac2a2ca89e765b26bff928f34c388ff015d5
+ size 497868974
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ streamlit
+ torch
+ transformers
+ requests
+ beautifulsoup4