orangeorang's picture
Update app.py
27d81dc verified
raw
history blame contribute delete
4.03 kB
import html
import re

import gradio as gr
from huggingface_hub import InferenceClient
from transformers import pipeline
# Chatbot client built with InferenceClient; chat_with_ner() calls its
# text_generation() method to produce replies.
client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.3")
# Load the Named Entity Recognition (NER) model; extract_entities() calls this
# pipeline on the raw user text.
ner_pipeline = pipeline("ner", model="d4data/biomedical-ner-all")
def _entity_type(label):
    """Return *label* without a leading ``B-`` / ``I-`` tagging prefix."""
    if label[:2] in ("B-", "I-"):
        return label[2:]
    return label


def _merge_subword_tokens(tokens):
    """Join subword continuation tokens (leading ``##``) onto the previous word.

    Each item of *tokens* must provide the keys ``"word"`` and ``"entity"``.
    Returns a list of ``{"word": ..., "entity": ...}`` dicts, one per merged
    word, where ``entity`` is the type of the word's first token.
    """
    merged = []
    for token in tokens:
        word = token["word"]
        is_continuation = word.startswith("##")
        # Only the two-character marker is removed, so a piece that itself
        # contains '#' keeps it.
        piece = word[2:] if is_continuation else word

        if is_continuation and merged:
            merged[-1]["word"] += piece
            continue

        # A continuation with nothing before it starts a word of its own
        # instead of being silently dropped.
        entity_type = _entity_type(token["entity"])
        if piece and entity_type:
            merged.append({"word": piece, "entity": entity_type})
    return merged


def extract_entities(text, ner=None):
    """Extract medical entities from *text* as whole words.

    Args:
        text: The raw input text.
        ner: Optional callable taking the text and returning token dicts with
            ``"word"`` and ``"entity"`` keys. Defaults to the module-level
            ``ner_pipeline``.

    Returns:
        A list of ``{"word": str, "entity": str}`` dicts with subword pieces
        merged and the ``B-`` / ``I-`` prefix removed from the entity type.
    """
    if ner is None:
        ner = ner_pipeline
    return _merge_subword_tokens(ner(text))
def highlight_text(text, entities):
    """Render *text* as HTML with recognized entity words highlighted.

    Args:
        text: The original message; it is split on single spaces.
        entities: List of dicts with ``"word"`` and ``"entity"`` keys.

    Returns:
        An HTML string: the highlighted text, two line breaks, then either a
        bullet list of the entities or a "none detected" note.
    """
    import html  # local import keeps this function usable on its own

    span_open = (
        "<span style='background-color: #ffcc80; color: black; "
        "padding: 2px; border-radius: 4px;'>"
    )
    # Only membership is needed, so a set of lower-cased words is enough.
    known_words = {ent["word"].lower() for ent in entities}

    rendered = []
    for word in text.split(' '):
        # The text comes from the user and ends up in an HTML component, so
        # it is escaped before being embedded.
        safe_word = html.escape(word)
        # Punctuation around the word is ignored for matching but kept in
        # the output.
        if word.strip('.,!?()[];:"\'').lower() in known_words:
            safe_word = f"{span_open}{safe_word}</span>"
        rendered.append(safe_word)
    highlighted_text = ' '.join(rendered)

    if entities:
        items = "".join(
            f"<li><strong>{html.escape(ent['word'])}</strong> "
            f"({html.escape(ent['entity'])})</li>"
            for ent in entities
        )
        entity_list = f"<h4>🔍 Recognized Medical Entities:</h4><ul>{items}</ul>"
    else:
        entity_list = "<p><em>No medical entities detected.</em></p>"

    return highlighted_text + "<br><br>" + entity_list
def chat_with_ner(message, history):
    """Answer *message* with the chat model, guided by recognized entities.

    Args:
        message: The user's text.
        history: List of ``(user_text, bot_text)`` tuples; extended in place.
            ``None`` is treated as an empty history.

    Returns:
        A ``(history, highlighted_message)`` tuple, where
        ``highlighted_message`` is the HTML produced by ``highlight_text``.
    """
    if history is None:
        history = []

    entities = extract_entities(message)
    recognized_entities = [ent["word"] for ent in entities]

    if recognized_entities:
        prompt = f"This text contains medical terms: {', '.join(recognized_entities)}. Please explain briefly."
        show_to_history = f"Medical Object Recognized : {', '.join(recognized_entities)}. Here are the informations about the recognized medical object."
    else:
        # Nothing was recognized: send the message unchanged and show it
        # as-is, rather than announcing an empty list of recognized objects.
        prompt = message
        show_to_history = message

    response = client.text_generation(prompt, max_new_tokens=100)
    highlighted_message = highlight_text(message, entities)

    history.append((show_to_history, response))
    return history, highlighted_message
# Chatbot layout
with gr.Blocks() as demo:
    gr.Markdown(
        "<h1 style='text-align: center;'>🚀 Mistral AI Chatbot</h1>"
        "<p style='text-align: center;'>💬 Chat with Mistral-7B and experience advanced AI conversations!</p>",
    )
    chatbot = gr.Chatbot(label="Mistral AI Assistant")
    message = gr.Textbox(placeholder="Type your message here...", label="Your Message")
    # gr.HTML is used so the highlight markup is rendered rather than shown
    # as plain text.
    highlighted_output = gr.HTML(label="Highlighted Text (NER)")
    send_btn = gr.Button("Send 🚀")

    def respond(user_input, chat_history):
        """Forward the textbox value and chat history to chat_with_ner()."""
        return chat_with_ner(user_input, chat_history)

    send_btn.click(respond, inputs=[message, chatbot], outputs=[chatbot, highlighted_output])

# Run the application
demo.launch()