File size: 4,034 Bytes
ad2e225
fbd5bc8
8be2de0
980cbe4
ad2e225
728be19
 
 
 
8be2de0
 
fbd5bc8
cb6a64d
8be2de0
980cbe4
3852a38
 
5294576
980cbe4
3852a38
 
980cbe4
3852a38
 
1aa0592
 
3852a38
5294576
 
 
3852a38
980cbe4
 
5294576
 
980cbe4
 
e9f046d
fbd5bc8
68e2fc7
fbd5bc8
27d81dc
 
d255a88
27d81dc
 
 
 
 
 
 
 
68e2fc7
 
 
 
 
5294576
68e2fc7
 
 
 
27d81dc
 
d255a88
fbd5bc8
 
 
5294576
fbd5bc8
 
 
 
728be19
e9f046d
4c5bfec
fbd5bc8
cb6a64d
fbbade5
728be19
 
e14a117
fbd5bc8
4c5bfec
fbd5bc8
 
 
 
e14a117
4c5bfec
fbd5bc8
4c5bfec
fbd5bc8
 
 
728be19
e14a117
fbd5bc8
8be2de0
fbd5bc8
728be19
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import html
import re

import gradio as gr
from huggingface_hub import InferenceClient
from transformers import pipeline

# Create the chatbot client using InferenceClient
client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.3")

# Load the Named Entity Recognition (NER) model
ner_pipeline = pipeline("ner", model="d4data/biomedical-ner-all")

# Extract medical entities from a piece of text
def extract_entities(text):
    """Run the NER pipeline on ``text`` and merge sub-word tokens into whole words.

    Tokens whose text starts with ``##`` are continuation pieces of the
    preceding token. A piece is appended to the word being built only when it
    directly follows the previous entity token (judged by the pipeline's
    per-token ``index`` field, when present); otherwise it starts a new entry
    so that unrelated entities are never glued together.

    Args:
        text: Input string passed to ``ner_pipeline``.

    Returns:
        A list of ``{"word": str, "entity": str}`` dicts, one per merged word.
        ``entity`` is the label with any ``B-`` / ``I-`` prefix removed; a
        merged word keeps the label of its first piece.
    """
    merged_entities = []
    current_word = ""
    current_entity = None
    previous_index = None

    for ent in ner_pipeline(text):
        token = ent["word"]
        entity_type = ent["entity"].split("-")[-1]  # Drop the B- / I- prefix
        token_index = ent.get("index")

        # Only the exact "##" marker denotes a sub-word piece; a literal "#"
        # token is ordinary text and must not be merged or stripped.
        is_subword = token.startswith("##")
        piece = token[2:] if is_subword else token

        # Without index information we cannot tell, so fall back to merging.
        adjacent = (
            previous_index is None
            or token_index is None
            or token_index == previous_index + 1
        )

        if is_subword and current_word and adjacent:
            current_word += piece
        else:
            if current_word and current_entity:
                merged_entities.append({"word": current_word, "entity": current_entity})
            # Start a new word; an orphan "##" piece begins its own entry
            # instead of being dropped or attached to a distant entity.
            current_word = piece
            current_entity = entity_type

        previous_index = token_index

    # Flush the last word that was being built
    if current_word and current_entity:
        merged_entities.append({"word": current_word, "entity": current_entity})

    return merged_entities

# Highlight recognized words in the text and list the recognized entities
def highlight_text(text, entities):
    """Build an HTML fragment that highlights entity words and lists the entities.

    The text is split on single spaces. A word is highlighted when, after
    stripping surrounding ``.,!?()[]`` and lower-casing, it equals the
    lower-cased ``word`` of any entity. All text originating from ``text`` or
    ``entities`` is HTML-escaped, because the result is rendered as raw HTML.

    Args:
        text: The original message.
        entities: List of dicts with ``"word"`` and ``"entity"`` string values.

    Returns:
        The highlighted text, followed by ``<br><br>`` and either an HTML list
        of the entities or a "no entities" notice.
    """
    entity_words = {ent["word"].lower() for ent in entities}  # Set for fast lookup

    rendered_words = []
    for word in text.split(' '):
        safe_word = html.escape(word)
        # Match on the raw word without surrounding punctuation
        if word.strip('.,!?()[]').lower() in entity_words:
            safe_word = f"<span style='background-color: #ffcc80; color: black; padding: 2px; border-radius: 4px;'>{safe_word}</span>"
        rendered_words.append(safe_word)

    highlighted_text = ' '.join(rendered_words)

    # Build the list of recognized entities
    if entities:
        items = "".join(
            f"<li><strong>{html.escape(ent['word'])}</strong> ({html.escape(ent['entity'])})</li>"
            for ent in entities
        )
        entity_list = f"<h4>🔍 Recognized Medical Entities:</h4><ul>{items}</ul>"
    else:
        entity_list = "<p><em>No medical entities detected.</em></p>"

    return highlighted_text + "<br><br>" + entity_list


# Chatbot handler with NER
def chat_with_ner(message, history):
    """Answer ``message`` with the chat model and return updated history plus highlights.

    When entities are recognized, the model is asked to briefly explain them
    and the history entry names them. When none are recognized, the message
    itself is used both as the prompt and as the history entry.

    Args:
        message: The user's input text.
        history: List of ``(user_text, bot_text)`` tuples, or ``None``.

    Returns:
        A ``(history, highlighted_html)`` tuple. For an empty or
        whitespace-only message the history is returned unchanged with an
        empty highlight string and no model call is made.
    """
    history = [] if history is None else history

    # Nothing to analyse or send to the model
    if not message or not message.strip():
        return history, ""

    entities = extract_entities(message)
    recognized_entities = [ent["word"] for ent in entities]  # Recognized words only

    if recognized_entities:
        terms = ', '.join(recognized_entities)
        prompt = f"This text contains medical terms: {terms}. Please explain briefly."
        show_to_history = f"Medical Object Recognized : {terms}. Here are the informations about the recognized medical object."
    else:
        # No entities: avoid an empty "recognized" sentence in the chat log
        prompt = message
        show_to_history = message

    response = client.text_generation(prompt, max_new_tokens=100)
    highlighted_message = highlight_text(message, entities)

    history.append((show_to_history, response))  # Record this exchange

    return history, highlighted_message

# Chatbot UI layout
with gr.Blocks() as demo:
    # Page header (emoji restored from mis-decoded bytes)
    gr.Markdown(
        "<h1 style='text-align: center;'>🚀 Mistral AI Chatbot</h1>"
        "<p style='text-align: center;'>💬 Chat with Mistral-7B and experience advanced AI conversations!</p>",
    )

    chatbot = gr.Chatbot(label="Mistral AI Assistant")
    message = gr.Textbox(placeholder="Type your message here...", label="Your Message")
    highlighted_output = gr.HTML(label="Highlighted Text (NER)")  # gr.HTML so the highlight markup is rendered
    send_btn = gr.Button("Send 🚀")

    def respond(user_input, chat_history):
        """Forward the textbox value and chat history to ``chat_with_ner``."""
        return chat_with_ner(user_input, chat_history)

    # Clicking the button updates both the chat window and the highlight panel
    send_btn.click(respond, inputs=[message, chatbot], outputs=[chatbot, highlighted_output])

# Launch the application
demo.launch()