|
import gradio as gr |
|
import tempfile, os, shutil |
|
import requests, json, uuid, time, asyncio |
|
from typing import Any, List |
|
from pydantic import PrivateAttr |
|
|
|
|
|
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings |
|
from llama_index.core.chat_engine import CondenseQuestionChatEngine |
|
from llama_index.core.llms import CustomLLM, CompletionResponse, CompletionResponseGen, LLMMetadata |
|
from llama_index.core.embeddings import BaseEmbedding |
|
|
|
|
|
from clova_llama_index import ClovaClient, ClovaIndexEmbeddings, ClovaLLM |
|
|
|
|
|
from llama_index.core import ( |
|
VectorStoreIndex, |
|
SimpleDirectoryReader, |
|
StorageContext, |
|
SimpleKeywordTableIndex, |
|
TreeIndex, |
|
Settings |
|
) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- HyperCLOVA client / global LlamaIndex settings -------------------------
clova_api_key = os.environ.get("API_KEY")
# SECURITY: never print the raw API key; log only whether it is configured.
# (The original code printed the secret itself to stdout.)
print(f"API_KEY configured: {bool(clova_api_key)}")
client = ClovaClient(api_key=clova_api_key)

# Register the Clova LLM and embedding model as the process-wide defaults.
Settings.llm = ClovaLLM(client)
Settings.embed_model = ClovaIndexEmbeddings(client)
Settings.chunk_size = 4096  # initial default; the UI block below narrows this to 1024

# Populated by the gr.Blocks section once the index is built; `chat_with_docs`
# returns a warning string while this is still None.
chat_engine = None
|
|
|
def chat_with_docs(message, history):
    """Gradio chat callback: answer *message* via the module-level chat engine.

    `history` is supplied by gr.ChatInterface and is unused here — the
    condense-question engine keeps its own conversation state.
    Returns the engine's answer text, or a warning string while the engine
    has not been initialized yet.
    """
    global chat_engine
    engine = chat_engine
    if engine is None:
        return "β οΈ chat_engineμ μ°Ύμ§ λͺ»νμ΅λλ€."
    return engine.chat(message).response
|
|
|
def show_image():
    """Return the relative path of the bundled demo image."""
    image_path = "./img/19.jpg"
    return image_path
|
|
|
with gr.Blocks() as demo:
    print("with gr.Blocks() as demo:")
    gr.Markdown("## π€ HyperCLOVA κΈ°λ° λ¬Έμ μ±λ΄")

    # Configure chunking and the embedding model BEFORE parsing documents into
    # nodes. (Originally Settings.chunk_size / chunk_overlap were assigned
    # *after* get_nodes_from_documents(), so the nodes were chunked with the
    # earlier 4096 default and the 1024/128 values had no effect on them.)
    embed_model = ClovaIndexEmbeddings(client, embed_batch_size=1)
    Settings.embed_model = embed_model
    Settings.chunk_size = 1024
    Settings.chunk_overlap = 128

    # Load the legal corpus and split it into nodes once; every index below
    # is built from this single node set.
    documents = SimpleDirectoryReader('./law', required_exts=[".pdf", ".txt"]).load_data()
    nodes = Settings.node_parser.get_nodes_from_documents(documents)

    # Shared docstore so the three indexes reference the same node objects.
    storage_context = StorageContext.from_defaults()
    storage_context.docstore.add_documents(nodes)

    # Build the vector index once from the pre-parsed nodes and reuse it for
    # both the chat engine and the router tool. (The original additionally
    # called VectorStoreIndex.from_documents(documents), which re-chunked and
    # re-embedded the whole corpus a second time, and created a second unused
    # ClovaIndexEmbeddings instance.)
    print("vector_index start")
    vector_index = VectorStoreIndex(nodes, storage_context=storage_context)
    chat_engine = vector_index.as_chat_engine(chat_mode="condense_question", verbose=True)

    print("keyword_index start")
    keyword_index = SimpleKeywordTableIndex(nodes, storage_context=storage_context)
    print("tree_index start")
    tree_index = TreeIndex(nodes, storage_context=storage_context)

    from llama_index.core.tools import QueryEngineTool

    # One query engine per index; tree_summarize collapses retrieved chunks
    # into a single synthesized answer.
    vector_query_engine = vector_index.as_query_engine(
        response_mode="tree_summarize", use_async=True, name="vector"
    )
    keyword_query_engine = keyword_index.as_query_engine(
        response_mode="tree_summarize", use_async=True, name="keyword"
    )
    tree_query_engine = tree_index.as_query_engine(
        response_mode="tree_summarize", use_async=True, name="tree"
    )

    # Tool descriptions guide the router's tool selection.
    vector_tool = QueryEngineTool.from_defaults(
        query_engine=vector_query_engine,
        description=(
            "λ²λ₯ μ λν μ λ°μ μΈ κ²μμ΄ νμν λ νμ©νμΈμ."
        ),
    )
    keyword_tool = QueryEngineTool.from_defaults(
        query_engine=keyword_query_engine,
        description=(
            "ν€μλλ‘ λ²λ₯ κ²μμ μ§νν λ νμ©νμΈμ."
        ),
    )
    tree_tool = QueryEngineTool.from_defaults(
        query_engine=tree_query_engine,
        description=(
            "λ²λ₯ μ 체λ₯Ό Tree ννλ‘ λ§λ€μ΄μ κ²μμ ν λ νμ©ν©λλ€."
        ),
    )

    from llama_index.core.objects import ObjectIndex

    # Index over the tools themselves so the router can retrieve the most
    # relevant tool for a given question.
    obj_index = ObjectIndex.from_objects(
        [vector_tool, keyword_tool, tree_tool],
        index_cls=VectorStoreIndex,
    )

    from llama_index.core.query_engine import ToolRetrieverRouterQueryEngine
    # NOTE(review): this router engine is constructed but never wired into the
    # UI — the chat interface below uses `chat_engine` only. Kept for parity
    # with the original; consider routing chat_with_docs through it or
    # removing it.
    query_engine = ToolRetrieverRouterQueryEngine(obj_index.as_retriever())

    chatbot = gr.ChatInterface(fn=chat_with_docs, title="π λ¬Έμ κΈ°λ° μ±λ΄", type="messages")

demo.launch()
|
|