"""New Law Landing — Gradio app.

Flow: the user enters a requirement, the top-K related safeguards are
retrieved from the safeguard vector store (optionally per Privacy
Objective), the hits are summarised by the DouBao chat model through a
prompt template, and the parsed result is rendered as JSON in the UI.
"""

import ast
import contextlib
import io
import json
import os
import re
import sys
import time
from json import loads, JSONDecodeError

import gradio as gr
import pandas as pd
from FlagEmbedding import FlagReranker
from langchain_core.documents import Document

from doubao_service import DouBaoService
from PROMPT_TEMPLATE import prompt_template
from retriever import retriever
from util.Embeddings import TextEmb3LargeEmbedding
from util.vector_base import EmbeddingFunction, get_or_create_vector_base
# from bm25s import BM25, tokenize

# --- Service / vector-store setup (module-level side effects) -------------
client = DouBaoService("DouBao128Pro")
embeddingmodel = TextEmb3LargeEmbedding(max_qpm=58)
embedding = EmbeddingFunction(embeddingmodel)
safeguard_vector_store = get_or_create_vector_base('safeguard_database', embedding)

# NOTE(review): reranking is currently disabled — `retriever` is called with
# reranker_model=None, using_reranker=False. Uncomment to re-enable.
# reranker_model = FlagReranker(
#     'C://Users//Admin//Desktop//PDPO//NLL_LLM//model//bge-reranker-v2-m3',
#     use_fp16=True,
#     devices=["cpu"],
# )

# Closed set of Privacy Objectives a requirement can be matched against.
OPTIONS = [
    'AI Governance',
    'Data Accuracy',
    'Data Minimization & Purpose Limitation',
    'Data Retention',
    'Data Security',
    'Data Sharing',
    'Individual Rights',
    'Privacy by Design',
    'Transparency',
]


def format_model_output(raw_output):
    """Normalise the raw LLM response for display.

    - Converts literal ``\\n`` sequences into real newlines.
    - Pretty-prints the content of any ```json ... ``` fenced block.
    - Finally parses the whole text as a Python literal (the model is
      prompted to answer with a dict-like structure).

    Returns the parsed object when the response is a valid Python literal,
    otherwise the cleaned-up string, so a free-text answer no longer
    crashes the request.
    """
    formatted = raw_output.replace('\\n', '\n')

    def replace_json(match):
        json_str = match.group(1).strip()
        try:
            json_obj = loads(json_str)
            return f"```json\n{json.dumps(json_obj, indent=2, ensure_ascii=False)}\n```"
        except JSONDecodeError:
            # Not valid JSON — leave the fenced block untouched.
            return match.group(0)

    formatted = re.sub(r'```json\n?(.*?)\n?```', replace_json, formatted, flags=re.DOTALL)
    try:
        return ast.literal_eval(formatted)
    except (ValueError, SyntaxError):
        # BUGFIX: the response is not a Python literal (e.g. plain prose) —
        # fall back to the formatted text instead of raising.
        return formatted


def model_predict(input_text, if_split_po, topk, selected_items):
    """Retrieve safeguards for *input_text* and summarise them via the LLM.

    Args:
        input_text: The requirement text entered by the user.
        if_split_po: Recall K safeguards per Privacy Objective when True.
        topk: Number of safeguards to recall (falsy values default to 10).
        selected_items: User-chosen Privacy Objectives, or ["All"].

    Returns:
        dict with the cleaned ``requirement`` and the formatted model
        ``safeguards`` output.
    """
    requirement = input_text
    # Strip whitespace control characters that would pollute retrieval/prompts.
    requirement = requirement.replace("\t", "").replace("\n", "").replace("\r", "")

    PO = OPTIONS if "All" in selected_items else selected_items
    topk = int(topk) if topk else 10

    final_result = retriever(
        requirement, PO, safeguard_vector_store,
        reranker_model=None,
        using_reranker=False,
        using_BM25=False,
        using_chroma=True,
        k=topk,
        if_split_po=if_split_po,
    )

    # Group hits by Privacy Objective: tuple layout is
    # (score, safeguard number, description, privacy objective).
    mapping_safeguards = {}
    for safeguard in final_result:
        if safeguard[3] not in mapping_safeguards:
            mapping_safeguards[safeguard[3]] = []
        mapping_safeguards[safeguard[3]].append(
            {
                "Score": safeguard[0],
                "Safeguard Number": safeguard[1],
                "Safeguard Description": safeguard[2],
            }
        )

    prompt = prompt_template(requirement, mapping_safeguards)
    response = client.chat_complete(messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ])

    # return {"requirement": requirement, "safeguards": mapping_safeguards}
    print("requirement:", requirement)
    print("mapping safeguards:", mapping_safeguards)
    print("response:", response)
    return {"requirement": requirement, "safeguards": format_model_output(response)}


with gr.Blocks(title="New Law Landing") as demo:
    gr.Markdown("## 🏙️ New Law Landing")
    requirement = gr.Textbox(
        label="Input Requirements",
        placeholder="Example: Data Minimization Consent for incompatible purposes",
    )
    details = gr.Textbox(
        label="Input Details",
        placeholder="Example: Require consent for...",
    )
    # Number input for the retrieval depth.
    topk = gr.Number(
        label="Top K safeguards",
        value=10,
        precision=0,
        minimum=1,
        interactive=True,
    )
    with gr.Row():
        with gr.Column(scale=1):
            if_split_po = gr.Checkbox(
                label="If Split Privacy Objective",
                value=True,
                info="Recall K Safeguards for each Privacy Objective",
            )
        with gr.Column(scale=1):
            all_checkbox = gr.Checkbox(
                label="ALL Privacy Objective",
                value=True,
                info="No specific Privacy Objective is specified",
            )
        with gr.Column(scale=4):
            PO_checklist = gr.CheckboxGroup(
                label="Choose Privacy Objective",
                choices=OPTIONS,
                value=[],
                interactive=True,
            )
    submit_btn = gr.Button("Submit", variant="primary")
    result_output = gr.JSON(label="Related safeguards", open=True)

    def sync_checkboxes(selected_items, all_selected):
        """Untick the ALL checkbox as soon as any specific PO is chosen."""
        if len(selected_items) > 0:
            return False
        return all_selected

    PO_checklist.change(
        fn=sync_checkboxes,
        inputs=[PO_checklist, all_checkbox],
        outputs=all_checkbox,
    )

    def sync_all(selected_all, current_selection):
        """Clear the specific-PO selection when ALL is ticked."""
        if selected_all:
            return []
        return current_selection

    all_checkbox.change(
        fn=sync_all,
        inputs=[all_checkbox, PO_checklist],
        outputs=PO_checklist,
    )

    def process_inputs(requirement, details, topk, if_split_po, all_selected, PO_selected):
        """Submit callback: combine the inputs and run the prediction."""
        input_text = requirement + ": " + details
        # BUGFIX: gr.Number yields None when the field is cleared — int(None)
        # raised TypeError. Fall back to the same default model_predict uses.
        topk = int(topk) if topk else 10
        if all_selected:
            return model_predict(input_text, if_split_po, topk, ["All"])
        else:
            return model_predict(input_text, if_split_po, topk, PO_selected)

    submit_btn.click(
        fn=process_inputs,
        inputs=[requirement, details, topk, if_split_po, all_checkbox, PO_checklist],
        outputs=[result_output],
    )

if __name__ == "__main__":
    demo.launch(share=True)