bytedancerneat committed on
Commit
b263d05
·
verified ·
1 Parent(s): 09179c9

Update interface.py

Browse files
Files changed (1) hide show
  1. interface.py +185 -185
interface.py CHANGED
@@ -1,186 +1,186 @@
1
- import pandas as pd
2
- import json
3
- import re
4
- from json import loads, JSONDecodeError
5
- import sys
6
- import os
7
- import ast
8
- from util.vector_base import EmbeddingFunction, get_or_create_vector_base
9
- from doubao_service import DouBaoService
10
- from PROMPT_TEMPLATE import prompt_template
11
- from util.Embeddings import TextEmb3LargeEmbedding
12
- from langchain_core.documents import Document
13
- from FlagEmbedding import FlagReranker
14
- from retriever import retriever
15
- import time
16
- from bm25s import BM25, tokenize
17
- import contextlib
18
- import io
19
-
20
- import gradio as gr
21
- import time
22
-
23
- client = DouBaoService("DouBao128Pro")
24
- embeddingmodel = TextEmb3LargeEmbedding(max_qpm=58)
25
- embedding = EmbeddingFunction(embeddingmodel)
26
- safeguard_vector_store = get_or_create_vector_base('safeguard_database', embedding)
27
-
28
- # reranker_model = FlagReranker(
29
- # 'C://Users//Admin//Desktop//PDPO//NLL_LLM//model//bge-reranker-v2-m3',
30
- # use_fp16=True,
31
- # devices=["cpu"],
32
- # )
33
-
34
- OPTIONS = ['AI Governance',
35
- 'Data Accuracy',
36
- 'Data Minimization & Purpose Limitation',
37
- 'Data Retention',
38
- 'Data Security',
39
- 'Data Sharing',
40
- 'Individual Rights',
41
- 'Privacy by Design',
42
- 'Transparency']
43
-
44
-
45
- def format_model_output(raw_output):
46
- """
47
- 处理模型输出:
48
- - 将 \n 转换为实际换行
49
- - 提取 ```json ``` 中的内容并格式化为可折叠的 JSON
50
- """
51
- formatted = raw_output.replace('\\n', '\n')
52
- def replace_json(match):
53
- json_str = match.group(1).strip()
54
- try:
55
- json_obj = loads(json_str)
56
- return f"```json\n{json.dumps(json_obj, indent=2, ensure_ascii=False)}\n```"
57
- except JSONDecodeError:
58
- return match.group(0)
59
-
60
- formatted = re.sub(r'```json\n?(.*?)\n?```', replace_json, formatted, flags=re.DOTALL)
61
- return ast.literal_eval(formatted)
62
-
63
- def model_predict(input_text, if_split_po, topk, selected_items):
64
- """
65
- selected_items: 用户选择的项目(可能是["All"]或具体PO)
66
- """
67
- requirement = input_text
68
- requirement = requirement.replace("\t", "").replace("\n", "").replace("\r", "")
69
- if "All" in selected_items:
70
- PO = OPTIONS
71
- else:
72
- PO = selected_items
73
- if topk:
74
- topk = int(topk)
75
- else:
76
- topk = 10
77
- final_result = retriever(
78
- requirement,
79
- PO,
80
- safeguard_vector_store,
81
- reranker_model=None,
82
- using_reranker=False,
83
- using_BM25=False,
84
- using_chroma=True,
85
- k=topk,
86
- if_split_po=if_split_po
87
- )
88
- mapping_safeguards = {}
89
- for safeguard in final_result:
90
- if safeguard[3] not in mapping_safeguards:
91
- mapping_safeguards[safeguard[3]] = []
92
- mapping_safeguards[safeguard[3]].append(
93
- {
94
- "Score": safeguard[0],
95
- "Safeguard Number": safeguard[1],
96
- "Safeguard Description": safeguard[2]
97
- }
98
- )
99
- prompt = prompt_template(requirement, mapping_safeguards)
100
- response = client.chat_complete(messages=[
101
- {"role": "system", "content": "You are a helpful assistant."},
102
- {"role": "user", "content": prompt},
103
- ])
104
- # return {"requirement": requirement, "safeguards": mapping_safeguards}
105
- print("requirement:", requirement)
106
- print("mapping safeguards:", mapping_safeguards)
107
- print("response:", response)
108
- return {"requirement": requirement, "safeguards": format_model_output(response)}
109
-
110
- with gr.Blocks(title="New Law Landing") as demo:
111
- gr.Markdown("## 🏙️ New Law Landing")
112
-
113
- requirement = gr.Textbox(label="Input Requirements", placeholder="Example: Data Minimization Consent for incompatible purposes")
114
- details = gr.Textbox(label="Input Details", placeholder="Example: Require consent for...")
115
-
116
- # 修改为 Number 输入组件
117
- topk = gr.Number(
118
- label="Top K safeguards",
119
- value=10,
120
- precision=0,
121
- minimum=1,
122
- interactive=True
123
- )
124
-
125
- with gr.Row():
126
- with gr.Column(scale=1):
127
- if_split_po = gr.Checkbox(
128
- label="If Split Privacy Objective",
129
- value=True,
130
- info="Recall K Safeguards for each Privacy Objective"
131
- )
132
- with gr.Column(scale=1):
133
- all_checkbox = gr.Checkbox(
134
- label="ALL Privacy Objective",
135
- value=True,
136
- info="No specific Privacy Objective is specified"
137
- )
138
- with gr.Column(scale=4):
139
- PO_checklist = gr.CheckboxGroup(
140
- label="Choose Privacy Objective",
141
- choices=OPTIONS,
142
- value=[],
143
- interactive=True
144
- )
145
-
146
- submit_btn = gr.Button("Submit", variant="primary")
147
- result_output = gr.JSON(label="Related safeguards", open=True)
148
-
149
-
150
- def sync_checkboxes(selected_items, all_selected):
151
- if len(selected_items) > 0:
152
- return False
153
- return all_selected
154
-
155
- PO_checklist.change(
156
- fn=sync_checkboxes,
157
- inputs=[PO_checklist, all_checkbox],
158
- outputs=all_checkbox
159
- )
160
-
161
- def sync_all(selected_all, current_selection):
162
- if selected_all:
163
- return []
164
- return current_selection
165
-
166
- all_checkbox.change(
167
- fn=sync_all,
168
- inputs=[all_checkbox, PO_checklist],
169
- outputs=PO_checklist
170
- )
171
-
172
- def process_inputs(requirement, details, topk, if_split_po, all_selected, PO_selected):
173
- input_text = requirement + ": " + details
174
- if all_selected:
175
- return model_predict(input_text, if_split_po, int(topk), ["All"])
176
- else:
177
- return model_predict(input_text, if_split_po, int(topk), PO_selected)
178
-
179
- submit_btn.click(
180
- fn=process_inputs,
181
- inputs=[requirement, details, topk, if_split_po, all_checkbox, PO_checklist],
182
- outputs=[result_output]
183
- )
184
-
185
- if __name__ == "__main__":
186
  demo.launch(share=True)
 
1
+ import pandas as pd
2
+ import json
3
+ import re
4
+ from json import loads, JSONDecodeError
5
+ import sys
6
+ import os
7
+ import ast
8
+ from util.vector_base import EmbeddingFunction, get_or_create_vector_base
9
+ from doubao_service import DouBaoService
10
+ from PROMPT_TEMPLATE import prompt_template
11
+ from util.Embeddings import TextEmb3LargeEmbedding
12
+ from langchain_core.documents import Document
13
+ from FlagEmbedding import FlagReranker
14
+ from retriever import retriever
15
+ import time
16
+ # from bm25s import BM25, tokenize
17
+ import contextlib
18
+ import io
19
+
20
+ import gradio as gr
21
+ import time
22
+
23
# Shared service handles, created once when this module is imported.
# The chat client is used by model_predict to query the LLM.
client = DouBaoService("DouBao128Pro")
# Embedding model wrapped for the vector store (max_qpm presumably caps
# queries per minute -- confirm against TextEmb3LargeEmbedding).
embeddingmodel = TextEmb3LargeEmbedding(max_qpm=58)
embedding = EmbeddingFunction(embeddingmodel)
# Vector store that model_predict hands to retriever().
safeguard_vector_store = get_or_create_vector_base('safeguard_database', embedding)

# The reranker is currently disabled; model_predict passes reranker_model=None.
# reranker_model = FlagReranker(
#     'C://Users//Admin//Desktop//PDPO//NLL_LLM//model//bge-reranker-v2-m3',
#     use_fp16=True,
#     devices=["cpu"],
# )

# Privacy objectives offered in the UI checklist; model_predict uses the whole
# list when "All" is selected.
OPTIONS = ['AI Governance',
           'Data Accuracy',
           'Data Minimization & Purpose Limitation',
           'Data Retention',
           'Data Security',
           'Data Sharing',
           'Individual Rights',
           'Privacy by Design',
           'Transparency']
43
+
44
+
45
def format_model_output(raw_output):
    """
    Parse the model's textual reply into a Python object.

    The reply may be a bare JSON / Python literal, or it may wrap the payload
    in a Markdown json code fence surrounded by commentary. The fenced payload
    (when present) is tried first, then the whole reply. Each candidate is
    tried verbatim and then with literal backslash-n sequences turned into
    real newlines, using ``json.loads`` (accepts true/false/null) before
    ``ast.literal_eval`` (accepts single-quoted Python literals).

    Args:
        raw_output: Text returned by the chat model.

    Returns:
        The decoded object (whatever the payload encodes, e.g. a dict or list).

    Raises:
        ValueError: If no candidate can be parsed by either parser.
    """
    text = raw_output.strip()

    candidates = []
    fenced = re.search(r'```(?:json)?\s*(.*?)\s*```', text, flags=re.DOTALL)
    if fenced:
        # Only the fence's content is parseable; the fence markers themselves
        # are not valid JSON or Python syntax.
        candidates.append(fenced.group(1))
    candidates.append(text)

    for candidate in candidates:
        # Verbatim first: unescaping "\\n" inside a quoted string would inject
        # a raw newline and corrupt otherwise valid input.
        for variant in (candidate, candidate.replace('\\n', '\n')):
            for parse in (json.loads, ast.literal_eval):
                try:
                    return parse(variant)
                except (ValueError, SyntaxError):
                    # JSONDecodeError is a ValueError; fall through to the
                    # next parser / variant / candidate.
                    continue

    raise ValueError("model output is neither valid JSON nor a Python literal")
62
+
63
def model_predict(input_text, if_split_po, topk, selected_items):
    """
    Retrieve candidate safeguards for a requirement and ask the LLM to map them.

    Args:
        input_text: Requirement text; tabs, newlines and carriage returns are
            removed before it is used.
        if_split_po: Forwarded unchanged to ``retriever`` as ``if_split_po``.
        topk: Number of safeguards to retrieve; a falsy value falls back to 10.
        selected_items: Chosen privacy objectives, or a list containing "All"
            to use every entry of ``OPTIONS``.

    Returns:
        A dict with the cleaned text under "requirement" and the parsed model
        reply under "safeguards".
    """
    # Drop tab / newline / carriage-return characters in a single pass.
    cleaned = input_text.translate(str.maketrans("", "", "\t\n\r"))
    objectives = OPTIONS if "All" in selected_items else selected_items
    limit = int(topk) if topk else 10

    hits = retriever(
        cleaned,
        objectives,
        safeguard_vector_store,
        reranker_model=None,
        using_reranker=False,
        using_BM25=False,
        using_chroma=True,
        k=limit,
        if_split_po=if_split_po,
    )

    # Group the hits by their fourth field (index 3); fields 0-2 become the
    # score, number and description of each entry.
    grouped = {}
    for hit in hits:
        grouped.setdefault(hit[3], []).append(
            {
                "Score": hit[0],
                "Safeguard Number": hit[1],
                "Safeguard Description": hit[2],
            }
        )

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt_template(cleaned, grouped)},
    ]
    response = client.chat_complete(messages=messages)

    # Console trace of the request and the raw reply.
    print("requirement:", cleaned)
    print("mapping safeguards:", grouped)
    print("response:", response)

    parsed = format_model_output(response)
    return {"requirement": cleaned, "safeguards": parsed}
109
+
110
# Gradio UI: two text inputs, a top-k selector, privacy-objective pickers and
# a JSON panel that shows the result of model_predict.
with gr.Blocks(title="New Law Landing") as demo:
    gr.Markdown("## 🏙️ New Law Landing")

    requirement = gr.Textbox(label="Input Requirements", placeholder="Example: Data Minimization Consent for incompatible purposes")
    details = gr.Textbox(label="Input Details", placeholder="Example: Require consent for...")

    # Number input for how many safeguards to recall.
    topk = gr.Number(
        label="Top K safeguards",
        value=10,
        precision=0,
        minimum=1,
        interactive=True
    )

    with gr.Row():
        with gr.Column(scale=1):
            if_split_po = gr.Checkbox(
                label="If Split Privacy Objective",
                value=True,
                info="Recall K Safeguards for each Privacy Objective"
            )
        with gr.Column(scale=1):
            all_checkbox = gr.Checkbox(
                label="ALL Privacy Objective",
                value=True,
                info="No specific Privacy Objective is specified"
            )
        with gr.Column(scale=4):
            PO_checklist = gr.CheckboxGroup(
                label="Choose Privacy Objective",
                choices=OPTIONS,
                value=[],
                interactive=True
            )

    submit_btn = gr.Button("Submit", variant="primary")
    result_output = gr.JSON(label="Related safeguards", open=True)

    def sync_checkboxes(selected_items, all_selected):
        """Return the new state of the "ALL" box: unticked once any objective is chosen."""
        if selected_items:
            return False
        return all_selected

    PO_checklist.change(
        fn=sync_checkboxes,
        inputs=[PO_checklist, all_checkbox],
        outputs=all_checkbox
    )

    def sync_all(selected_all, current_selection):
        """Return the new checklist value: cleared when "ALL" is ticked, else unchanged."""
        if selected_all:
            return []
        return current_selection

    all_checkbox.change(
        fn=sync_all,
        inputs=[all_checkbox, PO_checklist],
        outputs=PO_checklist
    )

    def process_inputs(requirement, details, topk, if_split_po, all_selected, PO_selected):
        """
        Build the query from the form values and run model_predict.

        Args:
            requirement: Text of the "Input Requirements" box.
            details: Text of the "Input Details" box.
            topk: Value of the "Top K safeguards" field (may be empty).
            if_split_po: State of the "If Split Privacy Objective" checkbox.
            all_selected: State of the "ALL Privacy Objective" checkbox.
            PO_selected: Objectives ticked in the checklist.

        Returns:
            The dict produced by model_predict, shown in the JSON panel.
        """
        input_text = requirement + ": " + details
        # With "ALL" unticked and nothing chosen there is no specific
        # objective, so treat it like "ALL" rather than querying with an
        # empty objective list.
        selection = ["All"] if all_selected or not PO_selected else PO_selected
        # Pass topk through unconverted: model_predict does the int()
        # conversion and falls back to 10 for an empty value, whereas calling
        # int() here would raise on a cleared field (delivered as None).
        return model_predict(input_text, if_split_po, topk, selection)

    submit_btn.click(
        fn=process_inputs,
        inputs=[requirement, details, topk, if_split_po, all_checkbox, PO_checklist],
        outputs=[result_output]
    )
184
+
185
# Launch the Gradio app only when this file is executed as a script.
# NOTE(review): share=True presumably requests a public share link -- confirm
# this is intended for the deployment environment.
if __name__ == "__main__":
    demo.launch(share=True)