Spaces:

studio-ousia
/

luxe-demo

Running on CPU Upgrade

App Files Community

singletongue committed on 29 days ago

Commit

913e5a4

verified ·

1 Parent(s): 3f97903

Use ja-v0.3.2 model, add submit button

Browse files

Files changed (1) hide show

app.py +45 -48

app.py CHANGED Viewed

@@ -21,7 +21,7 @@ MAX_TEXT_FILE_LINES = 100
 MAX_ENTITY_FILE_LINES = 1000
 repo_id = "studio-ousia/luxe"
-revision = "ja-v0.3.1"
 nayose_repo_id = "studio-ousia/luxe-nayose-bm25"
@@ -218,7 +218,7 @@ def get_topk_entities_from_texts(
         else:
             topk_span_entities.append([])
-    return batch_entity_spans, topk_normal_entities, topk_category_entities, topk_span_entities
 def get_selected_entity(evt: gr.SelectData):
@@ -391,7 +391,8 @@ with gr.Blocks() as demo:
     # cf. https://www.gradio.app/docs/gradio/state#param-state-value
     models = gr.State((model, tokenizer, bm25_tokenizer, bm25_retriever))
-    texts = gr.State([])
     entity_replaced_counts = gr.State(0)
@@ -414,12 +415,37 @@ with gr.Blocks() as demo:
     with gr.Tab(label="直接入力"):
         text_input = gr.Textbox(label=f"入力テキスト（最大{MAX_TEXT_LENGTH}文字）", max_length=MAX_TEXT_LENGTH)
     with gr.Tab(label="ファイルアップロード"):
-        texts_file = gr.File(label=f"入力テキストファイル（最大{MAX_TEXT_FILE_LINES}行）")
     with gr.Accordion(label="LUXEのエンティティ語彙を置き換える", open=False):
-        new_entity_text_pairs_file = gr.File(
-            label=f"エンティティと説明文のCSVファイル（最大{MAX_ENTITY_FILE_LINES}行）"
         )
         new_entity_text_pairs_input = gr.Dataframe(
             # value=sample_new_entity_text_pairs,
             headers=["entity", "text"],
@@ -429,6 +455,7 @@ with gr.Blocks() as demo:
             interactive=True,
         )
         replace_entity_button = gr.Button(value="エンティティ語彙を置き換える")
     new_entity_text_pairs_file.change(
         fn=get_new_entity_text_pairs_from_file, inputs=new_entity_text_pairs_file, outputs=new_entity_text_pairs_input
@@ -439,59 +466,29 @@ with gr.Blocks() as demo:
         outputs=entity_replaced_counts,
     )
-    with gr.Accordion(label="ハイパーパラメータ", open=False):
-        topk_input = gr.Number(5, label="エンティティ件数", interactive=True)
-        entity_span_sensitivity_input = gr.Slider(
-            minimum=0.0, maximum=5.0, value=1.0, step=0.1, label="エンティティ検出の積極度", interactive=True
-        )
-        nayose_coef_input = gr.Slider(
-            minimum=0.0, maximum=2.0, value=1.0, step=0.1, label="文字列一致の優先度", interactive=True
-        )
-    text_input.change(fn=lambda text: [normalize_text(text)], inputs=text_input, outputs=texts)
-    texts_file.change(fn=get_texts_from_file, inputs=texts_file, outputs=texts)
-    topk_input.change(fn=lambda val: val, inputs=topk_input, outputs=topk)
-    entity_span_sensitivity_input.change(
-        fn=lambda val: val, inputs=entity_span_sensitivity_input, outputs=entity_span_sensitivity
-    )
-    nayose_coef_input.change(fn=lambda val: val, inputs=nayose_coef_input, outputs=nayose_coef)
-    texts.change(
-        fn=get_topk_entities_from_texts,
-        inputs=[models, texts, topk, entity_span_sensitivity, nayose_coef, entity_replaced_counts],
-        outputs=[batch_entity_spans, topk_normal_entities, topk_category_entities, topk_span_entities],
-    )
-    topk.change(
-        fn=get_topk_entities_from_texts,
-        inputs=[models, texts, topk, entity_span_sensitivity, nayose_coef, entity_replaced_counts],
-        outputs=[batch_entity_spans, topk_normal_entities, topk_category_entities, topk_span_entities],
-    )
-    entity_span_sensitivity.change(
         fn=get_topk_entities_from_texts,
-        inputs=[models, texts, topk, entity_span_sensitivity, nayose_coef, entity_replaced_counts],
-        outputs=[batch_entity_spans, topk_normal_entities, topk_category_entities, topk_span_entities],
     )
-    nayose_coef.change(
         fn=get_topk_entities_from_texts,
-        inputs=[models, texts, topk, entity_span_sensitivity, nayose_coef, entity_replaced_counts],
-        outputs=[batch_entity_spans, topk_normal_entities, topk_category_entities, topk_span_entities],
-    )
-    entity_replaced_counts.change(
-        fn=get_topk_entities_from_texts,
-        inputs=[models, texts, topk, entity_span_sensitivity, nayose_coef, entity_replaced_counts],
-        outputs=[batch_entity_spans, topk_normal_entities, topk_category_entities, topk_span_entities],
     )
     gr.Markdown("---")
     gr.Markdown("## 出力エンティティ")
-    @gr.render(inputs=[texts, batch_entity_spans, topk_normal_entities, topk_category_entities, topk_span_entities])
     def render_topk_entities(
-        texts, batch_entity_spans, topk_normal_entities, topk_category_entities, topk_span_entities
     ):
         for text, entity_spans, normal_entities, category_entities, span_entities in zip(
-            texts, batch_entity_spans, topk_normal_entities, topk_category_entities, topk_span_entities
         ):
             highlighted_text_value = []
             cur = 0

@@ -21,7 +21,7 @@ MAX_TEXT_FILE_LINES = 100
 MAX_ENTITY_FILE_LINES = 1000
 repo_id = "studio-ousia/luxe"
+revision = "ja-v0.3.2"
 nayose_repo_id = "studio-ousia/luxe-nayose-bm25"
@@ -218,7 +218,7 @@ def get_topk_entities_from_texts(
         else:
             topk_span_entities.append([])
+    return texts, batch_entity_spans, topk_normal_entities, topk_category_entities, topk_span_entities
 def get_selected_entity(evt: gr.SelectData):
@@ -391,7 +391,8 @@ with gr.Blocks() as demo:
     # cf. https://www.gradio.app/docs/gradio/state#param-state-value
     models = gr.State((model, tokenizer, bm25_tokenizer, bm25_retriever))
+    input_texts = gr.State([])
+    output_texts = gr.State([])
     entity_replaced_counts = gr.State(0)
@@ -414,12 +415,37 @@ with gr.Blocks() as demo:
     with gr.Tab(label="直接入力"):
         text_input = gr.Textbox(label=f"入力テキスト（最大{MAX_TEXT_LENGTH}文字）", max_length=MAX_TEXT_LENGTH)
     with gr.Tab(label="ファイルアップロード"):
+        gr.Markdown(f"1行1事例のテキストファイル（最大{MAX_TEXT_FILE_LINES}行）をアップロードできます。")
+        texts_file = gr.File(label="入力テキストファイル")
+    with gr.Accordion(label="ハイパーパラメータ", open=False):
+        topk_input = gr.Number(5, label="エンティティ件数", interactive=True)
+        entity_span_sensitivity_input = gr.Slider(
+            minimum=0.0, maximum=5.0, value=1.0, step=0.1, label="エンティティ検出の積極度", interactive=True
+        )
+        nayose_coef_input = gr.Slider(
+            minimum=0.0, maximum=2.0, value=1.0, step=0.1, label="文字列一致の優先度", interactive=True
+        )
+    text_input.change(fn=lambda text: [normalize_text(text)], inputs=text_input, outputs=input_texts)
+    texts_file.change(fn=get_texts_from_file, inputs=texts_file, outputs=input_texts)
+    topk_input.change(fn=lambda val: val, inputs=topk_input, outputs=topk)
+    entity_span_sensitivity_input.change(
+        fn=lambda val: val, inputs=entity_span_sensitivity_input, outputs=entity_span_sensitivity
+    )
+    nayose_coef_input.change(fn=lambda val: val, inputs=nayose_coef_input, outputs=nayose_coef)
     with gr.Accordion(label="LUXEのエンティティ語彙を置き換える", open=False):
+        gr.Markdown(
+            """LUXEのモデルのエンティティの語彙を任意のエンティティ集合に置き換えます。
+            エンティティと共に与えられるエンティティの説明文から、エンティティの埋め込みが計算されます。""",
+            line_breaks=True,
         )
+        gr.Markdown(
+            f"「エンティティ」と「エンティティの説明文」の2列からなるCSVファイル（最大{MAX_ENTITY_FILE_LINES}行）をアップロードできます。"
+        )
+        new_entity_text_pairs_file = gr.File(label="エンティティと説明文のCSVファイル", height="128px")
+        gr.Markdown("CSVファイルから読み込まれた項目が以下の表に表示されます。表の内容を直接編集することも可能です。")
         new_entity_text_pairs_input = gr.Dataframe(
             # value=sample_new_entity_text_pairs,
             headers=["entity", "text"],
@@ -429,6 +455,7 @@ with gr.Blocks() as demo:
             interactive=True,
         )
         replace_entity_button = gr.Button(value="エンティティ語彙を置き換える")
+        gr.Markdown("LUXEのモデルのエンティティ語彙は、デモページの再読み込み時にリセットされます。")
     new_entity_text_pairs_file.change(
         fn=get_new_entity_text_pairs_from_file, inputs=new_entity_text_pairs_file, outputs=new_entity_text_pairs_input
@@ -439,59 +466,29 @@ with gr.Blocks() as demo:
         outputs=entity_replaced_counts,
     )
+    submit_button = gr.Button(value="予測実行", variant="huggingface")
+    submit_button.click(
         fn=get_topk_entities_from_texts,
+        inputs=[models, input_texts, topk, entity_span_sensitivity, nayose_coef, entity_replaced_counts],
+        outputs=[output_texts, batch_entity_spans, topk_normal_entities, topk_category_entities, topk_span_entities],
     )
+    text_input.submit(
         fn=get_topk_entities_from_texts,
+        inputs=[models, input_texts, topk, entity_span_sensitivity, nayose_coef, entity_replaced_counts],
+        outputs=[output_texts, batch_entity_spans, topk_normal_entities, topk_category_entities, topk_span_entities],
     )
     gr.Markdown("---")
     gr.Markdown("## 出力エンティティ")
+    @gr.render(
+        inputs=[output_texts, batch_entity_spans, topk_normal_entities, topk_category_entities, topk_span_entities]
+    )
     def render_topk_entities(
+        output_texts, batch_entity_spans, topk_normal_entities, topk_category_entities, topk_span_entities
     ):
         for text, entity_spans, normal_entities, category_entities, span_entities in zip(
+            output_texts, batch_entity_spans, topk_normal_entities, topk_category_entities, topk_span_entities
         ):
             highlighted_text_value = []
             cur = 0