Update app.py
Browse files
app.py
CHANGED
@@ -3,12 +3,12 @@ from datasets import load_dataset
|
|
3 |
from rapidfuzz import process, fuzz
|
4 |
|
5 |
# ──────────────────────────────────────────────────────────
|
6 |
-
# 1)
|
7 |
# ──────────────────────────────────────────────────────────
|
8 |
ds = load_dataset(
|
9 |
"nyuuzyou/clker-svg",
|
10 |
split="train",
|
11 |
-
streaming=True,
|
12 |
)
|
13 |
records = []
|
14 |
for ex in ds:
|
@@ -23,16 +23,16 @@ for ex in ds:
|
|
23 |
)
|
24 |
|
25 |
# ──────────────────────────────────────────────────────────
|
26 |
-
# 2)
|
27 |
# ──────────────────────────────────────────────────────────
|
28 |
def search_svg(query: str, top_k: int):
|
29 |
if not query.strip():
|
30 |
-
return "⚠️
|
31 |
|
32 |
-
# choices: index(int) ➜ title+tags
|
33 |
choices = {i: f"{r['title']} {r['tags']}" for i, r in enumerate(records)}
|
34 |
|
35 |
-
# Rapidfuzz: (choice_text, score, key)
|
36 |
matched = process.extract(
|
37 |
query,
|
38 |
choices,
|
@@ -44,23 +44,23 @@ def search_svg(query: str, top_k: int):
|
|
44 |
html_start = '<div class="gallery-grid">'
|
45 |
html_end = '</div>'
|
46 |
|
47 |
-
for _, score, idx in matched:
|
48 |
r = records[idx]
|
49 |
svg_html = (
|
50 |
'<div class="gallery-item">'
|
51 |
f'<div class="svg-container">{r["svg"]}</div>'
|
52 |
f'<div class="item-details">'
|
53 |
f'<h3>{r["title"]}</h3>'
|
54 |
-
f'<div class="score"
|
55 |
f'<div class="tags">{r["tags"]}</div>'
|
56 |
-
f'<a href="{r["url"]}" target="_blank" class="download-link"
|
57 |
f'</div>'
|
58 |
'</div>'
|
59 |
)
|
60 |
html_snippets.append(svg_html)
|
61 |
|
62 |
if not html_snippets:
|
63 |
-
return "
|
64 |
|
65 |
return "", html_start + ''.join(html_snippets) + html_end
|
66 |
|
@@ -69,8 +69,8 @@ def search_svg(query: str, top_k: int):
|
|
69 |
# ──────────────────────────────────────────────────────────
|
70 |
TITLE = "🔍 Clker SVG"
|
71 |
DESCRIPTION = """
|
72 |
-
|
73 |
-
|
74 |
"""
|
75 |
DISCORD_BADGE = """<p style="text-align:center; margin-top: -10px;"><a href="https://discord.gg/openfreeai" target="_blank"> <img src="https://img.shields.io/static/v1?label=Discord&message=Openfree%20AI&color=%230000ff&labelColor=%23800080&logo=discord&logoColor=white&style=for-the-badge" alt="badge"></a></p>"""
|
76 |
|
@@ -151,8 +151,8 @@ with gr.Blocks(title=TITLE, css=CSS) as demo:
|
|
151 |
with gr.Row():
|
152 |
with gr.Column(scale=4):
|
153 |
query_box = gr.Textbox(
|
154 |
-
label="
|
155 |
-
placeholder="
|
156 |
show_label=True
|
157 |
)
|
158 |
with gr.Column(scale=1):
|
@@ -161,11 +161,11 @@ with gr.Blocks(title=TITLE, css=CSS) as demo:
|
|
161 |
maximum=50,
|
162 |
value=12,
|
163 |
step=3,
|
164 |
-
label="
|
165 |
)
|
166 |
|
167 |
with gr.Row():
|
168 |
-
search_button = gr.Button("
|
169 |
|
170 |
warning_md = gr.Markdown()
|
171 |
output_html = gr.HTML()
|
@@ -183,4 +183,4 @@ with gr.Blocks(title=TITLE, css=CSS) as demo:
|
|
183 |
)
|
184 |
|
185 |
if __name__ == "__main__":
|
186 |
-
demo.launch()
|
|
|
3 |
from rapidfuzz import process, fuzz
|
4 |
|
5 |
# ──────────────────────────────────────────────────────────
|
6 |
+
# 1) Load dataset (streaming) ─ only metadata kept in memory
|
7 |
# ──────────────────────────────────────────────────────────
|
8 |
ds = load_dataset(
|
9 |
"nyuuzyou/clker-svg",
|
10 |
split="train",
|
11 |
+
streaming=True, # .jsonl.zst → streamed automatically
|
12 |
)
|
13 |
records = []
|
14 |
for ex in ds:
|
|
|
23 |
)
|
24 |
|
25 |
# ──────────────────────────────────────────────────────────
|
26 |
+
# 2) Search function
|
27 |
# ──────────────────────────────────────────────────────────
|
28 |
def search_svg(query: str, top_k: int):
|
29 |
if not query.strip():
|
30 |
+
return "⚠️ Please enter a search term.", None
|
31 |
|
32 |
+
# choices: index(int) ➜ single-line title+tags string
|
33 |
choices = {i: f"{r['title']} {r['tags']}" for i, r in enumerate(records)}
|
34 |
|
35 |
+
# RapidFuzz: with a dict of choices, each match is (choice_text, score, key)
|
36 |
matched = process.extract(
|
37 |
query,
|
38 |
choices,
|
|
|
44 |
html_start = '<div class="gallery-grid">'
|
45 |
html_end = '</div>'
|
46 |
|
47 |
+
for _, score, idx in matched: # idx is the key from choices, i.e. the index into records
|
48 |
r = records[idx]
|
49 |
svg_html = (
|
50 |
'<div class="gallery-item">'
|
51 |
f'<div class="svg-container">{r["svg"]}</div>'
|
52 |
f'<div class="item-details">'
|
53 |
f'<h3>{r["title"]}</h3>'
|
54 |
+
f'<div class="score">Match score: {score}</div>'
|
55 |
f'<div class="tags">{r["tags"]}</div>'
|
56 |
+
f'<a href="{r["url"]}" target="_blank" class="download-link">Download original</a>'
|
57 |
f'</div>'
|
58 |
'</div>'
|
59 |
)
|
60 |
html_snippets.append(svg_html)
|
61 |
|
62 |
if not html_snippets:
|
63 |
+
return "No results found.", None
|
64 |
|
65 |
return "", html_start + ''.join(html_snippets) + html_end
|
66 |
|
|
|
69 |
# ──────────────────────────────────────────────────────────
|
70 |
TITLE = "🔍 Clker SVG"
|
71 |
DESCRIPTION = """
|
72 |
+
This application lets you quickly search public-domain SVG clip art using the “nyuuzyou/clker-svg” dataset.
|
73 |
+
It finds similar items in titles and tags through fuzzy matching and shows them in a visual gallery.
|
74 |
"""
|
75 |
DISCORD_BADGE = """<p style="text-align:center; margin-top: -10px;"><a href="https://discord.gg/openfreeai" target="_blank"> <img src="https://img.shields.io/static/v1?label=Discord&message=Openfree%20AI&color=%230000ff&labelColor=%23800080&logo=discord&logoColor=white&style=for-the-badge" alt="badge"></a></p>"""
|
76 |
|
|
|
151 |
with gr.Row():
|
152 |
with gr.Column(scale=4):
|
153 |
query_box = gr.Textbox(
|
154 |
+
label="Search term",
|
155 |
+
placeholder="e.g. cat, tree, house, computer, flower...",
|
156 |
show_label=True
|
157 |
)
|
158 |
with gr.Column(scale=1):
|
|
|
161 |
maximum=50,
|
162 |
value=12,
|
163 |
step=3,
|
164 |
+
label="Number of results"
|
165 |
)
|
166 |
|
167 |
with gr.Row():
|
168 |
+
search_button = gr.Button("Search", variant="primary")
|
169 |
|
170 |
warning_md = gr.Markdown()
|
171 |
output_html = gr.HTML()
|
|
|
183 |
)
|
184 |
|
185 |
if __name__ == "__main__":
|
186 |
+
demo.launch()
|