Files changed:
- .gitignore +2 -0
- README.md +1 -11
- app.py +31 -0
- embed_matrix.npy +3 -0
- embed_matrix_hybrid.npy +3 -0
- embed_matrix_hybrid_graphsage.npy +3 -0
- grafo_embed.pickle +3 -0
- grafo_embed_hybrid.pickle +3 -0
- grafo_embed_hybrid_graphsage.pickle +3 -0
- grafo_ttl_hibrido.ttl +0 -0
- grafo_ttl_hibrido_graphsage.ttl +0 -0
- grafo_ttl_no_hibrido.ttl +0 -0
- id_map.pkl +3 -0
- id_map_hybrid.pkl +3 -0
- id_map_hybrid_graphsage.pkl +3 -0
- rag_hf.py +253 -0
- requirements.txt +97 -0

.gitignore
ADDED
@@ -0,0 +1,2 @@
+.env
+.streamlit/secrets.toml

README.md
CHANGED
@@ -1,11 +1 @@
----
-title: RAG SA
-emoji: 👀
-colorFrom: indigo
-colorTo: indigo
-sdk: static
-pinned: false
-license: mit
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# RAG-glottolog

app.py
ADDED
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Apr 9 10:56:02 2025
+
+@author: jveraz
+"""
+
+from fastapi import FastAPI
+from pydantic import BaseModel
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+app = FastAPI()
+
+MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.3"
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_ID,
+    torch_dtype=torch.float16,
+    device_map="auto"
+)
+
+class QueryInput(BaseModel):
+    inputs: str
+
+@app.post("/")
+async def generate(query: QueryInput):
+    input_ids = tokenizer(query.inputs, return_tensors="pt").input_ids.to(model.device)
+    output_ids = model.generate(input_ids, max_new_tokens=200)
+    generated = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+    return {"generated_text": generated}
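
For reference, a minimal client for this endpoint could look like the sketch below; the URL, prompt, and timeout are placeholders for illustration and are not part of the commit:

    # Hypothetical client call for the app.py endpoint above.
    # The URL is a placeholder for wherever the FastAPI app is served.
    import requests

    payload = {"inputs": "[INST] Name three South American indigenous language families. [/INST]"}
    resp = requests.post("http://localhost:8000/", json=payload, timeout=120)
    resp.raise_for_status()
    print(resp.json()["generated_text"])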

embed_matrix.npy
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:447c4325d930de44070f9def3ad15bf54a93d39956587fc20525cfc5060f4a50
+size 1695872

embed_matrix_hybrid.npy
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1fbd97a4a97523185aab51c4fad790aff36c8acd6710c5607f4acea9be0b96b
+size 3382400

embed_matrix_hybrid_graphsage.npy
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4711ff26a44c4938f94a79169dd56b7adda7668e1b6dd89f718059092ca5c50
+size 3382400

grafo_embed.pickle
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:420fe64e2680f0d3c4f7f90b891f21b56cce89784ea104b3a2878e8b845ce451
+size 4470194

grafo_embed_hybrid.pickle
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:776ba033f208a5266c6a5b076189d96a9651993cdcd2297f5b55ed33a98dcf6d
+size 8320820

grafo_embed_hybrid_graphsage.pickle
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3993c376704872f90798a447434da0487db268d904ca4517e26c519b07bf1ac1
+size 7958567

grafo_ttl_hibrido.ttl
ADDED
The diff for this file is too large to render. See raw diff.

grafo_ttl_hibrido_graphsage.ttl
ADDED
The diff for this file is too large to render. See raw diff.

grafo_ttl_no_hibrido.ttl
ADDED
The diff for this file is too large to render. See raw diff.

id_map.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4e5e2ca4af562078ca2cc94a56fc409ac1fd0312f514a2835117568bc89b034
+size 6088

id_map_hybrid.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f011f1929fabc31724e935b9eac34ae691b530367d1b831bb2d92e78b555280a
+size 12189

id_map_hybrid_graphsage.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ebb6de3e35fea86da236cd9b12a1b6d6a1d4867941c25955b401c3bf857ed5f6
+size 12189
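
The .npy matrices and pickles above are committed as Git LFS pointers. rag_hf.py below loads each set as a row-aligned embedding matrix, an index-to-language-ID map, and a pickled graph with per-language metadata. A minimal sketch for inspecting one set locally after `git lfs pull` (treating the graph as NetworkX-style is an assumption inferred from the G.nodes access in rag_hf.py, not something this commit confirms):

    # Inspect one artifact set locally (run `git lfs pull` first so the real
    # files replace the LFS pointers). The exact graph type is assumed, not
    # confirmed by this commit.
    import pickle
    import numpy as np

    matrix = np.load("embed_matrix.npy")          # (n_languages, embedding_dim)
    with open("id_map.pkl", "rb") as f:
        id_map = pickle.load(f)                   # matrix row index -> language id
    with open("grafo_embed.pickle", "rb") as f:
        G = pickle.load(f)                        # graph with per-language metadata

    print(matrix.shape, len(id_map), type(G))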

rag_hf.py
ADDED
@@ -0,0 +1,253 @@
+# rag_interface.py (with numpy instead of faiss)
+import streamlit as st
+import pickle
+import numpy as np
+import rdflib
+import torch
+import datetime
+import os
+import requests
+from rdflib import Graph as RDFGraph, Namespace
+from sentence_transformers import SentenceTransformer
+from dotenv import load_dotenv
+
+# === CONFIGURATION ===
+load_dotenv()
+
+MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.3"
+EMBEDDING_MODEL = "intfloat/multilingual-e5-base"
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+EX = Namespace("http://example.org/lang/")
+
+st.set_page_config(
+    page_title="Vanishing Voices: Language Atlas",
+    page_icon="🌍",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+
+# Custom CSS
+st.markdown("""
+<style>
+.header {
+    color: #2c3e50;
+    border-bottom: 2px solid #3498db;
+    padding-bottom: 10px;
+    margin-bottom: 1.5rem;
+}
+.info-box {
+    background-color: #e8f4fc;
+    border-radius: 8px;
+    padding: 1rem;
+    margin-bottom: 1.5rem;
+    border-left: 4px solid #3498db;
+}
+.sidebar-section {
+    margin-bottom: 2rem;
+}
+.sidebar-title {
+    color: #2c3e50;
+    font-size: 1.1rem;
+    font-weight: 600;
+    margin-bottom: 0.5rem;
+    border-bottom: 1px solid #eee;
+    padding-bottom: 0.5rem;
+}
+.method-card {
+    background-color: #f8f9fa;
+    border-radius: 8px;
+    padding: 0.8rem;
+    margin-bottom: 0.8rem;
+    border-left: 3px solid #3498db;
+}
+.method-title {
+    font-weight: 600;
+    color: #3498db;
+    margin-bottom: 0.3rem;
+}
+</style>
+""", unsafe_allow_html=True)
+
+@st.cache_resource(show_spinner="Loading models and indexes...")
+def load_all_components():
+    embedder = SentenceTransformer(EMBEDDING_MODEL, device=DEVICE)
+    methods = {}
+    for label, suffix, ttl, matrix_path in [
+        ("Standard", "", "grafo_ttl_no_hibrido.ttl", "embed_matrix.npy"),
+        ("Hybrid", "_hybrid", "grafo_ttl_hibrido.ttl", "embed_matrix_hybrid.npy"),
+        ("GraphSAGE", "_hybrid_graphsage", "grafo_ttl_hibrido_graphsage.ttl", "embed_matrix_hybrid_graphsage.npy")
+    ]:
+        with open(f"id_map{suffix}.pkl", "rb") as f:
+            id_map = pickle.load(f)
+        with open(f"grafo_embed{suffix}.pickle", "rb") as f:
+            G = pickle.load(f)
+        matrix = np.load(matrix_path)
+        rdf = RDFGraph()
+        rdf.parse(ttl, format="ttl")
+        methods[label] = (matrix, id_map, G, rdf)
+    return methods, embedder
+
+methods, embedder = load_all_components()
+
+# === CORE FUNCTIONS ===
+def get_top_k(matrix, id_map, query, k):
+    vec = embedder.encode(f"query: {query}", convert_to_tensor=True, device=DEVICE)
+    vec = vec.cpu().numpy().astype("float32")
+    sims = np.dot(matrix, vec) / (np.linalg.norm(matrix, axis=1) * np.linalg.norm(vec) + 1e-10)
+    top_k_idx = np.argsort(sims)[-k:][::-1]
+    return [id_map[i] for i in top_k_idx]
+
+def get_context(G, lang_id):
+    node = G.nodes.get(lang_id, {})
+    lines = [f"**Language:** {node.get('label', lang_id)}"]
+    if node.get("wikipedia_summary"):
+        lines.append(f"**Wikipedia:** {node['wikipedia_summary']}")
+    if node.get("wikidata_description"):
+        lines.append(f"**Wikidata:** {node['wikidata_description']}")
+    if node.get("wikidata_countries"):
+        lines.append(f"**Countries:** {node['wikidata_countries']}")
+    return "\n\n".join(lines)
+
+def query_rdf(rdf, lang_id):
+    q = f"""
+    PREFIX ex: <http://example.org/lang/>
+    SELECT ?property ?value WHERE {{ ex:{lang_id} ?property ?value }}
+    """
+    try:
+        return [
+            (str(row[0]).split("/")[-1], str(row[1]))
+            for row in rdf.query(q)
+        ]
+    except Exception as e:
+        return [("error", str(e))]
+
+def generate_response(matrix, id_map, G, rdf, user_question, k=3):
+    ids = get_top_k(matrix, id_map, user_question, k)
+    context = [get_context(G, i) for i in ids]
+    rdf_facts = []
+    for i in ids:
+        rdf_facts.extend([f"{p}: {v}" for p, v in query_rdf(rdf, i)])
+    prompt = f"""<s>[INST]
+You are an expert in South American indigenous languages.
+Use strictly and only the information below to answer the user question in **English**.
+- Do not infer or assume facts that are not explicitly stated.
+- If the answer is unknown or insufficient, say "I cannot answer with the available data."
+- Limit your answer to 100 words.
+
+
+### CONTEXT:
+{chr(10).join(context)}
+
+### RDF RELATIONS:
+{chr(10).join(rdf_facts)}
+
+### QUESTION:
+{user_question}
+
+Answer:
+[/INST]"""
+    try:
+        res = requests.post(
+            f"https://api-inference.huggingface.co/models/{MODEL_ID}",
+            headers={"Authorization": f"Bearer {os.getenv('HF_API_TOKEN')}", "Content-Type": "application/json"},
+            json={"inputs": prompt}, timeout=30
+        )
+        out = res.json()
+        if isinstance(out, list) and "generated_text" in out[0]:
+            return out[0]["generated_text"].replace(prompt.strip(), "").strip(), ids, context, rdf_facts
+        return str(out), ids, context, rdf_facts
+    except Exception as e:
+        return str(e), ids, context, rdf_facts
+
+# === MAIN FUNCTION ===
+def main():
+    st.markdown("""
+<h1 class='header'>Vanishing Voices: South America's Endangered Language Atlas</h1>
+<div class='info-box'>
+<b>Linguistic Emergency:</b> Over 40% of South America's indigenous languages face extinction.
+This tool documents these cultural treasures before they disappear forever.
+</div>
+    """, unsafe_allow_html=True)
+
+    with st.sidebar:
+        st.image("https://glottolog.org/static/img/glottolog_lod.png", width=180)
+
+        with st.container():
+            st.markdown('<div class="sidebar-title">About This Tool</div>', unsafe_allow_html=True)
+            st.markdown("""
+<div class="method-card">
+<div class="method-title">Standard Search</div>
+Semantic retrieval based on text-only embeddings. Identifies languages using purely linguistic similarity from Wikipedia summaries and labels.
+</div>
+<div class="method-card">
+<div class="method-title">Hybrid Search</div>
+Combines semantic embeddings with structured data from knowledge graphs. Enriches language representation with contextual facts.
+</div>
+<div class="method-card">
+<div class="method-title">GraphSAGE Search</div>
+Leverages deep graph neural networks to learn relational patterns across languages. Captures complex cultural and genealogical connections.
+</div>
+            """, unsafe_allow_html=True)
+
+        with st.container():
+            st.markdown('<div class="sidebar-title">Research Settings</div>', unsafe_allow_html=True)
+            k = st.slider("Languages to analyze per query", 1, 10, 3)
+            st.markdown("**Display Options:**")
+            show_ids = st.checkbox("Language IDs", value=True, key="show_ids")
+            show_ctx = st.checkbox("Cultural Context", value=True, key="show_ctx")
+            show_rdf = st.checkbox("RDF Relations", value=True, key="show_rdf")
+
+        with st.container():
+            st.markdown('<div class="sidebar-title">Data Sources</div>', unsafe_allow_html=True)
+            st.markdown("""
+- Glottolog
+- Wikidata
+- Wikipedia
+- Ethnologue
+            """)
+
+    query = st.text_input("Ask about indigenous languages:", "Which Amazonian languages are most at risk?")
+
+    if st.button("Analyze with All Methods") and query:
+        col1, col2, col3 = st.columns(3)
+        results = {}
+        for col, (label, method) in zip([col1, col2, col3], methods.items()):
+            with col:
+                st.subheader(f"{label} Analysis")
+                start = datetime.datetime.now()
+                response, lang_ids, context, rdf_data = generate_response(*method, query, k)
+                duration = (datetime.datetime.now() - start).total_seconds()
+                st.markdown(response)
+                st.markdown(f"⏱️ {duration:.2f}s | 🌐 {len(lang_ids)} languages")
+                if show_ids:
+                    st.markdown("**Language Identifiers:**")
+                    st.code("\n".join(lang_ids))
+                if show_ctx:
+                    st.markdown("**Cultural Context:**")
+                    st.markdown("\n\n---\n\n".join(context))
+                if show_rdf:
+                    st.markdown("**RDF Knowledge:**")
+                    st.code("\n".join(rdf_data))
+                results[label] = response
+
+        log = f"""
+[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}]
+QUERY: {query}
+STANDARD:
+{results.get('Standard', '')}
+
+HYBRID:
+{results.get('Hybrid', '')}
+
+GRAPH-SAGE:
+{results.get('GraphSAGE', '')}
+{'='*60}
+"""
+        try:
+            with open("language_analysis_logs.txt", "a", encoding="utf-8") as f:
+                f.write(log)
+        except Exception as e:
+            st.warning(f"Failed to log: {str(e)}")
+
+if __name__ == "__main__":
+    main()
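
The retrieval step in get_top_k above is a plain cosine-similarity search over the embedding matrix, which is what the header comment means by "numpy instead of faiss". A toy, self-contained sketch of that step with made-up data:

    # Toy illustration of the numpy top-k retrieval used by get_top_k;
    # the matrix, id_map, and query vector are random stand-ins.
    import numpy as np

    matrix = np.random.rand(5, 4).astype("float32")   # 5 "languages", 4-dim embeddings
    id_map = {i: f"lang_{i}" for i in range(5)}       # row index -> language id
    vec = np.random.rand(4).astype("float32")         # encoded query

    # Cosine similarity of the query against every row (epsilon avoids division
    # by zero), then take the k highest-scoring rows in descending order.
    sims = matrix @ vec / (np.linalg.norm(matrix, axis=1) * np.linalg.norm(vec) + 1e-10)
    k = 3
    top_k_idx = np.argsort(sims)[-k:][::-1]
    print([id_map[i] for i in top_k_idx])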

requirements.txt
ADDED
@@ -0,0 +1,97 @@
+absl-py==2.2.2
+aiohappyeyeballs==2.6.1
+aiohttp==3.11.16
+aiosignal==1.3.2
+altair==5.5.0
+annotated-types==0.7.0
+anyio==4.9.0
+attrs==25.3.0
+blinker==1.9.0
+cachetools==5.5.2
+certifi==2025.1.31
+charset-normalizer==3.4.1
+click==8.1.8
+colorama==0.4.6
+contourpy==1.3.1
+cycler==0.12.1
+et_xmlfile==2.0.0
+faiss-cpu==1.10.0
+fastapi==0.115.12
+filelock==3.18.0
+fonttools==4.56.0
+frozenlist==1.5.0
+fsspec==2025.3.2
+gensim==4.3.3
+gitdb==4.0.12
+GitPython==3.1.44
+huggingface-hub==0.30.1
+idna==3.10
+Jinja2==3.1.6
+joblib==1.4.2
+jsonschema==4.23.0
+jsonschema-specifications==2024.10.1
+kiwisolver==1.4.8
+Levenshtein==0.27.1
+MarkupSafe==3.0.2
+matplotlib==3.10.1
+mpmath==1.3.0
+multidict==6.3.2
+narwhals==1.33.0
+networkx==3.4.2
+nltk==3.9.1
+node2vec==0.5.0
+numpy==1.26.4
+openpyxl==3.1.5
+ordpy==1.1.5
+packaging==24.2
+pandas==2.2.3
+pillow==11.1.0
+propcache==0.3.1
+protobuf==5.29.4
+psutil==7.0.0
+pyarrow==19.0.1
+pydantic==2.11.3
+pydantic_core==2.33.1
+pydeck==0.9.1
+pyparsing==3.2.3
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.0
+python-math==0.0.1
+pytz==2025.2
+PyYAML==6.0.2
+RapidFuzz==3.12.2
+rdflib==7.1.4
+referencing==0.36.2
+regex==2024.11.6
+requests==2.32.3
+rouge_score==0.1.2
+rpds-py==0.24.0
+safetensors==0.5.3
+scikit-learn==1.6.1
+scipy==1.13.1
+sentence-transformers==4.0.1
+setuptools==75.8.0
+six==1.17.0
+smart-open==7.1.0
+smmap==5.0.2
+sniffio==1.3.1
+starlette==0.46.1
+streamlit==1.44.1
+sympy==1.13.1
+tenacity==9.1.2
+threadpoolctl==3.6.0
+tokenizers==0.21.1
+toml==0.10.2
+torch==2.6.0
+torch-geometric==2.6.1
+tornado==6.4.2
+tqdm==4.67.1
+transformers==4.50.3
+typing-inspection==0.4.0
+typing_extensions==4.13.0
+tzdata==2025.2
+urllib3==2.3.0
+watchdog==6.0.0
+wheel==0.45.1
+wrapt==1.17.2
+yarl==1.19.0