import os
import asyncio
import hashlib

import gradio as gr
import nest_asyncio

nest_asyncio.apply()  # ✅ Allow nested event loops (Jupyter/Colab/Spaces)

# ✅ Load secrets from the Hugging Face Spaces environment
MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
LLAMA_CLOUD_API_KEY = os.getenv("LLAMA_CLOUD_API_KEY")

# ✅ Warn early if either key is missing
if MISTRAL_API_KEY is None or LLAMA_CLOUD_API_KEY is None:
    print("🚨 ERROR: Missing API keys. Please set them in Hugging Face Secrets.")

from llama_index.core import (
    Settings,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.core.workflow import (
    Context,
    Event,
    StartEvent,
    StopEvent,
    Workflow,
    step,
)
from llama_index.embeddings.mistralai import MistralAIEmbedding
from llama_index.llms.mistralai import MistralAI
from llama_parse import LlamaParse

Settings.embed_model = MistralAIEmbedding(model_name="mistral-embed", api_key=MISTRAL_API_KEY)

llm = MistralAI(model="mistral-large-latest", api_key=MISTRAL_API_KEY)


class QueryEvent(Event):
    query: str


class RAGWorkflow(Workflow):
    storage_dir = "./storage"
    hash_file = "./last_resume_hash.txt"
    llm: MistralAI
    query_engine = None  # Populated in set_up()

    def compute_file_hash(self, file_path):
        """Compute the SHA-256 hash of a file from its path."""
        hasher = hashlib.sha256()
        with open(file_path, "rb") as f:  # Read the file in binary mode
            while chunk := f.read(8192):
                hasher.update(chunk)
        return hasher.hexdigest()

    def get_last_stored_hash(self):
        """Retrieve the last stored resume hash, if available."""
        if os.path.exists(self.hash_file):
            with open(self.hash_file, "r") as f:
                return f.read().strip()
        return None

    def update_stored_hash(self, new_hash):
        """Update the stored resume hash after processing a new file."""
        with open(self.hash_file, "w") as f:
            f.write(new_hash)

    @step
    async def set_up(self, ctx: Context, ev: StartEvent) -> QueryEvent:
        if not ev.resume_file:
            raise ValueError("❌ No resume file provided")

        # ✅ Extract the correct file path
        if isinstance(ev.resume_file, gr.utils.NamedString):
            file_path = ev.resume_file.name
        elif isinstance(ev.resume_file, str) and os.path.exists(ev.resume_file):
            file_path = ev.resume_file
        else:
            raise ValueError("⚠️ Invalid file format received!")

        print(f"✅ Resume File Path: {file_path}")

        self.llm = MistralAI(model="mistral-large-latest", api_key=MISTRAL_API_KEY)

        # ✅ Compute the hash of the uploaded resume file
        new_resume_hash = self.compute_file_hash(file_path)
        last_stored_hash = self.get_last_stored_hash()

        if os.path.exists(self.storage_dir) and last_stored_hash == new_resume_hash:
            # Resume hasn't changed; load the existing index
            storage_context = StorageContext.from_defaults(persist_dir=self.storage_dir)
            index = load_index_from_storage(storage_context)
        else:
            # Resume is new; parse it and rebuild the index
            documents = LlamaParse(
                api_key=LLAMA_CLOUD_API_KEY,
                result_type="markdown",
                content_guideline_instruction="Extract structured bullet points from the resume.",
            ).load_data(file_path, extra_info={"file_name": os.path.basename(file_path)})
            index = VectorStoreIndex.from_documents(
                documents,
                embed_model=Settings.embed_model,  # Mistral embeddings configured above
            )
            index.storage_context.persist(persist_dir=self.storage_dir)

            # ✅ Update the stored hash
            self.update_stored_hash(new_resume_hash)

        self.query_engine = index.as_query_engine(llm=self.llm, similarity_top_k=5)
        return QueryEvent(query=ev.query)

    @step
    async def ask_question(self, ctx: Context, ev: QueryEvent) -> StopEvent:
        response = self.query_engine.query(f"This is a question about the resume: {ev.query}")
        return StopEvent(result=response.response)
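
# A minimal standalone check of the workflow, kept commented out so it never
# runs on import. "sample_resume.pdf" is a placeholder path (an assumption);
# point it at any local PDF before uncommenting.
#
#   async def _smoke_test():
#       wf = RAGWorkflow(timeout=120, verbose=False)
#       answer = await wf.run(
#           resume_file="sample_resume.pdf",
#           query="Where does the applicant currently work?",
#       )
#       print(answer)
#
#   asyncio.run(_smoke_test())
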
# ✅ Instantiate the workflow once so the persisted index is reused across queries
w = RAGWorkflow(timeout=120, verbose=False)


async def process_resume(file, query):
    """Handle a Gradio file upload and query (async)."""
    if file is None:
        return "❌ Please upload a resume."
    if not query:
        return "❌ Please enter a question."

    try:
        # ✅ Use the actual file path from Gradio
        file_path = file.name

        # ✅ Debugging information
        print(f"✅ File uploaded: {file_path}")
        print(f"✅ File size: {os.path.getsize(file_path)} bytes")

        # ✅ Run the RAG workflow with the actual file path
        result = await w.run(
            resume_file=file_path,  # ✅ Pass the file path, not a BytesIO object
            query=query,
        )

        print("✅ Result:", result)  # Debug output
        return result if result else "⚠️ No relevant information found."

    except Exception as e:
        print("🚨 Error:", str(e))
        return f"🚨 Error occurred: {str(e)}"


def clear_inputs():
    """Reset the file input, query box, and response box."""
    return None, "", ""


# ✅ Create the Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 📄 RAGResume")
    gr.Markdown("**Upload a resume and ask questions about it!**")
    gr.Markdown("""
    1. Upload a resume in PDF format.
    2. Enter a question about the resume (e.g., where does the applicant currently work?).
    3. Click the "Submit" button to get a response.
    4. Click the "Clear" button to reset the inputs.
    """)

    with gr.Row():
        file_input = gr.File(label="📄 Upload Resume (PDF)")
        query_input = gr.Textbox(label="💬 Enter your question")

    output = gr.Textbox(label="📝 Response")

    with gr.Row():
        submit_btn = gr.Button("🚀 Submit")
        clear_btn = gr.Button("🧹 Clear")

    submit_btn.click(process_resume, inputs=[file_input, query_input], outputs=output)
    clear_btn.click(clear_inputs, outputs=[file_input, query_input, output])


# ✅ Launch Gradio. launch() blocks and drives the async handlers itself, so no
# manual event-loop management is needed (wrapping a plain function's return
# value in run_until_complete would raise a TypeError).
if __name__ == "__main__":
    demo.queue()             # Enables async handlers such as process_resume
    demo.launch(share=True)  # ✅ Public link enabled
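
# For Hugging Face Spaces, a requirements.txt along these lines should cover
# the imports above (a sketch; pin versions to match your environment):
#
#   gradio
#   nest-asyncio
#   llama-parse
#   llama-index-core
#   llama-index-llms-mistralai
#   llama-index-embeddings-mistralai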