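# Assumed Space dependencies (a requirements.txt along these lines; not shown here):
#   gradio, nest-asyncio, llama-parse,
#   llama-index, llama-index-llms-mistralai, llama-index-embeddings-mistralai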
import os

# Load secrets from the Hugging Face Spaces environment
MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
LLAMA_CLOUD_API_KEY = os.getenv("LLAMA_CLOUD_API_KEY")

# Verify that both keys are available
if MISTRAL_API_KEY is None or LLAMA_CLOUD_API_KEY is None:
    print("ERROR: Missing API keys. Please set them in Hugging Face Secrets.")

# Allow nested event loops (needed in notebooks and some Spaces runtimes)
import nest_asyncio
nest_asyncio.apply()
# Model setup: Mistral embeddings and the Mistral LLM
from llama_index.embeddings.mistralai import MistralAIEmbedding
from llama_index.core import Settings

Settings.embed_model = MistralAIEmbedding(model_name="mistral-embed", api_key=MISTRAL_API_KEY)

from llama_index.core import VectorStoreIndex
from llama_parse import LlamaParse
from llama_index.llms.mistralai import MistralAI

llm = MistralAI(model="mistral-large-latest", api_key=MISTRAL_API_KEY)
from llama_index.core.workflow import (
    StartEvent,
    StopEvent,
    Workflow,
    step,
    Event,
    Context,
)


class QueryEvent(Event):
    query: str


from llama_index.core import StorageContext, load_index_from_storage
import hashlib

class RAGWorkflow(Workflow):
    storage_dir = "./storage"
    hash_file = "./last_resume_hash.txt"
    llm: MistralAI
    query_engine = None  # query engine built in set_up()

    def compute_file_hash(self, file_path):
        """Compute the SHA-256 hash of a file from its path."""
        hasher = hashlib.sha256()
        with open(file_path, "rb") as f:  # Read the file in binary mode
            while chunk := f.read(8192):
                hasher.update(chunk)
        return hasher.hexdigest()

    def get_last_stored_hash(self):
        """Retrieve the last stored resume hash, if available."""
        if os.path.exists(self.hash_file):
            with open(self.hash_file, "r") as f:
                return f.read().strip()
        return None

    def update_stored_hash(self, new_hash):
        """Update the stored resume hash after processing a new file."""
        with open(self.hash_file, "w") as f:
            f.write(new_hash)

    @step
    async def set_up(self, ctx: Context, ev: StartEvent) -> QueryEvent:
        if not ev.resume_file:
            raise ValueError("No resume file provided")

        # Extract the file path from whatever Gradio passed in
        if isinstance(ev.resume_file, str) and os.path.exists(ev.resume_file):
            file_path = ev.resume_file
        elif hasattr(ev.resume_file, "name"):  # e.g. a Gradio NamedString or tempfile wrapper
            file_path = ev.resume_file.name
        else:
            raise ValueError("Invalid file format received!")

        print(f"Resume file path: {file_path}")

        self.llm = MistralAI(model="mistral-large-latest", api_key=MISTRAL_API_KEY)

        # Hash the uploaded resume so an unchanged file reuses the persisted index
        new_resume_hash = self.compute_file_hash(file_path)
        last_stored_hash = self.get_last_stored_hash()

        if os.path.exists(self.storage_dir) and last_stored_hash == new_resume_hash:
            # Resume hasn't changed; load the existing index
            storage_context = StorageContext.from_defaults(persist_dir=self.storage_dir)
            index = load_index_from_storage(storage_context)
        else:
            # Resume is new; parse it and rebuild the index
            documents = LlamaParse(
                api_key=LLAMA_CLOUD_API_KEY,
                result_type="markdown",
                content_guideline_instruction="Extract structured bullet points from the resume.",
            ).load_data(file_path, extra_info={"file_name": os.path.basename(file_path)})
            index = VectorStoreIndex.from_documents(
                documents,
                embed_model=Settings.embed_model,  # Mistral embeddings configured above
            )
            index.storage_context.persist(persist_dir=self.storage_dir)

            # Remember the hash of the resume we just indexed
            self.update_stored_hash(new_resume_hash)

        self.query_engine = index.as_query_engine(llm=self.llm, similarity_top_k=5)
        return QueryEvent(query=ev.query)

    @step
    async def ask_question(self, ctx: Context, ev: QueryEvent) -> StopEvent:
        response = self.query_engine.query(f"This is a question about the resume: {ev.query}")
        return StopEvent(result=response.response)
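
# Hedged sketch (assumption, not part of the app flow): the same workflow can be driven
# directly without the Gradio UI below; run_workflow_once and its arguments are illustrative.
async def run_workflow_once(resume_path: str, question: str) -> str:
    wf = RAGWorkflow(timeout=120, verbose=False)
    return await wf.run(resume_file=resume_path, query=question)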

import gradio as gr

# Create the workflow once and reuse it for every request
w = RAGWorkflow(timeout=120, verbose=False)

async def process_resume(file, query):
    """Handle a Gradio file upload and query processing (async)."""
    if file is None:
        return "Please upload a resume."
    if not query:
        return "Please enter a question."

    try:
        # Gradio passes either a file path string or an object with a .name attribute
        file_path = file if isinstance(file, str) else file.name

        # Debugging information
        print(f"File uploaded: {file_path}")
        print(f"File size: {os.path.getsize(file_path)} bytes")

        # Run the RAG workflow with the file path (not a BytesIO object)
        result = await w.run(resume_file=file_path, query=query)

        print("Result:", result)  # Debug output
        return result if result else "No relevant information found."
    except Exception as e:
        print("Error:", str(e))
        return f"Error occurred: {str(e)}"

# Clear all inputs and the output
def clear_inputs():
    return None, "", ""


# Create the Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# RAGResume")
    gr.Markdown("**Upload a resume and ask questions about it!**")
    gr.Markdown(
        """
1. Upload a resume in PDF format.
2. Enter a question about the resume (example: where does the applicant currently work?).
3. Click on the "Submit" button to get the response.
4. Click on the "Clear" button to reset the inputs.
"""
    )

    with gr.Row():
        file_input = gr.File(label="Upload Resume (PDF)")
        query_input = gr.Textbox(label="Enter your question")

    output = gr.Textbox(label="Response")

    with gr.Row():
        submit_btn = gr.Button("Submit")
        clear_btn = gr.Button("Clear")

    submit_btn.click(process_resume, inputs=[file_input, query_input], outputs=output)
    clear_btn.click(clear_inputs, outputs=[file_input, query_input, output])

# Launch Gradio; queue() enables async event handlers such as process_resume
def run_demo():
    demo.queue()
    demo.launch(share=True)  # share=True exposes a public link when run outside Spaces


if __name__ == "__main__":
    run_demo()