# NOTE(review): the lines below appear to be web-page residue captured together
# with the file (status banner, file size, per-line commit gutter, line-number
# gutter). They are not part of the program and are kept only as comments so
# that the module can be parsed.
# Spaces:
# Sleeping
# Sleeping
# File size: 4,051 Bytes
# 54f3307 c2e437e 54f3307 6e0eacd 33b440a a45aab6 33b440a c2e437e f05a7d1 5723e0b a45aab6 d387203 33b440a 6e0eacd a45aab6 6e0eacd a45aab6 6e0eacd a45aab6 6e0eacd a45aab6 6e0eacd 33b440a 6e0eacd d387203 f05a7d1 d387203 f05a7d1 d387203 6e0eacd d387203 6e0eacd d387203 f05a7d1 33b440a 1bf650f
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
import re
from datetime import datetime

import dateparser
import fitz
import google.generativeai as genai
import spacy
import streamlit as st
from docx import Document
from giner import GiNER
# Load the spaCy English pipeline once at import time; extract_info() calls it
# on the resume text to obtain named entities.
nlp = spacy.load('en_core_web_sm')
# Load the second entity tagger once at import time; extract_info() calls its
# annotate() method on the resume text.
# NOTE(review): the original comment calls this "GLiNER" while the import is
# `giner.GiNER` -- confirm the package name and that "roberta-large" is a valid
# argument for the installed library.
giner = GiNER("roberta-large")
def extract_text_from_pdf(file):
    """Return the concatenated text of every page of a PDF.

    Args:
        file: Binary file-like object whose ``read()`` returns the raw PDF
            bytes (``main`` passes the Streamlit upload here).

    Returns:
        The text of all pages joined in page order, with no separator added
        between pages.
    """
    # Open from memory and close the document deterministically; the original
    # left the PyMuPDF document open after reading it.
    with fitz.open(stream=file.read(), filetype="pdf") as pdf:
        # Join once instead of growing a string with repeated "+=".
        return "".join(page.get_text() for page in pdf)
def extract_text_from_doc(file):
    """Return the paragraph text of a DOCX file as one space-separated string.

    Args:
        file: Path or file-like object accepted by ``docx.Document``.

    Returns:
        The ``text`` of every paragraph in document order, joined by a single
        space. Only ``paragraphs`` is read; no other part of the document is
        consulted.
    """
    parsed = Document(file)
    paragraph_texts = [para.text for para in parsed.paragraphs]
    return " ".join(paragraph_texts)
def authenticate_gemini(api_key):
    """Configure the Gemini client and return a ``gemini-pro`` model handle.

    Args:
        api_key: API key passed to ``genai.configure``.

    Returns:
        The ``genai.GenerativeModel`` instance, or ``None`` when configuring
        or constructing the model raises; in that case the error is shown in
        the Streamlit UI instead of being propagated.
    """
    try:
        genai.configure(api_key=api_key)
        gemini = genai.GenerativeModel('gemini-pro')
    except Exception as exc:
        # Report the failure in the UI and signal it to the caller with None.
        st.error(f"Authentication failed: {exc}")
        return None
    return gemini
def generate_summary(text, model):
    """Ask the generative model for a short summary of a resume.

    Args:
        text: Plain text of the resume; embedded verbatim in the prompt.
        model: Object exposing ``generate_content(prompt)`` whose result has a
            ``text`` attribute.

    Returns:
        The ``text`` attribute of the model's response.
    """
    # The prompt is split across literals for readability only; the resulting
    # string is identical to the single-line form.
    instructions = (
        "Summarize the following resume:\n\n"
        f"{text}"
        "\n\nProvide a brief overview of the candidate's qualifications, "
        "experience, and key skills."
    )
    return model.generate_content(instructions).text
# Pattern used to pull an e-mail address straight out of the resume text.
_EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")

# Substrings that mark an organisation name as an educational institution.
_EDUCATION_KEYWORDS = ("university", "college", "institute", "school")


def _is_school(name):
    """Return True when *name* contains one of the education keywords."""
    lowered = name.lower()
    return any(keyword in lowered for keyword in _EDUCATION_KEYWORDS)


def extract_info(text):
    """Extract companies, experience, education and contact details from a resume.

    Entities come from two taggers: the module-level spaCy pipeline ``nlp``
    and the module-level ``giner`` tagger, whose ``annotate`` result is read
    as an iterable of mappings with ``'text'`` and ``'type'`` keys.

    Args:
        text: Plain text of the resume.

    Returns:
        A 5-tuple ``(companies, experience, education, email, phone)``:

        * ``companies`` -- sorted list of every name tagged ``"ORG"`` by
          either tagger.
        * ``experience`` -- int; the largest gap in years between the current
          year and any ``DATE`` entity that ``dateparser`` can parse and that
          is not in the future, or ``0`` when there is none. This is a rough
          heuristic: it measures time since the earliest date mentioned.
        * ``education`` -- sorted list of the organisation names containing
          an education keyword (university, college, institute, school).
        * ``email`` -- first e-mail address found, or ``"Not found"``.
        * ``phone`` -- first phone number found, or ``"Not found"``.
    """
    doc = nlp(text)
    giner_results = giner.annotate(text)

    # Organisations: union of both taggers. Sorting makes the returned lists
    # deterministic instead of depending on set iteration order.
    org_names = {ent.text for ent in doc.ents if ent.label_ == "ORG"}
    org_names.update(
        entity['text'] for entity in giner_results if entity['type'] == "ORG"
    )
    companies = sorted(org_names)
    education = sorted(name for name in org_names if _is_school(name))

    # Experience: years elapsed since the earliest parseable, non-future date.
    current_year = datetime.now().year
    year_gaps = []
    for ent in doc.ents:
        if ent.label_ != "DATE":
            continue
        parsed = dateparser.parse(ent.text)
        if parsed is not None and parsed.year <= current_year:
            year_gaps.append(current_year - parsed.year)
    experience = max(year_gaps, default=0)

    # E-mail: honour an "EMAIL" entity if the pipeline provides one, otherwise
    # search the raw text.
    # NOTE(review): the stock English spaCy label scheme does not list an
    # EMAIL label, so the regex is the path that normally finds the address.
    # The original fell back to the first "PER" (person) entity, which
    # reported a person's name as the e-mail address.
    email = next((ent.text for ent in doc.ents if ent.label_ == "EMAIL"), None)
    if email is None:
        found = _EMAIL_RE.search(text)
        email = found.group(0) if found else "Not found"

    # Phone: spaCy entity first, then the second tagger's "PHONE" entities.
    phone = next(
        (ent.text for ent in doc.ents if ent.label_ == "PHONE_NUMBER"), None
    )
    if phone is None:
        phone = next(
            (entity['text'] for entity in giner_results if entity['type'] == "PHONE"),
            "Not found",
        )

    return companies, experience, education, email, phone
def main():
    """Render the Streamlit page: upload a resume, show extracted fields and a summary.

    Reads the Gemini API key from ``st.secrets["GEMINI_API_KEY"]``, waits for
    a PDF or DOCX upload, authenticates the model, extracts the text, shows
    the fields returned by ``extract_info`` and then the model-generated
    summary. Any exception raised after the upload is reported in the UI
    (message plus traceback) rather than propagated.
    """
    st.title("Enhanced Resume Analyzer")
    api_key = st.secrets["GEMINI_API_KEY"]  # key comes from Streamlit secrets
    uploaded_file = st.file_uploader("Choose a PDF or DOCX file", type=["pdf", "docx"])

    # Nothing to do until the user has picked a file.
    if uploaded_file is None:
        return

    try:
        model = authenticate_gemini(api_key)
        if model is None:
            # authenticate_gemini has already shown the error.
            return

        # Pick the text extractor from the upload's MIME type.
        extractors = {
            "application/pdf": extract_text_from_pdf,
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document": extract_text_from_doc,
        }
        extractor = extractors.get(uploaded_file.type)
        if extractor is None:
            st.error("Unsupported file format.")
            return
        resume_text = extractor(uploaded_file)

        details = extract_info(resume_text)
        companies, experience, education, email, phone = details

        st.subheader("Extracted Information")
        st.write(f"*Years of Experience:* {experience}")
        st.write("*Companies:*", ", ".join(companies))
        st.write("*Education:*", ", ".join(education))
        st.write(f"*Email:* {email}")
        st.write(f"*Phone:* {phone}")

        st.subheader("Resume Summary")
        st.write(generate_summary(resume_text, model))
    except Exception as exc:
        st.error(f"Error during processing: {str(exc)}")
        st.exception(exc)  # also render the full traceback in the page
# Run the app only when this file is executed as a script, not when imported.
# (The stray "|" that followed the call was page residue and a syntax error.)
if __name__ == "__main__":
    main()