Spaces:

bangaboy
/

glinerdemo

Sleeping

File size: 4,051 Bytes

54f3307
c2e437e
54f3307
6e0eacd
33b440a
 
 
 
a45aab6
33b440a
c2e437e
f05a7d1
5723e0b
a45aab6
 
d387203
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33b440a
6e0eacd
 
a45aab6
6e0eacd
 
 
a45aab6
6e0eacd
 
 
 
 
 
a45aab6
6e0eacd
 
 
 
 
a45aab6
 
 
 
 
 
 
6e0eacd
 
33b440a
6e0eacd
d387203
f05a7d1
d387203
 
f05a7d1
d387203
 
 
 
 
 
 
 
 
 
 
 
 
6e0eacd
d387203
 
 
 
 
 
6e0eacd
d387203
 
 
 
 
 
f05a7d1
 
33b440a
1bf650f


import google.generativeai as genai
import streamlit as st
import fitz
import spacy
from docx import Document
import dateparser
from datetime import datetime
from giner import GiNER

# Load the small English spaCy pipeline once at import time; extract_info()
# runs it on the resume text and reads the resulting named entities.
nlp = spacy.load('en_core_web_sm')

# Build the second entity extractor that extract_info() calls via .annotate().
# NOTE(review): the original comment called this "GLiNER", but the class is
# imported from `giner` as `GiNER` -- confirm the intended package, class name
# and model identifier.
giner = GiNER("roberta-large")

def extract_text_from_pdf(file):
    """Return the concatenated text of every page of a PDF.

    Args:
        file: Binary file-like object. Its whole content is read with
            ``file.read()`` and parsed from memory.

    Returns:
        The text of all pages joined in page order with no extra separator.
    """
    # Open the document as a context manager so it is closed even when a page
    # fails to parse; previously the document was never closed.
    with fitz.open(stream=file.read(), filetype="pdf") as pdf:
        return "".join(page.get_text() for page in pdf)

def extract_text_from_doc(file):
    """Return the paragraph text of a DOCX file as one string.

    Args:
        file: Path or file-like object handed straight to ``Document``.

    Returns:
        The ``text`` of every paragraph in ``Document(file).paragraphs``,
        joined with single spaces.
    """
    paragraphs = Document(file).paragraphs
    return " ".join(paragraph.text for paragraph in paragraphs)

def authenticate_gemini(api_key):
    """Configure the Gemini client and build a 'gemini-pro' model handle.

    Args:
        api_key: API key passed to ``genai.configure``.

    Returns:
        The ``genai.GenerativeModel`` instance, or ``None`` when configuring
        the client or constructing the model raised. In that case the error
        is shown in the Streamlit UI via ``st.error``.
    """
    try:
        genai.configure(api_key=api_key)
        gemini = genai.GenerativeModel('gemini-pro')
    except Exception as err:
        st.error(f"Authentication failed: {err}")
        return None
    return gemini

def generate_summary(text, model):
    """Ask the model for a short summary of a resume.

    Args:
        text: Resume text embedded verbatim in the prompt.
        model: Object exposing ``generate_content(prompt)`` whose result has a
            ``text`` attribute.

    Returns:
        The ``text`` attribute of the model's response.
    """
    instructions = (
        "Provide a brief overview of the candidate's qualifications, "
        "experience, and key skills."
    )
    prompt = f"Summarize the following resume:\n\n{text}\n\n{instructions}"
    return model.generate_content(prompt).text

def _mentions_school(name):
    """Return True when *name* contains a keyword that suggests a school."""
    lowered = name.lower()
    return any(
        keyword in lowered
        for keyword in ("university", "college", "institute", "school")
    )


def extract_info(text):
    """Extract companies, experience, education and contact details.

    The text is run through the module-level spaCy pipeline (``nlp``) and the
    module-level ``giner`` extractor, and the two result sets are merged.

    Args:
        text: Plain resume text.

    Returns:
        A 5-tuple ``(companies, experience, education, email, phone)``:
        ``companies`` and ``education`` are lists of unique names in no
        particular order, ``experience`` is an int number of years, and
        ``email`` / ``phone`` are strings, ``"Not found"`` when absent.
    """
    # Function-scope import keeps this block self-contained; the file's
    # import section does not bring in ``re``.
    import re

    doc = nlp(text)
    # NOTE(review): assumes annotate() returns a re-iterable sequence of dicts
    # with 'text' and 'type' keys -- confirm against the giner package.
    giner_results = giner.annotate(text)

    # Organisations reported by either extractor.
    companies = {ent.text for ent in doc.ents if ent.label_ == "ORG"}
    companies.update(
        entity['text'] for entity in giner_results if entity['type'] == "ORG"
    )

    # Experience heuristic: distance in years between now and the earliest
    # non-future date mentioned; 0 when no date could be parsed.
    current_year = datetime.now().year
    year_spans = [0]
    for ent in doc.ents:
        if ent.label_ != "DATE":
            continue
        parsed = dateparser.parse(ent.text)
        if parsed is not None and parsed.year <= current_year:
            year_spans.append(current_year - parsed.year)
    experience = max(year_spans)

    # Education is the subset of organisations whose name looks like a school.
    education = {name for name in companies if _mentions_school(name)}

    # Contact details from spaCy entities, when the pipeline emits such labels.
    email = next((ent.text for ent in doc.ents if ent.label_ == "EMAIL"), "Not found")
    phone = next((ent.text for ent in doc.ents if ent.label_ == "PHONE_NUMBER"), "Not found")

    # Fall back to the second extractor. The e-mail branch previously matched
    # "PER" entities, which reported a person's name as the e-mail address.
    # NOTE(review): "EMAIL" / "PHONE" label names are assumed -- confirm.
    for entity in giner_results:
        if entity['type'] == "EMAIL" and email == "Not found":
            email = entity['text']
        elif entity['type'] == "PHONE" and phone == "Not found":
            phone = entity['text']

    # Last resort for the e-mail: a plain pattern search over the raw text, so
    # an address is still found when neither extractor labels it.
    if email == "Not found":
        match = re.search(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9-]+(?:\.[A-Za-z0-9-]+)+", text)
        if match:
            email = match.group(0)

    return list(companies), experience, list(education), email, phone

def main():
    """Render the Streamlit page and analyse an uploaded resume.

    Reads the Gemini API key from ``st.secrets``, waits for a PDF or DOCX
    upload, extracts its text, shows the structured fields returned by
    ``extract_info`` and finally the summary from ``generate_summary``.
    Any exception raised while processing is reported in the UI together
    with its traceback.
    """
    st.title("Enhanced Resume Analyzer")

    api_key = st.secrets["GEMINI_API_KEY"]  # Use Streamlit secrets
    uploaded_file = st.file_uploader("Choose a PDF or DOCX file", type=["pdf", "docx"])

    # Nothing to do until the user has picked a file.
    if uploaded_file is None:
        return

    try:
        model = authenticate_gemini(api_key)
        if model is None:
            return

        # Map the upload's MIME type to the matching text extractor.
        extractors = {
            "application/pdf": extract_text_from_pdf,
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document": extract_text_from_doc,
        }
        extractor = extractors.get(uploaded_file.type)
        if extractor is None:
            st.error("Unsupported file format.")
            return
        resume_text = extractor(uploaded_file)

        companies, experience, education, email, phone = extract_info(resume_text)

        st.subheader("Extracted Information")
        st.write(f"*Years of Experience:* {experience}")
        st.write("*Companies:*", ", ".join(companies))
        st.write("*Education:*", ", ".join(education))
        st.write(f"*Email:* {email}")
        st.write(f"*Phone:* {phone}")

        summary = generate_summary(resume_text, model)
        st.subheader("Resume Summary")
        st.write(summary)

    except Exception as err:
        st.error(f"Error during processing: {str(err)}")
        st.exception(err)  # This will print the full traceback

# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()