# Spaces: Runtime error
import json
import os

import google.generativeai as genai
import gradio as gr
import numpy as np
import pandas as pd
import requests
import torch
from sentence_transformers import SentenceTransformer
from torch.nn.functional import cosine_similarity
# One-time start-up work: configure the Gemini SDK, load the course catalogue
# and pre-compute one sentence embedding per course description.
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))

bert_model = SentenceTransformer('all-MiniLM-L6-v2')
df = pd.read_csv("cleaned_data.csv")


def _embed_description(text):
    """Encode a single course description into an embedding tensor."""
    return bert_model.encode(text, convert_to_tensor=True)


# Stored as a column of per-row tensors; recommend_courses stacks them later.
df["course_embedding"] = df["Transformed_description"].apply(_embed_description)
_SKILL_LEVELS = ("beginner", "intermediate", "advanced")


def _split_csv(raw):
    """Return the stripped, non-empty items of a comma-separated string."""
    return [item.strip() for item in raw.split(",") if item.strip()]


def _generate(prompt):
    """Send ``prompt`` to Gemini and return the SDK response object."""
    # The google.generativeai SDK configured at module level exposes text
    # generation through GenerativeModel; it has no ``client`` object.
    return genai.GenerativeModel("gemini-pro").generate_content(prompt)


def _rank_courses(similarities, top_n=6):
    """Score every course in ``df`` and return the ``top_n`` best rows.

    ``similarities`` is a 1-D array with one user/course similarity per row
    of ``df``. The result is a new DataFrame (columns ``course_name``,
    ``course_url``, ``normalized_difficulty``, ``ranking_score``); the shared
    module-level ``df`` is left untouched so concurrent requests cannot
    overwrite each other's scores.

    Assumes ``course_rating`` and ``course_difficulty`` are numeric columns.
    """
    # NOTE: "time_to_complete" carries a weight but no term of the score uses
    # it, so the maximum attainable score is 0.9.
    weights = {
        "similarity": 0.6,
        "rating": 0.2,
        "difficulty": 0.1,
        "time_to_complete": 0.1,
    }

    ratings = df["course_rating"]
    lowest, highest = ratings.min(), ratings.max()
    normalized_rating = ratings - lowest
    if highest > lowest:
        normalized_rating = normalized_rating / (highest - lowest)
    # else: all ratings are equal; keep the all-zero column instead of 0/0.

    difficulty = df["course_difficulty"]
    hardest = difficulty.max()
    # Without a positive maximum there is nothing to scale by; treat every
    # course as equally easy rather than dividing by zero.
    relative_difficulty = difficulty / hardest if hardest > 0 else difficulty * 0
    normalized_difficulty = 1 - relative_difficulty

    ranked = df[["course_name", "course_url"]].copy()
    ranked["normalized_difficulty"] = normalized_difficulty
    ranked["ranking_score"] = (
        weights["similarity"] * similarities
        + weights["rating"] * normalized_rating.values
        + weights["difficulty"] * normalized_difficulty.values
    )
    return ranked.sort_values(by="ranking_score", ascending=False).head(top_n)


def _parse_categorized_courses(raw_text):
    """Parse Gemini's categorisation reply into a validated dict.

    Markdown code fences are stripped before decoding. Every skill level is
    guaranteed to be present in the result; a level whose value is not a list
    of ``{"name": ..., "url": ...}`` objects is reset to an empty list.

    Raises:
        ValueError: if the text is not valid JSON or is not a JSON object.
    """
    cleaned = raw_text.strip().replace('```json', '').replace('```', '')
    parsed = json.loads(cleaned)
    if not isinstance(parsed, dict):
        raise ValueError("expected a JSON object")

    for level in _SKILL_LEVELS:
        entries = parsed.get(level)
        is_valid = isinstance(entries, list) and all(
            isinstance(course, dict) and "name" in course and "url" in course
            for course in entries
        )
        if not is_valid:
            parsed[level] = []
    return parsed


def recommend_courses(skills, interests, experience, education, time, certificates, careerpath):
    """Recommend courses for a user profile, grouped by difficulty level.

    Each argument is a comma-separated free-text string taken from the UI.
    The profile is sent to Gemini for a one-word skill-level estimate, then
    embedded and compared with the pre-computed course embeddings. Courses
    are ranked by a weighted mix of similarity, rating and (inverse)
    difficulty, and the top six are sent back to Gemini to be sorted into
    beginner / intermediate / advanced buckets.

    Returns:
        dict: ``{"beginner": [...], "intermediate": [...], "advanced": [...]}``
        where each entry is ``{"name": ..., "url": ...}``. If the
        categorisation reply cannot be parsed, the three lists are empty and
        an ``"error"`` key is added. Any other failure yields
        ``{"error": <message>}``; exceptions are never propagated to the UI.
    """
    try:
        user_profile = {
            "skills": _split_csv(skills),
            "interests": _split_csv(interests),
            "experience": _split_csv(experience),
            "education": _split_csv(education),
            "time": _split_csv(time),
            "certificates": _split_csv(certificates),
            "careerpath": _split_csv(careerpath),
        }

        # Ask Gemini for a one-word skill level assessment.
        response = _generate(f"""
        Give the current skill level in one word out of 'beginner', 'intermediate', 'advanced'.
        Here is the user profile: {user_profile}
        strictly do not output any extra textual data.""")
        current_skill = response.text.strip().replace("\n", "")
        user_profile["CurrentSkill"] = [current_skill]

        # Dict insertion order gives: skills, interests, experience,
        # education, time, certificates, careerpath, CurrentSkill.
        user_text = " ".join(
            item for items in user_profile.values() for item in items
        )

        user_embedding = bert_model.encode(user_text, convert_to_tensor=True)
        course_embeddings = torch.stack(df["course_embedding"].tolist())
        # Add an explicit batch axis so the user vector is compared with every
        # course row along dim=1 instead of relying on implicit broadcasting.
        similarities = cosine_similarity(user_embedding.unsqueeze(0), course_embeddings)

        top_courses = _rank_courses(similarities.cpu().numpy())
        course_names = top_courses["course_name"].tolist()
        # Only the selected courses' URLs are sent: interpolating the whole
        # URL column would embed its truncated repr in the prompt.
        course_links = top_courses[["course_name", "course_url"]].to_dict(orient="records")

        response2 = _generate(f"""
        Return a JSON object with this exact structure:
        {{
        "beginner": [
        {{"name": "course name", "url": "course url"}}
        ],
        "intermediate": [
        {{"name": "course name", "url": "course url"}}
        ],
        "advanced": [
        {{"name": "course name", "url": "course url"}}
        ]
        }}
        Categorize these courses: {course_names}
        Add Url of the specific course from {course_links}
        Based on:
        - User skill level: {current_skill}
        - Course difficulties: {top_courses['normalized_difficulty'].tolist()}
        - User skills: {user_profile['skills']}
        Categorise atleast one course for each beginner, intermediate and advanced.
        Return ONLY valid JSON without any extra text.
        """)

        try:
            return _parse_categorized_courses(response2.text)
        except (ValueError, TypeError):
            # Covers malformed JSON (JSONDecodeError is a ValueError) and
            # replies with no usable text.
            return {
                "beginner": [],
                "intermediate": [],
                "advanced": [],
                "error": "Failed to categorize courses",
            }
    except Exception as e:
        # UI boundary: report the failure as JSON instead of crashing the app.
        return {"error": str(e)}
# Gradio front end: one free-text box per profile field, JSON result pane.
# Order matters: it must match the positional parameters of recommend_courses.
_PROFILE_FIELDS = (
    ("Skills", "python, machine learning"),
    ("Interests", "AI, data science"),
    ("Experience", "2 years python"),
    ("Education", "bachelor's in CS"),
    ("Time Available", "6 months"),
    ("Certificates", "AWS, GCP"),
    ("Career Path", "ML engineer"),
)

_profile_inputs = [
    gr.Textbox(label=field_label, placeholder=example)
    for field_label, example in _PROFILE_FIELDS
]

iface = gr.Interface(
    fn=recommend_courses,
    inputs=_profile_inputs,
    outputs=gr.JSON(),
    title="Personalized Course Recommender",
    description="Enter your profile details to get course recommendations organized by difficulty level",
)

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch(share=True)