# Scraped page header (Hugging Face file viewer), kept as comments so the file parses:
#   yashbyname's picture
#   Update app.py
#   94dabc7 verified
#   raw
#   history blame contribute delete
#   6.43 kB
import json
import os

import google.generativeai as genai
import gradio as gr
import numpy as np
import pandas as pd
import requests
import torch
from sentence_transformers import SentenceTransformer
from torch.nn.functional import cosine_similarity
# Course catalogue. The code in this file reads the columns
# "Transformed_description", "course_rating", "course_difficulty",
# "course_name" and "course_url" from it.
df = pd.read_csv("cleaned_data.csv")

# Sentence-embedding model shared by the catalogue and by each user profile.
bert_model = SentenceTransformer("all-MiniLM-L6-v2")


def _embed_description(description):
    """Encode a single course description as a tensor with the shared model."""
    return bert_model.encode(description, convert_to_tensor=True)


# Embed every course once at import time so requests only embed the user text.
df["course_embedding"] = df["Transformed_description"].apply(_embed_description)

# The Gemini key is read from the environment; it is None when the variable is unset.
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
_SKILL_LEVELS = ("beginner", "intermediate", "advanced")
_GEMINI_MODEL_NAME = "gemini-pro"
_TOP_COURSE_COUNT = 6


def _split_csv(value):
    """Split a comma-separated form field into stripped, non-empty items."""
    return [item.strip() for item in (value or "").split(",") if item.strip()]


def _ask_gemini(prompt):
    """Send *prompt* to Gemini via the configured `genai` SDK and return the reply text."""
    model = genai.GenerativeModel(_GEMINI_MODEL_NAME)
    return model.generate_content(prompt).text


def _rank_top_courses(user_text):
    """Return the best-matching catalogue rows and their normalised difficulties.

    Scores are computed in local arrays so the shared ``df`` is never mutated
    by a request.
    """
    # "time_to_complete" is part of the declared scheme but no term uses it.
    weights = {
        "similarity": 0.6,
        "rating": 0.2,
        "difficulty": 0.1,
        "time_to_complete": 0.1,
    }

    user_embedding = bert_model.encode(user_text, convert_to_tensor=True)
    course_embeddings = torch.stack(df["course_embedding"].tolist())
    # Make the broadcast explicit: one user row against every course row.
    similarities = cosine_similarity(
        user_embedding.unsqueeze(0), course_embeddings, dim=1
    )

    rating = df["course_rating"]
    rating_span = rating.max() - rating.min()
    if rating_span:
        normalized_rating = (rating - rating.min()) / rating_span
    else:
        # All ratings equal: the signal carries no information, avoid 0/0 -> NaN.
        normalized_rating = pd.Series(0.0, index=rating.index)

    difficulty = df["course_difficulty"]
    max_difficulty = difficulty.max()
    if max_difficulty:
        normalized_difficulty = 1 - (difficulty / max_difficulty)
    else:
        # Maximum difficulty of 0: avoid dividing by zero.
        normalized_difficulty = pd.Series(1.0, index=difficulty.index)

    ranking_score = (
        weights["similarity"] * similarities.cpu().numpy()
        + weights["rating"] * normalized_rating.to_numpy()
        + weights["difficulty"] * normalized_difficulty.to_numpy()
    )

    # Positional indices, so a non-default or duplicated df index cannot mis-select rows.
    best_positions = (
        pd.Series(ranking_score)
        .sort_values(ascending=False)
        .head(_TOP_COURSE_COUNT)
        .index.to_numpy()
    )
    top_courses = df.iloc[best_positions]
    top_difficulties = normalized_difficulty.iloc[best_positions].tolist()
    return top_courses, top_difficulties


def _parse_categorised_courses(raw_text):
    """Parse Gemini's JSON reply into ``{level: [{"name", "url"}, ...]}``.

    Returns the empty-levels fallback with an ``"error"`` key when the reply is
    not a JSON object. A level whose value is missing, not a list, or contains
    an entry without both ``name`` and ``url`` is reset to ``[]``.
    """
    fallback = {
        "beginner": [],
        "intermediate": [],
        "advanced": [],
        "error": "Failed to categorize courses",
    }

    # The model often wraps JSON in a Markdown code fence; strip it before parsing.
    cleaned = raw_text.strip().replace("```json", "").replace("```", "")
    try:
        parsed = json.loads(cleaned)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return fallback
    if not isinstance(parsed, dict):
        return fallback

    for level in _SKILL_LEVELS:
        courses = parsed.get(level)
        is_valid = isinstance(courses, list) and all(
            isinstance(course, dict) and "name" in course and "url" in course
            for course in courses
        )
        if not is_valid:
            parsed[level] = []
    return parsed


def recommend_courses(skills, interests, experience, education, time, certificates, careerpath):
    """Recommend courses for a user profile, grouped by difficulty level.

    Every argument is a comma-separated string (empty or None is allowed).
    The function asks Gemini for the user's skill level, ranks the catalogue
    by embedding similarity, rating and difficulty, then asks Gemini to sort
    the top courses into beginner / intermediate / advanced.

    Returns:
        A dict with "beginner", "intermediate" and "advanced" lists of
        ``{"name": ..., "url": ...}`` entries. When categorisation fails the
        lists are empty and an "error" key is added. Any other failure returns
        ``{"error": <message>}`` so the UI shows it instead of crashing.
    """
    try:
        user_profile = {
            "skills": _split_csv(skills),
            "interests": _split_csv(interests),
            "experience": _split_csv(experience),
            "education": _split_csv(education),
            "time": _split_csv(time),
            "certificates": _split_csv(certificates),
            "careerpath": _split_csv(careerpath),
        }

        skill_prompt = f"""
        Give the current skill level in one word out of 'beginner', 'intermediate', 'advanced'.
        Here is the user profile: {user_profile}
        strictly do not output any extra textual data."""
        current_skill = _ask_gemini(skill_prompt).strip().replace("\n", "")
        user_profile["CurrentSkill"] = [current_skill]

        # Dict insertion order gives: skills, interests, experience, education,
        # time, certificates, careerpath, CurrentSkill.
        user_text = " ".join(
            item for items in user_profile.values() for item in items
        )

        top_courses, top_difficulties = _rank_top_courses(user_text)
        course_names = top_courses["course_name"].tolist()
        # Send only the URLs of the selected courses. Interpolating the whole
        # column would embed a truncated Series repr in the prompt.
        course_links = [
            {"name": name, "url": url}
            for name, url in zip(course_names, top_courses["course_url"].tolist())
        ]

        categorise_prompt = f"""
        Return a JSON object with this exact structure:
        {{
        "beginner": [
        {{"name": "course name", "url": "course url"}}
        ],
        "intermediate": [
        {{"name": "course name", "url": "course url"}}
        ],
        "advanced": [
        {{"name": "course name", "url": "course url"}}
        ]
        }}
        Categorize these courses: {course_names}
        Add Url of the specific course from {course_links}
        Based on:
        - User skill level: {current_skill}
        - Course difficulties: {top_difficulties}
        - User skills: {user_profile['skills']}
        Categorise atleast one course for each beginner, intermediate and advanced.
        Return ONLY valid JSON without any extra text.
        """
        return _parse_categorised_courses(_ask_gemini(categorise_prompt))
    except Exception as e:
        # Top-level boundary for the Gradio callback: report the failure as JSON.
        return {"error": str(e)}
# Gradio UI: one free-text box per profile field, listed in the same order as
# the parameters of recommend_courses. Each entry is (label, placeholder).
_PROFILE_FIELDS = [
    ("Skills", "python, machine learning"),
    ("Interests", "AI, data science"),
    ("Experience", "2 years python"),
    ("Education", "bachelor's in CS"),
    ("Time Available", "6 months"),
    ("Certificates", "AWS, GCP"),
    ("Career Path", "ML engineer"),
]

iface = gr.Interface(
    fn=recommend_courses,
    inputs=[
        gr.Textbox(label=field_label, placeholder=field_hint)
        for field_label, field_hint in _PROFILE_FIELDS
    ],
    outputs=gr.JSON(),
    title="Personalized Course Recommender",
    description="Enter your profile details to get course recommendations organized by difficulty level",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch(share=True)