philocifer committed on
Commit
f6a2023
·
1 Parent(s): 51ed0dc

Added fine tuning evaluation

Browse files
Files changed (3) hide show
  1. README.md +24 -0
  2. finetune_eval.py +36 -0
  3. rag_agent.py +2 -2
README.md CHANGED
@@ -59,3 +59,27 @@ Weaknesses:
59
  3. Noise Sensitivity (0.5952) - Vulnerable to irrelevant/conflicting information
60
  ### Fine-Tuning Open-Source Embeddings
61
  https://huggingface.co/philocifer/banner-flip-arctic-embed-l
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  3. Noise Sensitivity (0.5952) - Vulnerable to irrelevant/conflicting information
60
  ### Fine-Tuning Open-Source Embeddings
61
  https://huggingface.co/philocifer/banner-flip-arctic-embed-l
62
+ ### Assessing Performance of Fine-Tuned Embeddings
63
+ | Metric | Score |
64
+ |-----------------------------|---------|
65
+ | Context Recall | 0.9175 |
66
+ | Faithfulness | 0.8203 |
67
+ | Factual Correctness | 0.7225 |
68
+ | Answer Relevancy | 0.9669 |
69
+ | Context Entity Recall | 0.5711 |
70
+ | Noise Sensitivity Relevant | 0.0000 |
71
+
72
+ #### Evaluation Comparison
73
+ Significant Improvements
74
+ - Factual Correctness surged 39% (0.52 → 0.72) - Substantially more reliable answers
75
+ - Context Recall jumped 16% (0.79 → 0.92) - Better retrieval of relevant information
76
+ - Answer Relevancy reached near-perfect 0.97 (+7%) - Sharper focus on query intent
77
+
78
+ Trade-offs
79
+ - Faithfulness dipped 6% (0.87 → 0.82) - Slightly less strict adherence to source context despite better facts
80
+
81
+ Notable Changes
82
+ - Noise Sensitivity collapsed to 0.00 (-100%) - Complete immunity to irrelevant information (requires verification)
83
+ - Context Entity Recall improved 31% (0.44 → 0.57) - Remains a relative weakness in the system
84
+
85
+ In the second half of the course, I will focus more on improving the SQL agent as it is much better at handling structured data in large volumes.
finetune_eval.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from rag_agent import load_agent, rag_agent
2
+ from ragas_eval import run_ragas_evaluation
3
+ from synthetic_data_gen import generate_synthetic_data
4
+ from langchain_huggingface import HuggingFaceEmbeddings
5
+ from dotenv import load_dotenv
6
+ import json
7
+
8
+ load_dotenv()
9
+
10
+ print("Loading fine-tuned embeddings...")
11
+ finetuned_embeddings = HuggingFaceEmbeddings(model_name="philocifer/banner-flip-arctic-embed-l")
12
+
13
+ print("Loading fine-tuned RAG agent...")
14
+ finetuned_rag_agent = load_agent(embeddings=finetuned_embeddings, embedding_dimension=1024)
15
+
16
+ print("Generating synthetic data...")
17
+ dataset = generate_synthetic_data()
18
+
19
+ print("Running fine-tuned RAGAS evaluation...")
20
+ finetuned_result = run_ragas_evaluation(finetuned_rag_agent, dataset)
21
+
22
+ print(f"Fine-tuned RAGAS Evaluation Result: {finetuned_result}")
23
+
24
+ print("Saving fine-tuned RAGAS evaluation result...")
25
+ with open("ragas_eval/finetuned_result.json", "w") as f:
26
+ json.dump(finetuned_result, f)
27
+
28
+ print("Running base RAGAS evaluation...")
29
+ base_result = run_ragas_evaluation(rag_agent, dataset)
30
+
31
+ print(f"Base RAGAS Evaluation Result: {base_result}")
32
+
33
+ print("Saving base RAGAS evaluation result...")
34
+ with open("ragas_eval/base_result.json", "w") as f:
35
+ json.dump(base_result, f)
36
+
rag_agent.py CHANGED
@@ -14,7 +14,7 @@ from tqdm import tqdm
14
 
15
  load_dotenv()
16
 
17
- def load_agent(embeddings=None):
18
  if embeddings is None:
19
  embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
20
 
@@ -31,7 +31,7 @@ def load_agent(embeddings=None):
31
  client = QdrantClient(":memory:")
32
  client.create_collection(
33
  collection_name="competitor_stores",
34
- vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
35
  )
36
 
37
  vector_store = QdrantVectorStore(
 
14
 
15
  load_dotenv()
16
 
17
+ def load_agent(embeddings=None, embedding_dimension=1536):
18
  if embeddings is None:
19
  embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
20
 
 
31
  client = QdrantClient(":memory:")
32
  client.create_collection(
33
  collection_name="competitor_stores",
34
+ vectors_config=VectorParams(size=embedding_dimension, distance=Distance.COSINE),
35
  )
36
 
37
  vector_store = QdrantVectorStore(