Rahman Azhar committed
Commit 70ee247 · 1 Parent(s): 6c0ad0f

Switch to FLAN-T5 model for better accessibility

Files changed (4)
  1. README.md +9 -10
  2. config/config.json +1 -1
  3. src/generate.py +4 -5
  4. src/train.py +7 -8
README.md CHANGED
@@ -1,11 +1,11 @@
 ---
 language: en
 tags:
-- llama-2
+- flan-t5
 - travel
 - itinerary-generation
 - fine-tuning
-license: llama2
+license: apache-2.0
 datasets:
 - custom
 model-index:
@@ -13,13 +13,13 @@ model-index:
 results: []
 ---
 
-# LLaMA Itinerary Generator
+# T5 Itinerary Generator
 
-A custom fine-tuned version of LLaMA-2 for generating detailed travel itineraries.
+A custom fine-tuned version of FLAN-T5 for generating detailed travel itineraries.
 
 ## Model Description
 
-This model is fine-tuned from LLaMA-2 to specialize in generating detailed travel itineraries based on user preferences, destinations, duration, and budget constraints.
+This model is fine-tuned from Google's FLAN-T5 to specialize in generating detailed travel itineraries based on user preferences, destinations, duration, and budget constraints.
 
 ### Intended Use
 
@@ -38,10 +38,9 @@ The model is trained on a curated dataset of travel itineraries, including:
 
 ## Prerequisites
 
-1. Access to Hugging Face's LLaMA-2 model (requires approval from Meta)
-2. Python 3.8 or higher
-3. CUDA-capable GPU with at least 16GB VRAM
-4. Hugging Face account and token
+1. Python 3.8 or higher
+2. CUDA-capable GPU (8GB+ VRAM recommended)
+3. Hugging Face account and token
 
 ## Setup
 
@@ -142,4 +141,4 @@ huggingface-cli upload rahmanazhar/Travereel-Model-V1 output/
 
 ## License
 
-This project uses LLaMA 2 which is licensed under the LLAMA 2 Community License Agreement.
+This project uses FLAN-T5 which is licensed under the Apache 2.0 License.
config/config.json CHANGED
@@ -1,6 +1,6 @@
 {
   "model_config": {
-    "base_model": "meta-llama/Llama-2-7b-hf",
+    "base_model": "google/flan-t5-base",
     "max_length": 512,
     "learning_rate": 2e-5,
     "num_epochs": 3,
src/generate.py CHANGED
@@ -1,14 +1,13 @@
 import torch
-from transformers import LlamaForCausalLM, LlamaTokenizer
+from transformers import T5ForConditionalGeneration, T5Tokenizer
 import argparse
 import json
 
 class ItineraryGenerator:
     def __init__(self, model_path: str):
-        self.tokenizer = LlamaTokenizer.from_pretrained(model_path)
-        self.model = LlamaForCausalLM.from_pretrained(
+        self.tokenizer = T5Tokenizer.from_pretrained(model_path)
+        self.model = T5ForConditionalGeneration.from_pretrained(
             model_path,
-            torch_dtype=torch.float16,
             device_map="auto"
         )
         self.model.eval()
@@ -29,7 +28,7 @@ Budget: {budget}
 
 Detailed Itinerary:"""
 
-        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
+        inputs = self.tokenizer(prompt, return_tensors="pt", max_length=max_length, truncation=True).to(self.model.device)
 
         with torch.no_grad():
             outputs = self.model.generate(
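
A practical consequence of swapping LlamaForCausalLM for T5ForConditionalGeneration: T5 is an encoder-decoder model, so generate() returns only the decoder output rather than the prompt plus a continuation. A minimal standalone sketch of the new inference path; the checkpoint name matches config.json, but the prompt wording and generation parameters are illustrative assumptions, not taken from this diff.

```python
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Assumed checkpoint; the repo fine-tunes google/flan-t5-base per config.json.
model_name = "google/flan-t5-base"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)
model.eval()

prompt = (
    "Create a detailed travel itinerary.\n"
    "Destination: Tokyo\nDuration: 3 days\nBudget: $1500\n\n"
    "Detailed Itinerary:"
)
inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)

with torch.no_grad():
    # Unlike the causal LLaMA setup, the output contains only the generated
    # answer tokens, so no prompt-stripping is needed afterwards.
    outputs = model.generate(**inputs, max_new_tokens=256, num_beams=4)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```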
src/train.py CHANGED
@@ -1,10 +1,10 @@
 import torch
 from transformers import (
-    LlamaForCausalLM,
-    LlamaTokenizer,
+    T5ForConditionalGeneration,
+    T5Tokenizer,
     TrainingArguments,
     Trainer,
-    DataCollatorForLanguageModeling
+    DataCollatorForSeq2Seq
 )
 from datasets import load_dataset
 import os
@@ -51,7 +51,7 @@ Budget: {example['budget']}"""
     }
 
 def train_itinerary_model(
-    model_name: str = "meta-llama/Llama-2-7b-hf",
+    model_name: str = "google/flan-t5-base",
     data_path: str = "data/itineraries.json",
     output_dir: str = "output",
     num_epochs: int = 3,
@@ -59,10 +59,9 @@ def train_itinerary_model(
     learning_rate: float = 2e-5,
 ):
     # Initialize tokenizer and model
-    tokenizer = LlamaTokenizer.from_pretrained(model_name)
-    model = LlamaForCausalLM.from_pretrained(
+    tokenizer = T5Tokenizer.from_pretrained(model_name)
+    model = T5ForConditionalGeneration.from_pretrained(
         model_name,
-        torch_dtype=torch.float16,
         device_map="auto"
     )
 
@@ -88,7 +87,7 @@ def train_itinerary_model(
         model=model,
         args=training_args,
         train_dataset=dataset,
-        data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
+        data_collator=DataCollatorForSeq2Seq(tokenizer)
     )
 
     # Train the model
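
The collator swap follows from the architecture change: T5 is trained as a sequence-to-sequence model, so inputs and targets are tokenized separately and DataCollatorForSeq2Seq pads labels with -100 so padding is ignored by the loss, whereas DataCollatorForLanguageModeling(tokenizer, mlm=False) derives causal-LM labels from the inputs themselves. A minimal sketch of preprocessing compatible with this collator; the field names "prompt" and "itinerary" are assumptions about data/itineraries.json, not taken from this diff.

```python
from transformers import DataCollatorForSeq2Seq, T5ForConditionalGeneration, T5Tokenizer

tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")

def preprocess(example):
    # Encoder input: the itinerary request; decoder target: the itinerary text.
    # Field names here are assumptions about the dataset schema.
    model_inputs = tokenizer(example["prompt"], max_length=512, truncation=True)
    labels = tokenizer(text_target=example["itinerary"], max_length=512, truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# The collator pads inputs and labels per batch and replaces label padding with
# -100 so it is ignored by the loss; passing the model also lets it prepare
# decoder_input_ids for T5.
collator = DataCollatorForSeq2Seq(tokenizer, model=model, label_pad_token_id=-100)
```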