Rahman Azhar committed
Commit 70ee247 · 1 Parent(s): 6c0ad0f

Switch to FLAN-T5 model for better accessibility

Files changed (4)
  1. README.md +9 -10
  2. config/config.json +1 -1
  3. src/generate.py +4 -5
  4. src/train.py +7 -8
README.md CHANGED
@@ -1,11 +1,11 @@
 ---
 language: en
 tags:
-- llama-2
+- flan-t5
 - travel
 - itinerary-generation
 - fine-tuning
-license: llama2
+license: apache-2.0
 datasets:
 - custom
 model-index:
@@ -13,13 +13,13 @@ model-index:
 results: []
 ---
 
-# LLaMA Itinerary Generator
+# T5 Itinerary Generator
 
-A custom fine-tuned version of LLaMA-2 for generating detailed travel itineraries.
+A custom fine-tuned version of FLAN-T5 for generating detailed travel itineraries.
 
 ## Model Description
 
-This model is fine-tuned from LLaMA-2 to specialize in generating detailed travel itineraries based on user preferences, destinations, duration, and budget constraints.
+This model is fine-tuned from Google's FLAN-T5 to specialize in generating detailed travel itineraries based on user preferences, destinations, duration, and budget constraints.
 
 ### Intended Use
 
@@ -38,10 +38,9 @@ The model is trained on a curated dataset of travel itineraries, including:
 
 ## Prerequisites
 
-1. Access to Hugging Face's LLaMA-2 model (requires approval from Meta)
-2. Python 3.8 or higher
-3. CUDA-capable GPU with at least 16GB VRAM
-4. Hugging Face account and token
+1. Python 3.8 or higher
+2. CUDA-capable GPU (8GB+ VRAM recommended)
+3. Hugging Face account and token
 
 ## Setup
 
@@ -142,4 +141,4 @@ huggingface-cli upload rahmanazhar/Travereel-Model-V1 output/
 
 ## License
 
-This project uses LLaMA 2 which is licensed under the LLAMA 2 Community License Agreement.
+This project uses FLAN-T5 which is licensed under the Apache 2.0 License.
config/config.json CHANGED
@@ -1,6 +1,6 @@
 {
   "model_config": {
-    "base_model": "meta-llama/Llama-2-7b-hf",
+    "base_model": "google/flan-t5-base",
     "max_length": 512,
     "learning_rate": 2e-5,
     "num_epochs": 3,
src/generate.py CHANGED
@@ -1,14 +1,13 @@
 import torch
-from transformers import LlamaForCausalLM, LlamaTokenizer
+from transformers import T5ForConditionalGeneration, T5Tokenizer
 import argparse
 import json
 
 class ItineraryGenerator:
     def __init__(self, model_path: str):
-        self.tokenizer = LlamaTokenizer.from_pretrained(model_path)
-        self.model = LlamaForCausalLM.from_pretrained(
+        self.tokenizer = T5Tokenizer.from_pretrained(model_path)
+        self.model = T5ForConditionalGeneration.from_pretrained(
             model_path,
-            torch_dtype=torch.float16,
             device_map="auto"
         )
         self.model.eval()
@@ -29,7 +28,7 @@ Budget: {budget}
 
 Detailed Itinerary:"""
 
-        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
+        inputs = self.tokenizer(prompt, return_tensors="pt", max_length=max_length, truncation=True).to(self.model.device)
 
         with torch.no_grad():
             outputs = self.model.generate(
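
A practical consequence of swapping LlamaForCausalLM for T5ForConditionalGeneration: T5 is an encoder-decoder model, so generate() returns only the decoder output rather than the prompt plus a continuation. A minimal standalone sketch of the new inference path; the checkpoint name matches config.json, but the prompt wording and generation parameters are illustrative assumptions, not taken from this diff.

```python
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Assumed checkpoint; the repo fine-tunes google/flan-t5-base per config.json.
model_name = "google/flan-t5-base"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)
model.eval()

prompt = (
    "Create a detailed travel itinerary.\n"
    "Destination: Tokyo\nDuration: 3 days\nBudget: $1500\n\n"
    "Detailed Itinerary:"
)
inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)

with torch.no_grad():
    # Unlike the causal LLaMA setup, the output contains only the generated
    # answer tokens, so no prompt-stripping is needed afterwards.
    outputs = model.generate(**inputs, max_new_tokens=256, num_beams=4)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```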
src/train.py CHANGED
@@ -1,10 +1,10 @@
 import torch
 from transformers import (
-    LlamaForCausalLM,
-    LlamaTokenizer,
+    T5ForConditionalGeneration,
+    T5Tokenizer,
     TrainingArguments,
     Trainer,
-    DataCollatorForLanguageModeling
+    DataCollatorForSeq2Seq
 )
 from datasets import load_dataset
 import os
@@ -51,7 +51,7 @@ Budget: {example['budget']}"""
     }
 
 def train_itinerary_model(
-    model_name: str = "meta-llama/Llama-2-7b-hf",
+    model_name: str = "google/flan-t5-base",
     data_path: str = "data/itineraries.json",
     output_dir: str = "output",
     num_epochs: int = 3,
@@ -59,10 +59,9 @@ def train_itinerary_model(
     learning_rate: float = 2e-5,
 ):
     # Initialize tokenizer and model
-    tokenizer = LlamaTokenizer.from_pretrained(model_name)
-    model = LlamaForCausalLM.from_pretrained(
+    tokenizer = T5Tokenizer.from_pretrained(model_name)
+    model = T5ForConditionalGeneration.from_pretrained(
         model_name,
-        torch_dtype=torch.float16,
         device_map="auto"
     )
 
@@ -88,7 +87,7 @@ def train_itinerary_model(
         model=model,
         args=training_args,
         train_dataset=dataset,
-        data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
+        data_collator=DataCollatorForSeq2Seq(tokenizer)
     )
 
     # Train the model
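
The collator swap follows from the architecture change: T5 is trained as a sequence-to-sequence model, so inputs and targets are tokenized separately and DataCollatorForSeq2Seq pads labels with -100 so padding is ignored by the loss, whereas DataCollatorForLanguageModeling(tokenizer, mlm=False) derives causal-LM labels from the inputs themselves. A minimal sketch of preprocessing compatible with this collator; the field names "prompt" and "itinerary" are assumptions about data/itineraries.json, not taken from this diff.

```python
from transformers import DataCollatorForSeq2Seq, T5ForConditionalGeneration, T5Tokenizer

tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")

def preprocess(example):
    # Encoder input: the itinerary request; decoder target: the itinerary text.
    # Field names here are assumptions about the dataset schema.
    model_inputs = tokenizer(example["prompt"], max_length=512, truncation=True)
    labels = tokenizer(text_target=example["itinerary"], max_length=512, truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# The collator pads inputs and labels per batch and replaces label padding with
# -100 so it is ignored by the loss; passing the model also lets it prepare
# decoder_input_ids for T5.
collator = DataCollatorForSeq2Seq(tokenizer, model=model, label_pad_token_id=-100)
```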