Rahman Azhar
committed on
Commit
·
70ee247
1
Parent(s):
6c0ad0f
Switch to FLAN-T5 model for better accessibility
Browse files
- README.md +9 -10
- config/config.json +1 -1
- src/generate.py +4 -5
- src/train.py +7 -8
README.md
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
---
|
2 |
language: en
|
3 |
tags:
|
4 |
-
-
|
5 |
- travel
|
6 |
- itinerary-generation
|
7 |
- fine-tuning
|
8 |
-
license:
|
9 |
datasets:
|
10 |
- custom
|
11 |
model-index:
|
@@ -13,13 +13,13 @@ model-index:
|
|
13 |
results: []
|
14 |
---
|
15 |
|
16 |
-
#
|
17 |
|
18 |
-
A custom fine-tuned version of
|
19 |
|
20 |
## Model Description
|
21 |
|
22 |
-
This model is fine-tuned from
|
23 |
|
24 |
### Intended Use
|
25 |
|
@@ -38,10 +38,9 @@ The model is trained on a curated dataset of travel itineraries, including:
|
|
38 |
|
39 |
## Prerequisites
|
40 |
|
41 |
-
1.
|
42 |
-
2.
|
43 |
-
3.
|
44 |
-
4. Hugging Face account and token
|
45 |
|
46 |
## Setup
|
47 |
|
@@ -142,4 +141,4 @@ huggingface-cli upload rahmanazhar/Travereel-Model-V1 output/
|
|
142 |
|
143 |
## License
|
144 |
|
145 |
-
This project uses
|
|
|
1 |
---
|
2 |
language: en
|
3 |
tags:
|
4 |
+
- flan-t5
|
5 |
- travel
|
6 |
- itinerary-generation
|
7 |
- fine-tuning
|
8 |
+
license: apache-2.0
|
9 |
datasets:
|
10 |
- custom
|
11 |
model-index:
|
|
|
13 |
results: []
|
14 |
---
|
15 |
|
16 |
+
# T5 Itinerary Generator
|
17 |
|
18 |
+
A custom fine-tuned version of FLAN-T5 for generating detailed travel itineraries.
|
19 |
|
20 |
## Model Description
|
21 |
|
22 |
+
This model is fine-tuned from Google's FLAN-T5 to specialize in generating detailed travel itineraries based on user preferences, destinations, duration, and budget constraints.
|
23 |
|
24 |
### Intended Use
|
25 |
|
|
|
38 |
|
39 |
## Prerequisites
|
40 |
|
41 |
+
1. Python 3.8 or higher
|
42 |
+
2. CUDA-capable GPU (8GB+ VRAM recommended)
|
43 |
+
3. Hugging Face account and token
|
|
|
44 |
|
45 |
## Setup
|
46 |
|
|
|
141 |
|
142 |
## License
|
143 |
|
144 |
+
This project uses FLAN-T5, which is licensed under the Apache 2.0 License.
|
config/config.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"model_config": {
|
3 |
-
"base_model": "
|
4 |
"max_length": 512,
|
5 |
"learning_rate": 2e-5,
|
6 |
"num_epochs": 3,
|
|
|
1 |
{
|
2 |
"model_config": {
|
3 |
+
"base_model": "google/flan-t5-base",
|
4 |
"max_length": 512,
|
5 |
"learning_rate": 2e-5,
|
6 |
"num_epochs": 3,
|
src/generate.py
CHANGED
@@ -1,14 +1,13 @@
|
|
1 |
import torch
|
2 |
-
from transformers import
|
3 |
import argparse
|
4 |
import json
|
5 |
|
6 |
class ItineraryGenerator:
|
7 |
def __init__(self, model_path: str):
|
8 |
-
self.tokenizer =
|
9 |
-
self.model =
|
10 |
model_path,
|
11 |
-
torch_dtype=torch.float16,
|
12 |
device_map="auto"
|
13 |
)
|
14 |
self.model.eval()
|
@@ -29,7 +28,7 @@ Budget: {budget}
|
|
29 |
|
30 |
Detailed Itinerary:"""
|
31 |
|
32 |
-
inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
|
33 |
|
34 |
with torch.no_grad():
|
35 |
outputs = self.model.generate(
|
|
|
1 |
import torch
|
2 |
+
from transformers import T5ForConditionalGeneration, T5Tokenizer
|
3 |
import argparse
|
4 |
import json
|
5 |
|
6 |
class ItineraryGenerator:
|
7 |
def __init__(self, model_path: str):
|
8 |
+
self.tokenizer = T5Tokenizer.from_pretrained(model_path)
|
9 |
+
self.model = T5ForConditionalGeneration.from_pretrained(
|
10 |
model_path,
|
|
|
11 |
device_map="auto"
|
12 |
)
|
13 |
self.model.eval()
|
|
|
28 |
|
29 |
Detailed Itinerary:"""
|
30 |
|
31 |
+
inputs = self.tokenizer(prompt, return_tensors="pt", max_length=max_length, truncation=True).to(self.model.device)
|
32 |
|
33 |
with torch.no_grad():
|
34 |
outputs = self.model.generate(
|
src/train.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
import torch
|
2 |
from transformers import (
|
3 |
-
|
4 |
-
|
5 |
TrainingArguments,
|
6 |
Trainer,
|
7 |
-
|
8 |
)
|
9 |
from datasets import load_dataset
|
10 |
import os
|
@@ -51,7 +51,7 @@ Budget: {example['budget']}"""
|
|
51 |
}
|
52 |
|
53 |
def train_itinerary_model(
|
54 |
-
model_name: str = "
|
55 |
data_path: str = "data/itineraries.json",
|
56 |
output_dir: str = "output",
|
57 |
num_epochs: int = 3,
|
@@ -59,10 +59,9 @@ def train_itinerary_model(
|
|
59 |
learning_rate: float = 2e-5,
|
60 |
):
|
61 |
# Initialize tokenizer and model
|
62 |
-
tokenizer =
|
63 |
-
model =
|
64 |
model_name,
|
65 |
-
torch_dtype=torch.float16,
|
66 |
device_map="auto"
|
67 |
)
|
68 |
|
@@ -88,7 +87,7 @@ def train_itinerary_model(
|
|
88 |
model=model,
|
89 |
args=training_args,
|
90 |
train_dataset=dataset,
|
91 |
-
data_collator=
|
92 |
)
|
93 |
|
94 |
# Train the model
|
|
|
1 |
import torch
|
2 |
from transformers import (
|
3 |
+
T5ForConditionalGeneration,
|
4 |
+
T5Tokenizer,
|
5 |
TrainingArguments,
|
6 |
Trainer,
|
7 |
+
DataCollatorForSeq2Seq
|
8 |
)
|
9 |
from datasets import load_dataset
|
10 |
import os
|
|
|
51 |
}
|
52 |
|
53 |
def train_itinerary_model(
|
54 |
+
model_name: str = "google/flan-t5-base",
|
55 |
data_path: str = "data/itineraries.json",
|
56 |
output_dir: str = "output",
|
57 |
num_epochs: int = 3,
|
|
|
59 |
learning_rate: float = 2e-5,
|
60 |
):
|
61 |
# Initialize tokenizer and model
|
62 |
+
tokenizer = T5Tokenizer.from_pretrained(model_name)
|
63 |
+
model = T5ForConditionalGeneration.from_pretrained(
|
64 |
model_name,
|
|
|
65 |
device_map="auto"
|
66 |
)
|
67 |
|
|
|
87 |
model=model,
|
88 |
args=training_args,
|
89 |
train_dataset=dataset,
|
90 |
+
data_collator=DataCollatorForSeq2Seq(tokenizer)
|
91 |
)
|
92 |
|
93 |
# Train the model
|