yashoda74679 committed on
Commit
3c479cd
·
verified ·
1 Parent(s): a134229

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -0
app.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Fine-tune ``bert-base-uncased`` for 5-label sequence classification.

Running this script loads the ``yelp_review_full`` dataset, tokenizes its
``text`` column, trains with the Hugging Face ``Trainer`` for three epochs
(evaluating and saving once per epoch), and pushes the result to the
Hugging Face Hub.
"""

from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer, AutoTokenizer
import torch

# Load Dataset
dataset = load_dataset("yelp_review_full")  # Example dataset

# Load Pretrained Model & Tokenizer
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# num_labels=5 sizes the classification head for five output classes.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=5)


# Tokenize Dataset
def preprocess_function(examples):
    """Tokenize one batch of examples for the model.

    Args:
        examples: A batch from ``dataset.map(..., batched=True)``; only its
            ``"text"`` entry is read.

    Returns:
        The tokenizer output for the batch, truncated and padded so every
        sequence is exactly 512 tokens long.
    """
    # NOTE(review): padding every example to 512 tokens up front is simple but
    # memory-hungry; confirm this is intended rather than per-batch padding.
    return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=512)


encoded_dataset = dataset.map(preprocess_function, batched=True)

# Training Arguments
# NOTE(review): newer transformers releases rename `evaluation_strategy` to
# `eval_strategy`; confirm against the installed version before upgrading.
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    push_to_hub=True,  # Push trained model back to Hugging Face
)

# Define Trainer
# NOTE(review): newer transformers releases deprecate the `tokenizer=` argument
# in favour of `processing_class=`; confirm against the installed version.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["test"],
    tokenizer=tokenizer,
)

# Train the Model
trainer.train()

# Save & Push to Hub
trainer.push_to_hub()