SupremoUGH committed on
Commit
799babb
·
unverified ·
1 Parent(s): 1cd7cce

autotrain set up

Browse files
Files changed (4) hide show
  1. .gitignore +2 -0
  2. Dockerfile +22 -2
  3. requirements.txt +5 -0
  4. train.py +56 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ venv
2
+ results
Dockerfile CHANGED
@@ -1,2 +1,22 @@
1
- FROM huggingface/autotrain-advanced:latest
2
- CMD pip uninstall -y autotrain-advanced && pip install -U autotrain-advanced && autotrain app --host 0.0.0.0 --port 7860 --workers 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use a Hugging Face image with PyTorch and Transformers
FROM huggingface/transformers-pytorch-cpu:latest

# Set the working directory inside the container
WORKDIR /app

# Copy the requirements file first (to leverage Docker's caching mechanism)
COPY requirements.txt .

# Install system packages and upgrade pip.
# `apt-get update` and `apt-get install` share one RUN instruction so a cached
# (stale) package index layer can never be reused with a changed package list;
# the index is removed afterwards to keep the layer small.
RUN apt-get update \
    && apt-get install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg \
    && rm -rf /var/lib/apt/lists/* \
    && python3 -m pip install --no-cache-dir --upgrade pip

# Install Python dependencies
RUN python3 -m pip install --no-cache-dir -r requirements.txt

# Copy all remaining files into the container
COPY . .

# Run the training script when the container starts
CMD ["python3", "train.py"]
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ transformers>=4.18.0
2
+ datasets>=2.0.0
3
+ torch>=1.10.0
4
+ Pillow>=8.4.0
5
+ accelerate>=0.9.0
train.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import (
2
+ AutoModelForImageClassification,
3
+ AutoImageProcessor,
4
+ TrainingArguments,
5
+ Trainer,
6
+ )
7
+ from datasets import load_dataset
8
+ import os
9
+
10
def train():
    """Fine-tune the image-classification checkpoint on MNIST and save it.

    Loads the ``ylecun/mnist`` dataset, preprocesses images on the fly with
    the checkpoint's image processor, trains for three epochs with the
    ``Trainer`` API (evaluating on the ``test`` split every 500 steps), and
    writes the fine-tuned model together with its image processor to
    ``./saved_model`` so the directory can be loaded on its own.

    Note: the ``eval_strategy`` keyword requires transformers >= 4.41; older
    releases only accept ``evaluation_strategy``.
    """
    # Single source of truth for the checkpoint used by processor and model.
    checkpoint = "SupremoUGH/image-classification-model"

    # Load dataset
    dataset = load_dataset("ylecun/mnist")

    # Load processor and apply preprocessing to the dataset
    processor = AutoImageProcessor.from_pretrained(checkpoint)

    def process(examples):
        """Convert a batch of images to RGB model inputs and attach labels."""
        images = [img.convert("RGB") for img in examples["image"]]
        inputs = processor(images=images, return_tensors="pt")
        inputs["labels"] = examples["label"]
        return inputs

    # Applied lazily when rows are accessed, so nothing is materialised up front.
    dataset.set_transform(process)  # Sometimes `map` instead of `set_transform`

    # Load model and train it with certain training arguments
    model = AutoModelForImageClassification.from_pretrained(checkpoint)
    training_args = TrainingArguments(
        output_dir="./results",
        # The transform reads the raw "image"/"label" columns, so the Trainer
        # must not drop columns that are absent from the model signature.
        remove_unused_columns=False,
        per_device_train_batch_size=16,  # Reduce batch size for efficiency
        eval_strategy="steps",
        num_train_epochs=3,
        fp16=False,  # Disable fp16 mixed precision
        save_steps=500,
        eval_steps=500,
        logging_steps=100,
        learning_rate=2e-4,
        push_to_hub=False,
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset["train"],
        eval_dataset=dataset["test"],  # Sometimes called "validation"
    )
    trainer.train()

    # Save fine-tuned model together with its image processor; without the
    # processor config the saved directory cannot be used for inference alone.
    save_dir = "./saved_model"
    os.makedirs(save_dir, exist_ok=True)
    trainer.save_model(save_dir)
    processor.save_pretrained(save_dir)
    print(f"Model saved to {save_dir}")


if __name__ == "__main__":
    train()