Commit · f096e52
1 Parent(s): 8d55205
Added files
- app.py +75 -0
- assets/banana.jpg +0 -0
- assets/black.jpg +0 -0
- assets/eggplant.jpg +0 -0
- assets/mango.jpg +0 -0
- assets/melon.jpg +0 -0
- assets/orange.jpg +0 -0
- assets/pineapple.jpg +0 -0
- assets/white.jpg +0 -0
- labels.json +32 -0
- requirements.txt +5 -0
- src/__pycache__/dataset.cpython-310.pyc +0 -0
- src/__pycache__/utils.cpython-310.pyc +0 -0
- src/dataset.py +48 -0
- src/models/__pycache__/mobilenet_v2.cpython-310.pyc +0 -0
- src/models/__pycache__/mobilenet_v2.cpython-312.pyc +0 -0
- src/models/__pycache__/resnet50.cpython-310.pyc +0 -0
- src/models/__pycache__/resnet50.cpython-312.pyc +0 -0
- src/models/mobilenet_v2.py +151 -0
- src/models/resnet50.py +147 -0
- src/train.py +152 -0
- src/utils.py +32 -0
- weights/checkpoint-best-mobilenet.pth +3 -0
- weights/checkpoint-best-resnet.pth +3 -0
- weights/download_checkpoints.sh +2 -0
- weights/download_pretrained.py +0 -0
- weights/mobilenet_v2-b0353104.pth +3 -0
- weights/resnet50-0676ba61.pth +3 -0
app.py
ADDED
@@ -0,0 +1,75 @@
from typing import Dict
import gradio as gr
import json
import PIL.Image, PIL.ImageOps
import torch
import torchvision.transforms.functional as F

from src.models.resnet50 import ResNet
from src.models.mobilenet_v2 import MobileNetV2


num_classes = 30
model1 = ResNet(weights_path="weights/checkpoint-best-resnet.pth", num_classes=num_classes)
model1.eval()
model2 = MobileNetV2(weights_path="weights/checkpoint-best-mobilenet.pth", num_classes=num_classes)
model2.eval()


with open("labels.json", "r") as f:
    class_labels = json.load(f)
label_mapping = {v: k for k, v in class_labels.items()}


def predict(img, model_choice) -> Dict[str, float]:
    model = model1 if model_choice == "ResNet" else model2
    width, height = img.size
    max_dim = max(width, height)
    padding = (max_dim - width, max_dim - height)
    img = PIL.ImageOps.expand(img, padding, (255, 255, 255))
    img = img.resize((224, 224))
    img = F.to_tensor(img)
    img = F.normalize(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    img = img.unsqueeze(0)

    with torch.inference_mode():
        logits = model.forward(img)
    probs = torch.nn.functional.softmax(logits, dim=1)
    top_probs, top_indices = probs[0].topk(3)

    top_classes = {label_mapping[idx.item()]: prob.item() for idx, prob in zip(top_indices, top_probs)}
    return top_classes


examples = [
    ["assets/banana.jpg"],
    ["assets/pineapple.jpg"],
    ["assets/mango.jpg"],
    ["assets/melon.jpg"],
    ["assets/orange.jpg"],
    ["assets/eggplant.jpg"],
    ["assets/black.jpg"],
    ["assets/white.jpg"]
]


with gr.Blocks() as demo:
    gr.Markdown("## Plant Classification")
    with gr.Row():
        with gr.Column():
            pic = gr.Image(label="Upload Plant Image", type="pil", height=300, width=300)
            model_choice = gr.Dropdown(choices=["ResNet", "MobileNetV2"], label="Select Model", value="ResNet")
            with gr.Row():
                with gr.Column(scale=1):
                    predict_btn = gr.Button("Predict")
                with gr.Column(scale=1):
                    clear_btn = gr.Button("Clear")

        with gr.Column():
            output = gr.Label(label="Top 3 Predicted Classes")

    predict_btn.click(fn=predict, inputs=[pic, model_choice], outputs=output, api_name="predict")
    clear_btn.click(lambda: (None, None), outputs=[pic, output])
    gr.Examples(examples=examples, inputs=[pic])

demo.launch()
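For a quick sanity check outside the Gradio UI, the following sketch (not part of the commit) loads one of the fine-tuned checkpoints and reproduces the preprocessing from predict() above, minus the square-padding step; it assumes the repository root is the working directory.

# Hedged smoke test for the checkpoints used by app.py; illustrative only.
import json
import PIL.Image
import torch
import torchvision.transforms.functional as F
from src.models.resnet50 import ResNet

with open("labels.json") as fp:
    idx_to_name = {v: k for k, v in json.load(fp).items()}  # index -> class name

model = ResNet(weights_path="weights/checkpoint-best-resnet.pth", num_classes=30).eval()

img = PIL.Image.open("assets/banana.jpg").convert("RGB").resize((224, 224))
x = F.normalize(F.to_tensor(img), mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]).unsqueeze(0)
with torch.inference_mode():
    probs = torch.softmax(model(x), dim=1)[0]
print({idx_to_name[i.item()]: round(p.item(), 3) for p, i in zip(*probs.topk(3))})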
assets/banana.jpg
ADDED
assets/black.jpg
ADDED
assets/eggplant.jpg
ADDED
assets/mango.jpg
ADDED
assets/melon.jpg
ADDED
assets/orange.jpg
ADDED
assets/pineapple.jpg
ADDED
assets/white.jpg
ADDED
labels.json
ADDED
@@ -0,0 +1,32 @@
{
    "aloevera": 0,
    "banana": 1,
    "bilimbi": 2,
    "cantaloupe": 3,
    "cassava": 4,
    "coconut": 5,
    "corn": 6,
    "cucumber": 7,
    "curcuma": 8,
    "eggplant": 9,
    "galangal": 10,
    "ginger": 11,
    "guava": 12,
    "kale": 13,
    "longbeans": 14,
    "mango": 15,
    "melon": 16,
    "orange": 17,
    "paddy": 18,
    "papaya": 19,
    "peperchili": 20,
    "pineapple": 21,
    "pomelo": 22,
    "shallot": 23,
    "soybeans": 24,
    "spinach": 25,
    "sweetpotatoes": 26,
    "tobacco": 27,
    "waterapple": 28,
    "watermelon": 29
}
requirements.txt
ADDED
@@ -0,0 +1,5 @@
torch==2.4.1
torchvision==0.19.1
kaggle==1.6.17
wandb==0.18.5
gradio==5.4.0
src/__pycache__/dataset.cpython-310.pyc
ADDED
Binary file (2.26 kB)
src/__pycache__/utils.cpython-310.pyc
ADDED
Binary file (713 Bytes)
src/dataset.py
ADDED
@@ -0,0 +1,48 @@
from typing import List, Dict, Tuple, Callable
from pathlib import Path
from torch.utils.data import Dataset
import PIL.Image
import torch


class PlantsDataset(Dataset):
    def __init__(
        self,
        root: str,
        labels: Dict[str, int],
        transform: Callable,
        load_to_ram: bool = True,
    ) -> None:
        super().__init__()
        self.root = root
        self.labels = labels
        self.transform = transform
        self.load_to_ram = load_to_ram

        self.data = [
            {
                "path": x.as_posix(),
                "label": self.labels[x.parent.name],
                "image": PIL.Image.open(x).convert("RGB") if self.load_to_ram else None,
            }
            for x in sorted(Path(self.root).glob("**/*.jpg"))
        ]

    def __len__(self) -> int:
        return len(self.data)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
        item = self.data[idx]
        if self.load_to_ram:
            image = item["image"]
        else:
            image = PIL.Image.open(item["path"]).convert("RGB")
        image = self.transform(image)
        label = torch.tensor(item["label"], dtype=torch.long)
        return (image, label)


def collate_fn(items: List[Tuple[torch.Tensor, torch.Tensor]]) -> Tuple[torch.Tensor, torch.Tensor]:
    images = torch.cat([item[0].unsqueeze(0) for item in items])
    labels = torch.cat([item[1].unsqueeze(0) for item in items])
    return (images, labels)
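A short usage sketch for the dataset and collate function above (not part of the commit). The data path is the default from train.py and is an assumption about where the Kaggle dataset is unpacked; train.py itself relies on the default DataLoader collation, so passing collate_fn is optional.

# Hypothetical wiring of PlantsDataset; paths and batch size are assumptions.
import json
from torch.utils.data import DataLoader
from src.dataset import PlantsDataset, collate_fn
from src.utils import train_transform

with open("labels.json") as fp:
    labels = json.load(fp)  # class name -> integer index

dataset = PlantsDataset(root="data/plants/train", labels=labels, transform=train_transform, load_to_ram=False)
loader = DataLoader(dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)
images, targets = next(iter(loader))  # images: (32, 3, 224, 224), targets: (32,)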
src/models/__pycache__/mobilenet_v2.cpython-310.pyc
ADDED
Binary file (5.32 kB)
src/models/__pycache__/mobilenet_v2.cpython-312.pyc
ADDED
Binary file (9.01 kB)
src/models/__pycache__/resnet50.cpython-310.pyc
ADDED
Binary file (5.16 kB)
src/models/__pycache__/resnet50.cpython-312.pyc
ADDED
Binary file (9.77 kB)
src/models/mobilenet_v2.py
ADDED
@@ -0,0 +1,151 @@
from typing import Callable, List, Union
from pathlib import Path
import PIL.Image
import torch
from torch import nn
import torchvision.transforms.functional as F


class Conv2dNormActivation(nn.Module):
    def __init__(
        self, in_channels: int, out_channels: int, kernel_size: int = 3, stride: int = 1,
        padding: int | None = None, groups: int = 1, norm_layer: Callable[..., torch.nn.Module] = nn.BatchNorm2d,
        activation_layer: Callable[..., torch.nn.Module] = nn.ReLU, bias: bool | None = False,
    ) -> None:
        super().__init__()
        if padding is None:
            padding = (kernel_size - 1) // 2
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=bias, groups=groups)
        self.norm = norm_layer(out_channels)
        self.activation = activation_layer()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.conv(x)
        x = self.norm(x)
        x = self.activation(x)
        return x


class InvertedResidual(nn.Module):
    def __init__(
        self, inp: int, oup: int, stride: int, expand_ratio: int,
    ) -> None:
        super().__init__()
        self.stride = stride

        hidden_dim = int(round(inp * expand_ratio))
        self.use_res_connect = self.stride == 1 and inp == oup

        layers = []
        if expand_ratio != 1:
            layers.append(Conv2dNormActivation(inp, hidden_dim, kernel_size=1, norm_layer=nn.BatchNorm2d, activation_layer=nn.ReLU6))
        layers.extend(
            [
                Conv2dNormActivation(
                    hidden_dim,
                    hidden_dim,
                    stride=stride,
                    groups=hidden_dim,
                    norm_layer=nn.BatchNorm2d,
                    activation_layer=nn.ReLU6,
                ),
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
            ]
        )
        self.conv = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if self.use_res_connect:
            return x + self.conv(x)
        else:
            return self.conv(x)


class MobileNetV2(nn.Module):
    def __init__(
        self,
        num_classes: int = 1000,
        weights_path: str | None = None,
    ) -> None:
        super().__init__()

        if weights_path is not None and not Path(weights_path).exists():
            raise FileNotFoundError(weights_path)

        input_channel = 32
        last_channel = 1280
        inverted_residual_setting = [
            # t, c, n, s
            [1, 16, 1, 1],
            [6, 24, 2, 2],
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1],
        ]

        features = [Conv2dNormActivation(3, input_channel, stride=2, norm_layer=nn.BatchNorm2d, activation_layer=nn.ReLU6)]
        for t, c, n, s in inverted_residual_setting:
            output_channel = c
            for i in range(n):
                stride = s if i == 0 else 1
                features.append(InvertedResidual(input_channel, output_channel, stride, expand_ratio=t))
                input_channel = output_channel
        features.append(
            Conv2dNormActivation(
                input_channel, last_channel, kernel_size=1, norm_layer=nn.BatchNorm2d, activation_layer=nn.ReLU6
            )
        )
        self.features = nn.Sequential(*features)

        self.classifier = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(last_channel, num_classes),
        )

        if weights_path:
            self.load_pretrained_weights(weights_path)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.features(x)
        x = nn.functional.adaptive_avg_pool2d(x, (1, 1))
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

    def load_pretrained_weights(self, weights_path: str) -> None:
        # The torchvision checkpoint uses different parameter names, so keys are matched by position.
        state_dict = torch.load(weights_path, map_location="cpu")
        model_state_dict = self.state_dict()
        new_state_dict = {}
        for key1, key2 in zip(model_state_dict.keys(), state_dict.keys()):
            new_state_dict[key1] = state_dict[key2]
        self.load_state_dict(new_state_dict)

    @torch.inference_mode()
    def predict(self, x: torch.Tensor, top_k: int | None) -> Union[List[int], List[List[int]]]:
        output = self.forward(x)
        probs = torch.nn.functional.softmax(output, dim=1)
        if top_k is not None:
            preds = torch.topk(probs, dim=1, k=top_k).indices
            return preds.tolist()
        else:
            pred = torch.argmax(probs, dim=1)
            return pred.tolist()


if __name__ == "__main__":
    model = MobileNetV2(weights_path="weights/mobilenet_v2-b0353104.pth")
    num_params = sum([p.numel() for p in model.parameters()])
    print(f"params: {num_params/1e6:.2f} M")

    model.eval()
    image = PIL.Image.open("assets/cat.jpg").convert("RGB")
    image = F.resize(image, (224, 224))
    image = F.to_tensor(image)
    image = F.normalize(image, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    image = image.unsqueeze(0)
    predicted_class = model.predict(image, top_k=10)
    print(f"predicted class: {predicted_class}")
    # https://deeplearning.cms.waikato.ac.nz/user-guide/class-maps/IMAGENET/
src/models/resnet50.py
ADDED
@@ -0,0 +1,147 @@
from typing import Union, List
from pathlib import Path
import PIL.Image
import torch
from torch import nn
import torchvision.transforms.functional as F


class Bottleneck(nn.Module):
    expansion: int = 4

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: nn.Module | None = None,
        groups: int = 1,
        dilation: int = 1,
    ) -> None:
        super().__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, groups=groups, dilation=dilation, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)
        if self.downsample is not None:
            identity = self.downsample(x)
        out += identity
        out = self.relu(out)
        return out


class ResNet(nn.Module):
    @property
    def expansion(self):
        return 4

    def __init__(
        self,
        num_classes: int = 1000,
        weights_path: str | None = None,
    ) -> None:
        super().__init__()

        if weights_path is not None and not Path(weights_path).exists():
            raise FileNotFoundError(weights_path)

        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(Bottleneck, 64, 3)
        self.layer2 = self._make_layer(Bottleneck, 128, 4, stride=2)
        self.layer3 = self._make_layer(Bottleneck, 256, 6, stride=2)
        self.layer4 = self._make_layer(Bottleneck, 512, 3, stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * Bottleneck.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        if weights_path:
            self.load_pretrained_weights(weights_path)

    def _make_layer(
        self,
        block: Bottleneck,
        planes: int,
        blocks: int,
        stride: int = 1,
    ) -> nn.Sequential:
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

    def load_pretrained_weights(self, weights_path: str) -> None:
        state_dict = torch.load(weights_path, map_location="cpu")
        self.load_state_dict(state_dict)

    @torch.inference_mode()
    def predict(self, x: torch.Tensor, top_k: int | None) -> Union[List[int], List[List[int]]]:
        output = self.forward(x)
        probs = torch.nn.functional.softmax(output, dim=1)
        if top_k is not None:
            preds = torch.topk(probs, dim=1, k=top_k).indices
            return preds.tolist()
        else:
            pred = torch.argmax(probs, dim=1)
            return pred.tolist()


if __name__ == "__main__":
    model = ResNet(weights_path="weights/resnet50-0676ba61.pth")
    num_params = sum([p.numel() for p in model.parameters()])
    print(f"params: {num_params/1e6:.2f} M")

    model.eval()
    image = PIL.Image.open("assets/cat.jpg").convert("RGB")
    image = F.resize(image, (224, 224))
    image = F.to_tensor(image)
    image = F.normalize(image, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    image = image.unsqueeze(0)
    predicted_class = model.predict(image, top_k=10)
    print(f"predicted class: {predicted_class}")
    # https://deeplearning.cms.waikato.ac.nz/user-guide/class-maps/IMAGENET/
src/train.py
ADDED
@@ -0,0 +1,152 @@
from pathlib import Path
from tqdm import tqdm
import numpy as np
import argparse
import json
import wandb
import pickle
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

import models
from models.resnet50 import ResNet
from models.mobilenet_v2 import MobileNetV2
from dataset import PlantsDataset
from utils import train_transform, test_transform, EMA


def parse_args():
    parser = argparse.ArgumentParser(description="Train a model on plant dataset")
    parser.add_argument("--train-root", type=str, default="data/plants/train", help="Path to the training data")
    parser.add_argument("--test-root", type=str, default="data/plants/test", help="Path to the testing data")
    parser.add_argument("--load-to-ram", type=bool, default=False, help="Load dataset to RAM")
    parser.add_argument("--batch-size", type=int, default=32, help="Batch size for training and testing")
    parser.add_argument("--pin-memory", type=bool, default=True, help="Pin memory for DataLoader")
    parser.add_argument("--num-workers", type=int, default=1, help="Number of workers for DataLoader")
    parser.add_argument("--num-epochs", type=int, default=10, help="Number of training epochs")
    parser.add_argument("--learning-rate", type=float, default=1e-4, help="Learning rate for the optimizer")
    parser.add_argument("--weights-path", type=str, default="weights/mobilenet_v2-b0353104.pth", choices=["weights/resnet50-0676ba61.pth", "weights/mobilenet_v2-b0353104.pth"], help="Path to the pre-trained weights")
    parser.add_argument("--project-name", type=str, default="plants_classifier", help="WandB project name")
    parser.add_argument("--optimizer", type=str, default="AdamW", help="Optimizer type")
    parser.add_argument("--criterion", type=str, default="CrossEntropyLoss", help="Loss function type")
    parser.add_argument("--labels-path", type=str, default="labels.json", help="Path to the labels json file")
    parser.add_argument("--max-norm", type=float, default=1.0, help="Maximum gradient norm for clipping")
    parser.add_argument("--device", type=str, default="cuda" if torch.cuda.is_available() else "cpu", help="Device to run the training on")
    parser.add_argument("--model", type=str, default="mobilenet", choices=["resnet", "mobilenet"], help="Model class name")
    parser.add_argument("--save-frequency", type=int, default=4, help="Frequency of saving model weights")
    parser.add_argument("--logs-dir", type=str, default="resnet-logs", choices=["resnet-logs", "mobilenet-logs"], help="Directory for training logs and checkpoints")
    return parser.parse_args()


def main() -> None:
    args = parse_args()

    with open(args.labels_path, "r") as fp:
        labels = json.load(fp)
    num_classes = len(labels)

    logs_dir = Path(args.logs_dir)
    logs_dir.mkdir(exist_ok=True)

    wandb.init(project=args.project_name, dir=logs_dir)

    train_dataset = PlantsDataset(root=args.train_root, load_to_ram=args.load_to_ram, transform=train_transform, labels=labels)
    test_dataset = PlantsDataset(root=args.test_root, load_to_ram=args.load_to_ram, transform=test_transform, labels=labels)

    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, pin_memory=args.pin_memory, num_workers=args.num_workers)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False, pin_memory=args.pin_memory, num_workers=args.num_workers)

    device = torch.device(args.device)

    if args.model == "resnet":
        model = ResNet(weights_path=args.weights_path)
        model.fc = nn.Linear(512 * model.expansion, num_classes)
        nn.init.xavier_uniform_(model.fc.weight)
        # Fine-tune only layer4 and the new classification head; freeze everything else.
        for name, param in model.named_parameters():
            if "layer4" in name or "fc" in name:
                param.requires_grad = True
            else:
                param.requires_grad = False
    elif args.model == "mobilenet":
        model = MobileNetV2(weights_path=args.weights_path)
        num_ftrs = model.classifier[1].in_features
        model.classifier[1] = nn.Linear(num_ftrs, num_classes)
        nn.init.xavier_uniform_(model.classifier[1].weight)
        # Fine-tune the classifier and the last two feature blocks; freeze everything else.
        for name, param in model.named_parameters():
            if "classifier" in name or "features.18" in name or "features.17" in name:
                param.requires_grad = True
            else:
                param.requires_grad = False
    model = model.to(device)

    optimizer_class = getattr(torch.optim, args.optimizer)
    optimizer = optimizer_class(model.parameters(), lr=args.learning_rate)
    criterion_class = getattr(nn, args.criterion)
    criterion = criterion_class()
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.num_epochs)

    best_accuracy = 0

    train_loss_ema, train_accuracy_ema, grad_norm_ema = EMA(), EMA(), EMA()
    for epoch in range(1, args.num_epochs + 1):
        model.train()
        pbar = tqdm(train_loader, desc=f"Train epoch {epoch}/{args.num_epochs}")
        for images, labels in pbar:
            images = images.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            logits = model(images)
            loss = criterion(logits, labels)
            loss.backward()
            grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=args.max_norm).item()
            optimizer.step()
            train_loss = loss.item()
            train_accuracy = (logits.argmax(dim=1) == labels).sum().item() / logits.shape[0]
            pbar.set_postfix({"loss": train_loss_ema(train_loss), "accuracy": train_accuracy_ema(train_accuracy), "grad_norm": grad_norm_ema(grad_norm)})
            wandb.log(
                {
                    "train/epoch": epoch,
                    "train/loss": train_loss,
                    "train/accuracy": train_accuracy,
                    "train/learning_rate": optimizer.param_groups[0]["lr"],
                    "train/grad_norm": grad_norm,
                }
            )

        model.eval()
        test_loss, test_accuracy = 0.0, 0.0
        with torch.no_grad():
            pbar = tqdm(test_loader, desc=f"Val epoch {epoch}/{args.num_epochs}")
            for images, labels in pbar:
                images = images.to(device)
                labels = labels.to(device)
                logits = model(images)
                loss = criterion(logits, labels)
                test_loss += loss.item()
                test_accuracy += (logits.argmax(dim=1) == labels).sum().item()
        test_loss /= len(test_loader)
        test_accuracy /= len(test_loader.dataset)
        print(f"loss: {test_loss:.3f}, accuracy: {test_accuracy:.3f}")

        wandb.log(
            {
                "val/epoch": epoch,
                "val/test_loss": test_loss,
                "val/test_accuracy": test_accuracy,
            }
        )

        scheduler.step()

        if test_accuracy > best_accuracy:
            best_accuracy = test_accuracy
            torch.save(model.state_dict(), logs_dir / f"checkpoint-best-{epoch:09}.pth")
        elif epoch % args.save_frequency == 0:
            torch.save(model.state_dict(), logs_dir / f"checkpoint-{epoch:09}.pth")

    wandb.finish()


if __name__ == "__main__":
    main()
src/utils.py
ADDED
@@ -0,0 +1,32 @@
import torchvision.transforms as T

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

train_transform = T.Compose([
    T.RandomRotation(degrees=15),
    T.RandomResizedCrop(224, scale=(0.5, 1.0)),
    T.RandomHorizontalFlip(),
    T.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1),
    T.ToTensor(),
    T.Normalize(mean=mean, std=std),
])

test_transform = T.Compose([
    T.Resize(256),
    T.CenterCrop(224),
    T.ToTensor(),
    T.Normalize(mean=mean, std=std),
])


class EMA:
    def __init__(self, alpha: float = 0.9) -> None:
        self.value = None
        self.alpha = alpha

    def __call__(self, value: float) -> float:
        if self.value is None:
            self.value = value
        else:
            self.value = self.alpha * self.value + (1 - self.alpha) * value
        return self.value
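The EMA helper above is what train.py uses to smooth the loss, accuracy, and gradient-norm values shown in the tqdm postfix; a minimal illustration follows (the numbers are made up, not measurements from this repository).

# Illustrative only: exponential smoothing of a noisy metric with alpha=0.9.
from src.utils import EMA

loss_ema = EMA(alpha=0.9)
for raw in [2.0, 1.0, 1.5]:
    print(loss_ema(raw))  # 2.0, then 0.9*2.0 + 0.1*1.0 = 1.9, then 0.9*1.9 + 0.1*1.5 = 1.86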
weights/checkpoint-best-mobilenet.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:126f0aab718f57f32e7b5c05898bc65f767c393d2ecb1dcc4a50d220d33a9b80
size 9300442
weights/checkpoint-best-resnet.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3660126925b0296a4b2a64d0248eeea8d36ab3f5bb9e596c89a76d789c11470e
size 94601530
weights/download_checkpoints.sh
ADDED
@@ -0,0 +1,2 @@
wget https://huggingface.co/eksemyashkina/plants-classification/resolve/main/checkpoint-best-mobilenet.pth
wget https://huggingface.co/eksemyashkina/plants-classification/resolve/main/checkpoint-best-resnet.pth
weights/download_pretrained.py
ADDED
File without changes
weights/mobilenet_v2-b0353104.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b03531047ffacf1e2488318dcd2aba1126cde36e3bfe1aa5cb07700aeeee9889
size 14212972
weights/resnet50-0676ba61.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0676ba61b6795bbe1773cffd859882e5e297624d384b6993f7c9e683e722fb8a
size 102530333