# Hugging Face Space: K00B404/pix2pix_flux_train (status: Running)
# File size: 8,825 Bytes

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from datasets import load_dataset
from huggingface_hub import Repository
from huggingface_hub import HfApi, HfFolder, Repository, create_repo
import os
import gradio as gr
from PIL import Image
import numpy as np
from small_256_model import UNet as small_UNet
from big_1024_model import UNet as big_UNet

# Compute device: CUDA when PyTorch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# The 1024px ("big") configuration is selected exactly when running on CPU.
big = device.type == 'cpu'

# Hyper-parameters; image and batch size follow the selected configuration.
if big:
    IMG_SIZE, BATCH_SIZE = 1024, 1
else:
    IMG_SIZE, BATCH_SIZE = 256, 4
EPOCHS = 12
LR = 0.0002

# Hugging Face Hub identifiers for the training data and the published model.
dataset_id = "K00B404/pix2pix_flux_set"
model_repo_id = "K00B404/pix2pix_flux"

# Model instance shared by training and inference; populated by load_model().
global_model = None

def load_model():
    """Restore the saved model for the current configuration, or build a new one.

    The checkpoint file name is chosen from the module-level ``big`` flag,
    while the architecture to instantiate is taken from the ``model_config``
    stored inside the checkpoint. If anything in that process raises, the
    error is printed and an untrained model matching ``big`` is created
    instead.

    Returns:
        The model, which is also stored in the module-level ``global_model``.
    """
    global global_model

    if big:
        checkpoint_path = 'big_model_weights.pth'
    else:
        checkpoint_path = 'small_model_weights.pth'

    try:
        state = torch.load(checkpoint_path, map_location=device)
        architecture = big_UNet if state['model_config']['big'] else small_UNet
        restored = architecture()
        restored.load_state_dict(state['model_state_dict'])
        restored.to(device)
        restored.eval()
        global_model = restored
        print("Model loaded successfully!")
    except Exception as err:
        # Best effort: any failure falls back to freshly initialised weights.
        print(f"Error loading model: {err}")
        fallback_cls = big_UNet if big else small_UNet
        global_model = fallback_cls().to(device)

    return global_model

# Paired-image dataset used for training
class Pix2PixDataset(torch.utils.data.Dataset):
    """Paired image dataset built from the ``train`` split of a labelled dataset.

    Rows whose ``label`` is 0 are collected as originals and rows whose
    ``label`` is 1 as targets. Pair ``i`` is the i-th original together with
    the i-th target, in order of appearance. Rows with any other label are
    ignored.
    """

    def __init__(self, ds, transform):
        """Split ``ds["train"]`` into originals and targets.

        Args:
            ds: Mapping with a ``"train"`` entry that iterates over rows
                exposing ``'label'`` and ``'image'`` keys.
            transform: Callable applied to each RGB-converted image.

        Raises:
            ValueError: If the number of originals and targets differ, since
                the items could not be paired by index.
        """
        originals, targets = [], []
        # Single pass over the split: walking it once per label repeats the
        # work of producing every row and breaks on one-shot iterables.
        for row in ds["train"]:
            label = row['label']
            if label == 0:
                originals.append(row)
            elif label == 1:
                targets.append(row)

        # Explicit check instead of ``assert`` so it still runs under ``-O``.
        if len(originals) != len(targets):
            raise ValueError(
                f"Unpaired dataset: {len(originals)} original images "
                f"but {len(targets)} target images"
            )

        self.originals = originals
        self.targets = targets
        print(f"Number of original images: {len(self.originals)}")
        print(f"Number of target images: {len(self.targets)}")
        self.transform = transform

    def __len__(self):
        """Return the number of (original, target) pairs."""
        return len(self.originals)

    def __getitem__(self, idx):
        """Return the transformed ``(original, target)`` pair at ``idx``."""
        original = self.originals[idx]['image'].convert('RGB')
        target = self.targets[idx]['image'].convert('RGB')
        return self.transform(original), self.transform(target)

class UNetWrapper:
    """Pair a UNet model with the Hugging Face Hub repository it is pushed to."""

    def __init__(self, unet_model, repo_id):
        """Store the model and repository id and create the Hub API client.

        Args:
            unet_model: The model whose weights will be uploaded.
            repo_id: Target model repository on the Hub.
        """
        self.model = unet_model
        self.repo_id = repo_id
        # The access token comes from the NEW_TOKEN environment variable;
        # make sure it is set, otherwise the Hub calls receive ``None``.
        self.token = os.getenv('NEW_TOKEN')
        self.api = HfApi(token=self.token)

    def _build_model_card(self, is_big, img_size, weights_name):
        """Render the README for the uploaded checkpoint.

        The description and usage snippet are derived from the model that is
        actually being uploaded rather than hard-coded to the big variant.
        """
        model_type = 'Big (1024)' if is_big else 'Small (256)'
        return f"""---
tags:
- unet
- pix2pix
- pytorch
library_name: pytorch
license: wtfpl
datasets:
- K00B404/pix2pix_flux_set
language:
- en
pipeline_tag: image-to-image
---

# Pix2Pix UNet Model

## Model Description
Custom UNet model for Pix2Pix image translation.
- **Image Size:** {img_size}
- **Model Type:** {model_type}

## Usage

```python
import torch
from small_256_model import UNet as small_UNet
from big_1024_model import UNet as big_UNet

# Load the model
checkpoint = torch.load('{weights_name}', map_location='cpu')
model = big_UNet() if checkpoint['model_config']['big'] else small_UNet()
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()
```

## Model Architecture

```
{self.model}
```
"""

    def push_to_hub(self):
        """Save the model locally, upload weights and README, then clean up.

        The checkpoint holds the state dict, a small ``model_config`` (which
        variant and image size) and the printed architecture. Any exception
        is caught and printed, so this method never raises. The local files
        are removed only after both uploads succeeded.
        """
        try:
            is_big = isinstance(self.model, big_UNet)
            img_size = 1024 if is_big else 256

            # Save model state and configuration
            save_dict = {
                'model_state_dict': self.model.state_dict(),
                'model_config': {
                    'big': is_big,
                    'img_size': img_size
                },
                'model_architecture': str(self.model)
            }

            # Save model locally; the file name follows the module-level
            # ``big`` flag, matching the name load_model() looks for.
            pth_name = 'big_model_weights.pth' if big else 'small_model_weights.pth'
            torch.save(save_dict, pth_name)

            # Create repo if it doesn't exist; a failure here is only
            # reported, the upload below is still attempted.
            try:
                create_repo(
                    repo_id=self.repo_id,
                    token=self.token,
                    exist_ok=True
                )
            except Exception as e:
                print(f"Repository creation note: {e}")

            # Upload the model file
            self.api.upload_file(
                path_or_fileobj=pth_name,
                path_in_repo=pth_name,
                repo_id=self.repo_id,
                token=self.token,
                repo_type="model"
            )

            # Save and upload README
            model_card = self._build_model_card(is_big, img_size, pth_name)
            with open("README.md", "w", encoding="utf-8") as f:
                f.write(model_card)

            self.api.upload_file(
                path_or_fileobj="README.md",
                path_in_repo="README.md",
                repo_id=self.repo_id,
                token=self.token,
                repo_type="model"
            )

            # Clean up local files
            os.remove(pth_name)
            os.remove("README.md")

            print(f"Model successfully uploaded to {self.repo_id}")

        except Exception as e:
            print(f"Error uploading model: {e}")
            

def prepare_input(image, device='cpu'):
    """Convert an image into a batched tensor ready for the model.

    Args:
        image: A PIL image or a NumPy array (converted with
            ``Image.fromarray``).
        device: Device the resulting tensor is moved to.

    Returns:
        Tensor with a leading batch dimension of 1, resized to
        ``IMG_SIZE`` x ``IMG_SIZE``.
    """
    transform = transforms.Compose([
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
    ])

    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    # Training images are always converted to RGB by Pix2PixDataset; do the
    # same here so RGBA or grayscale inputs reach the model with the same
    # channel layout it was trained on.
    if isinstance(image, Image.Image):
        image = image.convert('RGB')
    input_tensor = transform(image).unsqueeze(0).to(device)
    return input_tensor

def run_inference(image):
    """Run the shared model on a single image and return a displayable array.

    Args:
        image: Input accepted by ``prepare_input``, or ``None`` when no
            image was supplied.

    Returns:
        ``"Error: Model not loaded"`` when no model is available, ``None``
        when ``image`` is ``None``, otherwise a ``uint8`` array in
        height x width x channel order, min-max scaled to 0-255.
    """
    if global_model is None:
        return "Error: Model not loaded"
    # Nothing to process; avoid failing inside the transform pipeline.
    if image is None:
        return None

    global_model.eval()
    input_tensor = prepare_input(image, device)

    with torch.no_grad():
        output = global_model(input_tensor)

    # Convert output to image
    output = output.cpu().squeeze(0).permute(1, 2, 0).numpy()
    low = output.min()
    span = output.max() - low
    if span > 0:
        scaled = (output - low) / span * 255
    else:
        # A constant (or NaN) output has no range to stretch; dividing by
        # zero would yield NaNs, so return a black image instead.
        scaled = np.zeros_like(output)
    return scaled.astype(np.uint8)
    
def to_hub(model):
    """Publish ``model`` to the repository named by ``model_repo_id``."""
    UNetWrapper(model, model_repo_id).push_to_hub()
    
def train_model(epochs):
    """Train the shared model on the paired dataset, pushing it after each epoch.

    Args:
        epochs: Number of passes over the dataset.

    Returns:
        Tuple ``(model, log)`` where ``log`` is the newline-joined status
        lines recorded every 10th step.
    """
    global global_model

    ds = load_dataset(dataset_id)
    transform = transforms.Compose([
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
    ])

    dataset = Pix2PixDataset(ds, transform)
    dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

    # The shared model is only populated by load_model(); if training is
    # reached before that ran, load or build it now instead of failing on
    # ``None.parameters()``.
    if global_model is None:
        load_model()
    model = global_model

    criterion = nn.L1Loss()
    optimizer = optim.Adam(model.parameters(), lr=LR)
    output_text = []

    for epoch in range(epochs):
        model.train()
        for i, (original, target) in enumerate(dataloader):
            original, target = original.to(device), target.to(device)
            optimizer.zero_grad()
            # NOTE(review): the network is fed `target` (label 1) and
            # penalised against `original` (label 0) - confirm this
            # direction is the intended one.
            output = model(target)
            loss = criterion(output, original)
            loss.backward()
            optimizer.step()

            if i % 10 == 0:
                status = f"Epoch [{epoch}/{epochs}], Step [{i}/{len(dataloader)}], Loss: {loss.item():.8f}"
                print(status)
                output_text.append(status)

        # Publish after every epoch so progress is kept on the Hub.
        to_hub(model)

    global_model = model
    return model, "\n".join(output_text)


def gradio_train(epochs):
    """Gradio callback: train for ``epochs`` epochs and report the result.

    Args:
        epochs: Value of the "Number of Epochs" field. ``None`` (no value
            entered) falls back to the module-level ``EPOCHS`` default.

    Returns:
        The training log followed by a summary line.
    """
    # The Number component is created without an initial value, so the
    # callback can receive None; int(None) would raise a TypeError.
    n_epochs = EPOCHS if epochs is None else int(epochs)
    model, training_log = train_model(n_epochs)
    # Final push; train_model also pushes at the end of every epoch.
    to_hub(model)
    return f"{training_log}\n\nModel trained for {n_epochs} epochs and pushed to {model_repo_id}"

def gradio_inference(input_image):
    """Gradio callback: return the untouched input alongside the model output."""
    prediction = run_inference(input_image)
    return input_image, prediction

# Build the Gradio UI: one tab for training, one for inference.
with gr.Blocks() as app:
    gr.Markdown("# Pix2Pix Model Training and Inference")
    
    with gr.Tabs():
        with gr.TabItem("Training"):
            # No initial value is supplied for the epochs field.
            epochs_input = gr.Number(label="Number of Epochs")
            train_button = gr.Button("Train Model")
            output_text = gr.Textbox(label="Training Progress", lines=10)
            # gradio_train returns one string, which is shown in the textbox.
            train_button.click(gradio_train, inputs=epochs_input, outputs=output_text)
        
        with gr.TabItem("Inference"):
            with gr.Row():
                input_image = gr.Image(label="Input Image")
                output_image = gr.Image(label="Model Output")
            infer_button = gr.Button("Run Inference")
            # gradio_inference returns (input, output); the input image is
            # written back to its own component next to the model output.
            infer_button.click(gradio_inference, inputs=input_image, outputs=[input_image, output_image])

if __name__ == '__main__':
    # Populate global_model before the UI can trigger training or inference.
    load_model()
    
    # Launch the Gradio app
    app.launch()