Update app.py

app.py
CHANGED
@@ -31,8 +31,8 @@ model_repo_id = "K00B404/pix2pix_flux"
 # Global model variable
 global_model = None
 
-#
-clip_model,clip_tokenizer = load_clip()
+# CLIP
+clip_model, clip_tokenizer = load_clip()
 
 def load_model():
     """Load the models at startup"""
@@ -53,10 +53,6 @@ def load_model():
     global_model = model
     return model
 
-
-import os
-import pandas as pd
-
 class Pix2PixDataset(torch.utils.data.Dataset):
     def __init__(self, combined_data, transform, clip_tokenizer):
         self.data = combined_data
@@ -91,97 +87,11 @@ class Pix2PixDataset(torch.utils.data.Dataset):
         return original, target, original_tokens, enhanced_tokens
 
 
-class Pix2PixDataset_older(torch.utils.data.Dataset):
-    def __init__(self, ds, transform, clip_tokenizer, csv_path='combined_data.csv'):
-        if not os.path.exists(csv_path):
-            os.system('wget https://huggingface.co/datasets/K00B404/pix2pix_flux_set/resolve/main/combined_data.csv')
-
-        self.data = pd.read_csv(csv_path)
-        self.clip_tokenizer = clip_tokenizer
-
-        self.originals = [x for x in ds["train"] if x['label'] == 0]
-        self.targets = [x for x in ds["train"] if x['label'] == 1]
-        assert len(self.originals) == len(self.targets)
-        print(f"Number of original images: {len(self.originals)}")
-        print(f"Number of target images: {len(self.targets)}")
-
-        # Debugging: Print out filenames from the dataset and CSV
-        print("Dataset Original Filenames:")
-        for original in self.originals:
-            print(original['image'].filename)
-
-        print("\nCSV Image Filenames:")
-        print(self.data['image_path'].unique())
-
-
-
-        self.transform = transform
-
-    def __len__(self):
-        return len(self.originals)
-
-    def __getitem__(self, idx):
-        original_img = self.originals[idx]['image']
-        target_img = self.targets[idx]['image']
-
-        # Convert PIL images
-        original = original_img.convert('RGB')
-        target = target_img.convert('RGB')
-
-        # Extract the filename from the image_path in the CSV
-        original_img_path = self.data.iloc[idx]['image_path']
-        original_img_filename = os.path.basename(original_img_path)
-
-        # Match the image filename with the `image_path` column in the CSV
-        matched_row = self.data[self.data['image_path'].str.endswith(original_img_filename)]
-
-        if matched_row.empty:
-            raise ValueError(f"No matching entry found in the CSV for image {original_img_filename}")
-
-        # Get the prompts from the matched row
-        original_prompt = matched_row['original_prompt'].values[0]
-        enhanced_prompt = matched_row['enhanced_prompt'].values[0]
-
-        # Tokenize the prompts using CLIP tokenizer
-        original_tokens = self.clip_tokenizer(original_prompt, return_tensors="pt", padding=True, truncation=True, max_length=77)
-        enhanced_tokens = self.clip_tokenizer(enhanced_prompt, return_tensors="pt", padding=True, truncation=True, max_length=77)
-
-        # Return transformed images and tokenized prompts
-        return self.transform(original), self.transform(target), original_tokens, enhanced_tokens
-
-
-# Dataset class remains the same
-class Pix2PixDataset_old(torch.utils.data.Dataset):
-    def __init__(self, ds, transform, csv_path='combined_data.csv'):
-        if not os.path.exists(csv_path):
-            os.system('wget https://huggingface.co/datasets/K00B404/pix2pix_flux_set/resolve/main/combined_data.csv')
-
-        self.data = pd.read_csv(csv_path)
-        self.clip_tokenizer = clip_tokenizer
-
-        self.originals = [x for x in ds["train"] if x['label'] == 0]
-        self.targets = [x for x in ds["train"] if x['label'] == 1]
-        assert len(self.originals) == len(self.targets)
-        print(f"Number of original images: {len(self.originals)}")
-        print(f"Number of target images: {len(self.targets)}")
-        self.transform = transform
-
-    def __len__(self):
-        return len(self.originals)
-
-    def __getitem__(self, idx):
-        original_img = self.originals[idx]['image']
-        # TODO: get original_img file name and match with image_path in self.data....then tokenize the prompts with clip_tokenizer
-        target_img = self.targets[idx]['image']
-        original = original_img.convert('RGB')
-        target = target_img.convert('RGB')
-        return self.transform(original), self.transform(target)
-
 class UNetWrapper:
     def __init__(self, unet_model, repo_id):
         self.model = unet_model
         self.repo_id = repo_id
-        self.token = os.getenv('NEW_TOKEN')
+        self.token = os.getenv('NEW_TOKEN')  # Make sure this environment variable is set
         self.api = HfApi(token=os.getenv('NEW_TOKEN'))
 
     def push_to_hub(self):
@@ -197,7 +107,7 @@ class UNetWrapper:
             }
 
             # Save model locally
-            pth_name = 'big_model_weights.pth' if big else
+            pth_name = 'big_model_weights.pth' if big else 'small_model_weights.pth'
             torch.save(save_dict, pth_name)
 
             # Create repo if it doesn't exist
@@ -260,9 +170,13 @@ model.eval()
 
 {str(self.model)} """
             rp(model_card)
+
             # Save and upload README
             with open("README.md", "w") as f:
-                f.write(
+                f.write(f"# Pix2Pix UNet Model\n\n"
+                        f"- **Image Size:** {save_dict['model_config']['img_size']}\n"
+                        f"- **Model Type:** {'big' if big else 'small'}_UNet ({save_dict['model_config']['img_size']})\n"
+                        f"## Model Architecture\n{str(self.model)}")
 
             self.api.upload_file(
                 path_or_fileobj="README.md",
@@ -280,7 +194,6 @@ model.eval()
 
         except Exception as e:
             print(f"Error uploading model: {e}")
-
 
 def prepare_input(image, device='cpu'):
     """Prepare image for inference"""
@@ -315,8 +228,6 @@ def to_hub(model):
     wrapper = UNetWrapper(model, model_repo_id)
     wrapper.push_to_hub()
 
-
-
 def train_model(epochs):
     """Training function"""
     global global_model
@@ -355,13 +266,10 @@ def train_model(epochs):
 
             # Compute image reconstruction loss
             img_loss = criterion(output, original)
-            rp(f"Image {i} Loss:{
-            #print(f"Enhanced Prompt Tokens Type: {enhanced_prompt_tokens.dtype}, Shape: {enhanced_prompt_tokens.shape}")
-            # Compute prompt guidance loss (L2 norm between original and enhanced prompt embeddings)
-            #prompt_loss = torch.norm(original_prompt_tokens - enhanced_prompt_tokens, p=2)
+            rp(f"Image {i} Loss:{img_loss}")
 
             # Combine losses
-            total_loss = img_loss
+            total_loss = img_loss  # Add any other losses if necessary
             total_loss.backward()
 
             # Optimizer step
@@ -377,62 +285,6 @@ def train_model(epochs):
 
     global_model = model  # Update the global model after training
     return model, "\n".join(output_text)
-
-
-def train_model_old(epochs):
-    """Training function"""
-    global global_model
-
-    ds = load_dataset(dataset_id)
-    transform = transforms.Compose([
-        transforms.Resize((IMG_SIZE, IMG_SIZE)),
-        transforms.ToTensor(),
-    ])
-
-    # Initialize the dataset and dataloader
-    dataset = Pix2PixDataset(ds, transform, clip_tokenizer)
-    dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
-
-    model = global_model
-    criterion = nn.L1Loss()  # L1 loss for image reconstruction
-    optimizer = optim.Adam(model.parameters(), lr=LR)
-    output_text = []
-
-    for epoch in range(epochs):
-        model.train()
-        for i, (original, target, original_prompt_tokens, enhanced_prompt_tokens) in enumerate(dataloader):
-            # Move images and prompt embeddings to the appropriate device (CPU or GPU)
-            original, target = original.to(device), target.to(device)
-            original_prompt_tokens = original_prompt_tokens.input_ids.to(device)
-            enhanced_prompt_tokens = enhanced_prompt_tokens.input_ids.to(device)
-
-            optimizer.zero_grad()
-
-            # Forward pass through the model
-            output = model(target)
-
-            # Compute image reconstruction loss
-            img_loss = criterion(output, original)
-
-            # Compute prompt guidance loss (L2 norm between original and enhanced prompt embeddings)
-            prompt_loss = torch.norm(original_prompt_tokens - enhanced_prompt_tokens, p=2)
-
-            # Combine losses
-            total_loss = img_loss + 0.1 * prompt_loss  # Weight the prompt guidance loss with 0.1 to balance
-            total_loss.backward()
-
-            # Optimizer step
-            optimizer.step()
-
-            if i % 10 == 0:
-                status = f"Epoch [{epoch}/{epochs}], Step [{i}/{len(dataloader)}], Loss: {total_loss.item():.8f}"
-                rp(status)
-                output_text.append(status)
-
-        # Push model to Hugging Face Hub at the end of each epoch
-        to_hub(model)
-
-    global_model = model  # Update the global model after training
-    return model, "\n".join(output_text)
 
 def gradio_train(epochs):
     """Gradio training interface function"""
@@ -440,15 +292,6 @@ def gradio_train(epochs):
     to_hub(model)
     return f"{training_log}\n\nModel trained for {epochs} epochs and pushed to {model_repo_id}"
 
-def gradio_inference(input_image, keywords):
-    """Gradio inference interface function"""
-    # Generate an enhanced prompt using the chat bot
-    enhanced_prompt = chat_with_bot(keywords)
-
-    # Run inference on the input image
-    output_image = run_inference(input_image, chat_with_bot(keywords))
-
-    return input_image, output_image, keywords, enhanced_prompt
 def gradio_inference(input_image):
     """Gradio inference interface function"""
     return input_image, run_inference(input_image)
@@ -457,23 +300,18 @@ def gradio_inference(input_image):
 with gr.Blocks() as app:
     gr.Markdown("# Pix2Pix Model Training and Inference")
 
-    with gr.
-
-
-
-
-        train_button.click(gradio_train, inputs=epochs_input, outputs=output_text)
-
-    with gr.TabItem("Inference"):
-        with gr.Row():
-            input_image = gr.Image(label="Input Image")
-            output_image = gr.Image(label="Model Output")
-        infer_button = gr.Button("Run Inference")
-        infer_button.click(gradio_inference, inputs=input_image, outputs=[input_image, output_image])
-
-if __name__ == '__main__':
-    # Load model at startup
-    load_model()
+    with gr.Tab("Train"):
+        epochs_input = gr.Number(value=EPOCHS, label="Number of epochs")
+        train_button = gr.Button("Train")
+        training_output = gr.Textbox(label="Training Log", interactive=False)
+        train_button.click(gradio_train, inputs=[epochs_input], outputs=[training_output])
 
-
-
+    with gr.Tab("Inference"):
+        image_input = gr.Image(type='numpy')
+        prompt_input = gr.Textbox(label="Prompt")
+        inference_button = gr.Button("Generate")
+        inference_output = gr.Image(type='numpy', label="Generated Image")
+        inference_button.click(gradio_inference, inputs=[image_input], outputs=[inference_output])
+
+load_model()
+app.launch()
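Note: after this commit, Pix2PixDataset no longer reads combined_data.csv itself; the caller is expected to pass the loaded table in. A minimal sketch of how it might now be wired up, assuming the CSV still has the image_path / original_prompt / enhanced_prompt columns used by the removed CSV-based classes, and that IMG_SIZE, BATCH_SIZE, load_clip() and clip_tokenizer are the module-level names already defined in app.py:

import os
import pandas as pd
from torch.utils.data import DataLoader
from torchvision import transforms

csv_path = 'combined_data.csv'
if not os.path.exists(csv_path):
    # Same CSV the removed Pix2PixDataset_older downloaded on demand
    os.system('wget https://huggingface.co/datasets/K00B404/pix2pix_flux_set/resolve/main/combined_data.csv')

combined_data = pd.read_csv(csv_path)          # prompt/path table, now loaded by the caller
transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),   # IMG_SIZE defined near the top of app.py
    transforms.ToTensor(),
])

dataset = Pix2PixDataset(combined_data, transform, clip_tokenizer)      # clip_tokenizer from load_clip()
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)   # BATCH_SIZE from app.py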
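The new training loop keeps only the L1 reconstruction term (total_loss = img_loss). The deleted train_model_old additionally penalised the L2 distance between the original and enhanced prompt token tensors with a 0.1 weight. A self-contained, hedged sketch of that removed variant on dummy tensors (token IDs are integers, so they must be cast to float before torch.norm, and both prompts are assumed to pad to the same length):

import torch
import torch.nn as nn

criterion = nn.L1Loss()
output = torch.rand(1, 3, 256, 256, requires_grad=True)      # stand-in for a model prediction
original = torch.rand(1, 3, 256, 256)                        # stand-in for the ground-truth image
original_prompt_tokens = torch.randint(0, 49408, (1, 77))    # dummy CLIP token IDs
enhanced_prompt_tokens = torch.randint(0, 49408, (1, 77))

img_loss = criterion(output, original)                       # image reconstruction loss (kept by the commit)
prompt_loss = torch.norm(original_prompt_tokens.float()
                         - enhanced_prompt_tokens.float(), p=2)   # crude prompt-guidance proxy (removed)
total_loss = img_loss + 0.1 * prompt_loss                    # 0.1 weighting used by the removed code
total_loss.backward()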
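Pushing to the Hub goes through UNetWrapper, which reads a write token from the NEW_TOKEN environment variable (on a Space this would normally be a repository secret, not hard-coded). A small usage sketch under that assumption; to_hub(model) in app.py does the same two calls:

import os

os.environ.setdefault('NEW_TOKEN', 'hf_xxx')   # placeholder token for illustration only

model = load_model()                            # loads the UNet and stores it in global_model
wrapper = UNetWrapper(model, model_repo_id)     # model_repo_id = "K00B404/pix2pix_flux"
wrapper.push_to_hub()                           # saves *.pth locally, writes README.md, uploads both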