In [1]:
from autogluon.tabular import TabularDataset, TabularPredictor
from autogluon.common.utils.utils import setup_outputdir
from autogluon.core.utils.loaders import load_pkl
from autogluon.core.utils.savers import save_pkl
import os.path
import os
import pandas as pd
from PIL import Image
import torch
from transformers import ViTModel, ViTFeatureExtractor
import pickle

class MultilabelPredictor:
    """Tabular predictor for predicting multiple columns in a table.

    Creates one TabularPredictor per label, each of which can also be used individually.
    You can access the TabularPredictor for a particular label via:
    `multilabel_predictor.get_predictor(label_i)`

    Parameters
    ----------
    labels : List[str]
        The ith element of this list is the column (i.e. `label`) predicted by the ith
        TabularPredictor stored in this object. Any sequence is accepted; it is stored as a list.
    path : str, default = None
        Path to directory where models and intermediate outputs should be saved.
        If unspecified, a time-stamped folder called "AutogluonModels/ag-[TIMESTAMP]" will be
        created in the working directory to store all models.
        Note: To call `fit()` twice and save all results of each fit, you must specify different
        `path` locations or don't specify `path` at all.
        Otherwise files from first `fit()` will be overwritten by second `fit()`.
        Caution: when predicting many labels, this directory may grow large as it needs to store
        many TabularPredictors.
    problem_types : List[str], default = None
        The ith element is the `problem_type` for the ith TabularPredictor stored in this object.
    eval_metrics : List[str], default = None
        The ith element is the `eval_metric` for the ith TabularPredictor stored in this object.
    consider_labels_correlation : bool, default = True
        Whether the predictions of multiple labels should account for label correlations or
        predict each label independently of the others.
        If True, the ordering of `labels` may affect resulting accuracy as each label is predicted
        conditional on the previous labels appearing earlier in this list (i.e. in an
        auto-regressive fashion).
        Set to False if during inference you may want to individually use just the ith
        TabularPredictor without predicting all the other labels.
    kwargs :
        Arguments passed into the initialization of each TabularPredictor.

    Raises
    ------
    ValueError
        If fewer than two labels are given, or if `problem_types` / `eval_metrics` is provided
        with a length different from `labels`.
    """

    # File name (inside `self.path`) under which the pickled MultilabelPredictor is stored.
    multi_predictor_file = 'multilabel_predictor.pkl'

    def __init__(self, labels, path=None, problem_types=None, eval_metrics=None, consider_labels_correlation=True, **kwargs):
        # Normalise to a list: `_predict` selects columns with `data[self.labels]`, and pandas
        # would treat a tuple there as a single (multi-level) key instead of a list of columns.
        labels = list(labels)
        if len(labels) < 2:
            raise ValueError("MultilabelPredictor is only intended for predicting MULTIPLE labels (columns), use TabularPredictor for predicting one label (column).")
        if (problem_types is not None) and (len(problem_types) != len(labels)):
            raise ValueError("If provided, `problem_types` must have same length as `labels`")
        if (eval_metrics is not None) and (len(eval_metrics) != len(labels)):
            raise ValueError("If provided, `eval_metrics` must have same length as `labels`")
        self.path = setup_outputdir(path, warn_if_exist=False)
        self.labels = labels
        self.consider_labels_correlation = consider_labels_correlation
        self.predictors = {}  # key = label, value = TabularPredictor or str path to the TabularPredictor for this label
        if eval_metrics is None:
            # Left empty so that fit() records whichever metric each predictor ends up with.
            self.eval_metrics = {}
        else:
            self.eval_metrics = dict(zip(labels, eval_metrics))
        for i, label in enumerate(labels):
            path_i = os.path.join(self.path, "Predictor_" + str(label))
            problem_type = problem_types[i] if problem_types is not None else None
            eval_metric = eval_metrics[i] if eval_metrics is not None else None
            self.predictors[label] = TabularPredictor(label=label, problem_type=problem_type, eval_metric=eval_metric, path=path_i, **kwargs)

    def fit(self, train_data, tuning_data=None, **kwargs):
        """Fits a separate TabularPredictor to predict each of the labels.

        After each predictor is fit, it is replaced in `self.predictors` by its path, and the
        whole MultilabelPredictor is saved to disk once all labels are done.

        Parameters
        ----------
        train_data, tuning_data : str or autogluon.tabular.TabularDataset or pd.DataFrame
            See documentation for `TabularPredictor.fit()`.
        kwargs :
            Arguments passed into the `fit()` call for each TabularPredictor.
        """
        if isinstance(train_data, str):
            train_data = TabularDataset(train_data)
        if tuning_data is not None and isinstance(tuning_data, str):
            tuning_data = TabularDataset(tuning_data)
        # Keep pristine copies: every iteration derives its own frame from these.
        train_data_og = train_data.copy()
        tuning_data_og = tuning_data.copy() if tuning_data is not None else None
        # Only record the predictors' metrics when the caller did not supply any.
        save_metrics = len(self.eval_metrics) == 0
        for i, label in enumerate(self.labels):
            predictor = self.get_predictor(label)
            if not self.consider_labels_correlation:
                # Independent mode: no other label may be used as a feature.
                labels_to_drop = [other for other in self.labels if other != label]
            else:
                # Auto-regressive mode: earlier labels stay as features, later ones are removed.
                labels_to_drop = self.labels[i + 1:]
            train_data = train_data_og.drop(labels_to_drop, axis=1)
            if tuning_data is not None:
                tuning_data = tuning_data_og.drop(labels_to_drop, axis=1)
            print(f"Fitting TabularPredictor for label: {label} ...")
            predictor.fit(train_data=train_data, tuning_data=tuning_data, **kwargs)
            # Store only the path so the fitted predictor object is not kept referenced here.
            self.predictors[label] = predictor.path
            if save_metrics:
                self.eval_metrics[label] = predictor.eval_metric
        self.save()

    def predict(self, data, **kwargs):
        """Returns DataFrame with label columns containing predictions for each label.

        Parameters
        ----------
        data : str or autogluon.tabular.TabularDataset or pd.DataFrame
            Data to make predictions for. If label columns are present in this data, they will be
            overwritten by predictions in an internal copy; the caller's object is not modified.
            See documentation for `TabularPredictor.predict()`.
        kwargs :
            Arguments passed into the predict() call for each TabularPredictor.
        """
        return self._predict(data, as_proba=False, **kwargs)

    def predict_proba(self, data, **kwargs):
        """Returns dict where each key is a label and the corresponding value is the `predict_proba()` output for just that label.

        Parameters
        ----------
        data : str or autogluon.tabular.TabularDataset or pd.DataFrame
            Data to make predictions for. See documentation for `TabularPredictor.predict()` and
            `TabularPredictor.predict_proba()`.
        kwargs :
            Arguments passed into the `predict_proba()` call for each TabularPredictor. When
            `consider_labels_correlation` is True they are also passed into a `predict()` call,
            whose output feeds the predictors of later labels.
        """
        return self._predict(data, as_proba=True, **kwargs)

    def evaluate(self, data, **kwargs):
        """Returns dict where each key is a label and the corresponding value is the `evaluate()` output for just that label.

        Parameters
        ----------
        data : str or autogluon.tabular.TabularDataset or pd.DataFrame
            Data to evaluate predictions of all labels for, must contain all labels as columns.
            See documentation for `TabularPredictor.evaluate()`.
        kwargs :
            Arguments passed into the `evaluate()` call for each TabularPredictor (also passed
            into the `predict()` call).
        """
        data = self._get_data(data)
        eval_dict = {}
        for label in self.labels:
            print(f"Evaluating TabularPredictor for label: {label} ...")
            predictor = self.get_predictor(label)
            eval_dict[label] = predictor.evaluate(data, **kwargs)
            if self.consider_labels_correlation:
                # Replace the true label with its prediction so later predictors are evaluated
                # on the same kind of input they will see at inference time.
                data[label] = predictor.predict(data, **kwargs)
        return eval_dict

    def save(self):
        """Save MultilabelPredictor to disk.

        Any in-memory TabularPredictor held in `self.predictors` is first replaced by its path,
        so the pickle written to `self.path` contains paths only.
        """
        for label in self.labels:
            if not isinstance(self.predictors[label], str):
                self.predictors[label] = self.predictors[label].path
        save_pkl.save(path=os.path.join(self.path, self.multi_predictor_file), object=self)
        print(f"MultilabelPredictor saved to disk. Load with: MultilabelPredictor.load('{self.path}')")

    @classmethod
    def load(cls, path):
        """Load MultilabelPredictor from disk `path` previously specified when creating this MultilabelPredictor.

        The per-label predictor paths inside the loaded object are the ones recorded by `save()`.
        """
        path = os.path.expanduser(path)
        # NOTE(security): this unpickles a file; only load directories you created or trust.
        return load_pkl.load(path=os.path.join(path, cls.multi_predictor_file))

    def get_predictor(self, label):
        """Returns TabularPredictor which is used to predict this label.

        If only a path is stored for the label, the predictor is loaded from that path on each call.
        """
        predictor = self.predictors[label]
        if isinstance(predictor, str):
            return TabularPredictor.load(path=predictor)
        return predictor

    def _get_data(self, data):
        """Return a private working copy of `data`, reading it from file when given a path string."""
        if isinstance(data, str):
            return TabularDataset(data)
        return data.copy()

    def _predict(self, data, as_proba=False, **kwargs):
        """Shared implementation of `predict()` and `predict_proba()`.

        Returns a DataFrame restricted to the label columns when `as_proba` is False, otherwise a
        dict mapping each label to that predictor's `predict_proba()` output.
        """
        data = self._get_data(data)
        predproba_dict = {}
        for label in self.labels:
            print(f"Predicting with TabularPredictor for label: {label} ...")
            predictor = self.get_predictor(label)
            if as_proba:
                predproba_dict[label] = predictor.predict_proba(data, as_multiclass=True, **kwargs)
                if not self.consider_labels_correlation:
                    # Independent predictors were fit with all other labels dropped, and the
                    # caller only receives probabilities, so a predict() pass would be discarded.
                    continue
            # Written back into the working copy so later (auto-regressive) predictors can use it.
            data[label] = predictor.predict(data, **kwargs)
        if as_proba:
            return predproba_dict
        return data[self.labels]

def extract_image_embeddings_batch(image_paths):
    """Extract embeddings for a batch of images using Vision Transformer.

    Relies on the module-level names `feature_extractor`, `vit_model` and `device`, which must
    be defined in the notebook before this function is called.

    Parameters
    ----------
    image_paths : iterable of str
        Paths of the image files to embed; all of them are processed in one forward pass.

    Returns
    -------
    numpy.ndarray
        `last_hidden_state` averaged over dim=1 and moved to the CPU
        (presumably one embedding row per input image -- TODO confirm against the model docs).
    """
    images = []

    # Load and preprocess all images in the batch
    for image_path in image_paths:
        # Image.open() is lazy and keeps the file open; convert() returns a fully loaded copy,
        # so the handle can be released right away instead of waiting for garbage collection.
        with Image.open(image_path) as image:
            images.append(image.convert("RGB"))

    # Prepare inputs as a batch
    inputs = feature_extractor(images=images, return_tensors="pt", padding=True).to(device)

    # Get embeddings in a single forward pass (no gradients needed for inference)
    with torch.no_grad():
        outputs = vit_model(**inputs)

    # Compute mean embeddings for each image in the batch
    return outputs.last_hidden_state.mean(dim=1).cpu().numpy()

def preprocess_images(df, image_dir, image_column='id', batch_size=512):
    """Generate image embeddings for all rows in a DataFrame in batches.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with one row per image; `image_column` holds the numeric image id.
    image_dir : str
        Directory containing the images, stored as `<id>.jpeg`.
    image_column : str, default = 'id'
        Column of `df` whose value (cast to int) is the image file stem.
    batch_size : int, default = 512
        Number of images passed to `extract_image_embeddings_batch()` per call.

    Returns
    -------
    pd.DataFrame
        One row of embedding values per row of `df`, sharing `df`'s index.

    Raises
    ------
    ValueError
        If `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        # A negative step would silently skip every row and return an empty frame.
        raise ValueError("`batch_size` must be a positive integer")

    embeddings = []
    n = len(df)
    total_batches = (n + batch_size - 1) // batch_size  # ceil(n / batch_size)
    # Read the id column directly: iterrows() would build a full Series for every (wide) row
    # and may upcast the ids when the frame mixes dtypes.
    image_ids = df[image_column]

    for i in range(0, n, batch_size):
        # Get the current batch of image paths
        image_paths = [
            os.path.join(image_dir, f"{int(image_id)}.jpeg")
            for image_id in image_ids.iloc[i:i + batch_size]
        ]
        # Extract embeddings for the batch
        batch_embeddings = extract_image_embeddings_batch(image_paths)
        embeddings.extend(batch_embeddings)

        print(f"Processed batch {i//batch_size + 1}/{total_batches}")
    # Convert to DataFrame
    return pd.DataFrame(embeddings, index=df.index)

 from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Define paths
# All paths are relative to the notebook's working directory; the test paths and
# `output_path` are only defined here and are read by the prediction cell further down.
train_csv_path = 'train.csv'
train_image_dir = 'train_images'
test_csv_path = 'test.csv'
test_image_dir = 'test_images'
output_path = 'prediction.csv'

# Load the training dataset (the test CSV is read in the prediction cell)
train_df = pd.read_csv(train_csv_path)

# Columns for ancillary data and target traits
# The split is purely positional: everything except the last 6 columns is treated as a feature.
ancillary_columns = train_df.columns[:-6] # First 164 columns are ancillary data
target_columns = train_df.columns[-6:] # Last 6 columns are target traits

# Load Vision Transformer model and feature extractor
# NOTE: the three definitions below are disabled. extract_image_embeddings_batch() reads
# `device`, `vit_model` and `feature_extractor` as globals, so preprocess_images() cannot be
# called until they are re-enabled.
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# vit_model = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k').to(device)
# feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')

# Generate image embeddings for train and test datasets
# The embeddings are not recomputed here: the preprocess_images() call is disabled and a
# previously pickled result is loaded instead (the progress message still says "Extracting").
print("Extracting image embeddings for training data...")
# train_image_embeddings = preprocess_images(train_df, train_image_dir)
# NOTE(security): pickle.load can execute arbitrary code; only load cache files you produced.
with open('train_image_embeddings.pkl', 'rb') as f:
 train_image_embeddings = pickle.load(f)

# Combine ancillary data and image embeddings
# Column-wise concat aligns on the index, so the embeddings must share train_df's index.
# Targets go last, after the ancillary features and the embedding columns.
print("Combining ancillary data and image embeddings...")
train_combined = pd.concat([train_df[ancillary_columns], train_image_embeddings, train_df[target_columns]], axis=1)

# Initialize MultilabelPredictor
targets = list(target_columns)
problem_types = ['regression'] * len(targets)
# NOTE(review): `eval_metrics` is built here but the `eval_metrics=` argument below is
# commented out, so it is never passed to MultilabelPredictor.
eval_metrics = ['mean_absolute_percentage_error'] * len(targets)
# Forwarded unchanged to every TabularPredictor.fit() call through MultilabelPredictor.fit(**kwargs).
hyperparameters = {
	'NN_TORCH': {},
	'GBM': ['GBMLarge'],
	'FASTAI': {}
}

# One TabularPredictor per target is created under 'multilabel_predictor_source/Predictor_<label>'.
multi_predictor = MultilabelPredictor(
 labels=targets,
 problem_types=problem_types,
 # eval_metrics=eval_metrics,
 path='multilabel_predictor_source'
)

# Train MultilabelPredictor
# Fits the targets one after another and then saves the MultilabelPredictor to disk.
print("Training MultilabelPredictor...")
multi_predictor.fit(train_combined, hyperparameters=hyperparameters)


Extracting image embeddings for training data...
Combining ancillary data and image embeddings...


Verbosity: 2 (Standard Logging)
AutoGluon Version: 1.1.1
Python Version: 3.10.11
Operating System: Windows
Platform Machine: AMD64
Platform Version: 10.0.22631
CPU Count: 12
Memory Avail: 5.11 GB / 15.79 GB (32.4%)
Disk Space Avail: 79.69 GB / 150.79 GB (52.8%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets.
	Recommended Presets (For more details refer to https://auto.gluon.ai/stable/tutorials/tabular/tabular-essentials.html#presets):
	presets='best_quality' : Maximize accuracy. Default time_limit=3600.
	presets='high_quality' : Strong accuracy with fast inference speed. Default time_limit=3600.
	presets='good_quality' : Good accuracy with very fast inference speed. Default time_limit=3600.
	presets='medium_quality' : Fast training time, ideal for initial prototyping.


Training MultilabelPredictor...
Fitting TabularPredictor for label: X4_mean ...


	Consider setting `time_limit` to ensure training finishes within an expected duration or experiment with a small portion of `train_data` to identify an ideal `presets` and `hyperparameters` configuration.
Beginning AutoGluon training ...
AutoGluon will save models to "multilabel_predictor_source\Predictor_X4_mean"
Train Data Rows: 43363
Train Data Columns: 932
Label Column: X4_mean
Problem Type: regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory: 5219.75 MB
	Train Data (Original) Memory Usage: 181.30 MB (3.5% of available memory)
	Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
	Stage 1 Generators:
		Fitting AsTypeFeatureGenerator...
	Stage 2 Generators:
		Fitting FillNaFeatureGenerator...
	Stage 3 Generators:
		Fitting IdentityFeatureGenerator...
	Stage 4 Generators:
		Fitting DropUniqueFeatureGenerator.

[1000]	valid_set's rmse: 0.10796
[2000]	valid_set's rmse: 0.107227
[3000]	valid_set's rmse: 0.106933
[4000]	valid_set's rmse: 0.106685
[5000]	valid_set's rmse: 0.106466
[6000]	valid_set's rmse: 0.106427
[7000]	valid_set's rmse: 0.106386
[8000]	valid_set's rmse: 0.106361
[9000]	valid_set's rmse: 0.106337
[10000]	valid_set's rmse: 0.106303


	-0.1063	 = Validation score (-root_mean_squared_error)
	863.4s	 = Training runtime
	0.93s	 = Validation runtime
Fitting model: LightGBM ...


[1000]	valid_set's rmse: 0.108342
[2000]	valid_set's rmse: 0.107862
[3000]	valid_set's rmse: 0.107599
[4000]	valid_set's rmse: 0.107513
[5000]	valid_set's rmse: 0.107464
[6000]	valid_set's rmse: 0.107424
[7000]	valid_set's rmse: 0.107404
[8000]	valid_set's rmse: 0.107379
[9000]	valid_set's rmse: 0.107371
[10000]	valid_set's rmse: 0.107365


	-0.1074	 = Validation score (-root_mean_squared_error)
	1027.06s	 = Training runtime
	0.83s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-0.112	 = Validation score (-root_mean_squared_error)
	3077.41s	 = Training runtime
	0.22s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-0.1119	 = Validation score (-root_mean_squared_error)
	1255.77s	 = Training runtime
	0.24s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
No improvement since epoch 2: early stopping
	-0.1104	 = Validation score (-root_mean_squared_error)
	135.6s	 = Training runtime
	0.28s	 = Validation runtime
Fitting model: NeuralNetTorch ...
	-0.1095	 = Validation score (-root_mean_squared_error)
	143.11s	 = Training runtime
	0.32s	 = Validation runtime
Fitting model: LightGBMLarge ...


[1000]	valid_set's rmse: 0.107068
[2000]	valid_set's rmse: 0.10661
[3000]	valid_set's rmse: 0.10653
[4000]	valid_set's rmse: 0.106503
[5000]	valid_set's rmse: 0.106497
[6000]	valid_set's rmse: 0.106495
[7000]	valid_set's rmse: 0.106495
[8000]	valid_set's rmse: 0.106495
[9000]	valid_set's rmse: 0.106495
[10000]	valid_set's rmse: 0.106495


	-0.1065	 = Validation score (-root_mean_squared_error)
	2938.26s	 = Training runtime
	1.38s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	Ensemble Weights: {'LightGBMXT': 0.333, 'NeuralNetTorch': 0.238, 'LightGBMLarge': 0.238, 'NeuralNetFastAI': 0.095, 'KNeighborsDist': 0.048, 'LightGBM': 0.048}
	-0.1047	 = Validation score (-root_mean_squared_error)
	0.03s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 9466.82s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 378.7 rows/s (2500 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("multilabel_predictor_source\Predictor_X4_mean")
Verbosity: 2 (Standard Logging)
AutoGluon Version: 1.1.1
Python Version: 3.10.11
Operating System: Windows
Platform Machine: AMD64
Platform Version: 10.0.22631
CPU Count: 12
Memory Avail: 5.24 GB / 15.79 GB (33.2%)
Disk Space Avail: 77.84 GB / 150.79 GB (51.6%)
No presets specified! To achieve stro

Fitting TabularPredictor for label: X11_mean ...


Fitting AutoMLPipelineFeatureGenerator...
	Available Memory: 5340.17 MB
	Train Data (Original) Memory Usage: 181.63 MB (3.4% of available memory)
	Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
	Stage 1 Generators:
		Fitting AsTypeFeatureGenerator...
	Stage 2 Generators:
		Fitting FillNaFeatureGenerator...
	Stage 3 Generators:
		Fitting IdentityFeatureGenerator...
	Stage 4 Generators:
		Fitting DropUniqueFeatureGenerator...
	Stage 5 Generators:
		Fitting DropDuplicatesFeatureGenerator...
	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 811 | ['WORLDCLIM_BIO1_annual_mean_temperature', 'WORLDCLIM_BIO12_annual_precipitation', 'WORLDCLIM_BIO13.BIO14_delta_precipitation_of_wettest_and_dryest_month', 'WORLDCLIM_BIO15_precipitation_seasonality', 'WORLDCLIM_BIO4_temperature_seasonality', ...]
		('int', []) : 122 | ['id', 'SOIL_bdod_0.5cm_mean_0.01_deg', 'SOIL_bdod_100.200c

[1000]	valid_set's rmse: 5.34109
[2000]	valid_set's rmse: 5.3167
[3000]	valid_set's rmse: 5.29916
[4000]	valid_set's rmse: 5.29677
[5000]	valid_set's rmse: 5.29458
[6000]	valid_set's rmse: 5.29489
[7000]	valid_set's rmse: 5.29236
[8000]	valid_set's rmse: 5.29263
[9000]	valid_set's rmse: 5.29315


	-5.2913	 = Validation score (-root_mean_squared_error)
	831.77s	 = Training runtime
	0.34s	 = Validation runtime
Fitting model: LightGBM ...


[1000]	valid_set's rmse: 5.29744
[2000]	valid_set's rmse: 5.26782
[3000]	valid_set's rmse: 5.26091
[4000]	valid_set's rmse: 5.25295
[5000]	valid_set's rmse: 5.24923
[6000]	valid_set's rmse: 5.24709
[7000]	valid_set's rmse: 5.24592
[8000]	valid_set's rmse: 5.24511
[9000]	valid_set's rmse: 5.24443
[10000]	valid_set's rmse: 5.24422


	-5.2442	 = Validation score (-root_mean_squared_error)
	1007.46s	 = Training runtime
	0.8s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-5.466	 = Validation score (-root_mean_squared_error)
	3405.54s	 = Training runtime
	0.21s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-5.5053	 = Validation score (-root_mean_squared_error)
	1100.81s	 = Training runtime
	0.19s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
No improvement since epoch 8: early stopping
	-5.3575	 = Validation score (-root_mean_squared_error)
	156.5s	 = Training runtime
	0.26s	 = Validation runtime
Fitting model: NeuralNetTorch ...
	-5.3648	 = Validation score (-root_mean_squared_error)
	123.3s	 = Training runtime
	0.3s	 = Validation runtime
Fitting model: LightGBMLarge ...


[1000]	valid_set's rmse: 5.22467
[2000]	valid_set's rmse: 5.20862
[3000]	valid_set's rmse: 5.20477
[4000]	valid_set's rmse: 5.20326
[5000]	valid_set's rmse: 5.20295
[6000]	valid_set's rmse: 5.20281
[7000]	valid_set's rmse: 5.20276
[8000]	valid_set's rmse: 5.20275
[9000]	valid_set's rmse: 5.20275
[10000]	valid_set's rmse: 5.20275


	-5.2028	 = Validation score (-root_mean_squared_error)
	2423.97s	 = Training runtime
	1.28s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	Ensemble Weights: {'LightGBMLarge': 0.417, 'NeuralNetFastAI': 0.375, 'LightGBM': 0.208}
	-5.0914	 = Validation score (-root_mean_squared_error)
	0.02s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 9074.56s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 1068.5 rows/s (2500 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("multilabel_predictor_source\Predictor_X11_mean")
Verbosity: 2 (Standard Logging)
AutoGluon Version: 1.1.1
Python Version: 3.10.11
Operating System: Windows
Platform Machine: AMD64
Platform Version: 10.0.22631
CPU Count: 12
Memory Avail: 7.64 GB / 15.79 GB (48.4%)
Disk Space Avail: 75.99 GB / 150.79 GB (50.4%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available pre

Fitting TabularPredictor for label: X18_mean ...


Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory: 7901.67 MB
	Train Data (Original) Memory Usage: 181.96 MB (2.3% of available memory)
	Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
	Stage 1 Generators:
		Fitting AsTypeFeatureGenerator...
	Stage 2 Generators:
		Fitting FillNaFeatureGenerator...
	Stage 3 Generators:
		Fitting IdentityFeatureGenerator...
	Stage 4 Generators:
		Fitting DropUniqueFeatureGenerator...
	Stage 5 Generators:
		Fitting DropDuplicatesFeatureGenerator...
	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 812 | ['WORLDCLIM_BIO1_annual_mean_temperature', 'WORLDCLIM_BIO12_annual_precipitation', 'WORLDCLIM_BIO13.BIO14_delta_precipitation_of_wettest_and_dryest_month', 'WORLDCLIM_BIO15_precipitation_seasonality', 'WORLDCLIM_BIO4_temperature_seasonality', ...]
		('int', []) : 122 | ['id', 

[1000]	valid_set's rmse: 2.7975
[2000]	valid_set's rmse: 2.77084
[3000]	valid_set's rmse: 2.76197
[4000]	valid_set's rmse: 2.76049
[5000]	valid_set's rmse: 2.75914
[6000]	valid_set's rmse: 2.75773
[7000]	valid_set's rmse: 2.75728
[8000]	valid_set's rmse: 2.75624
[9000]	valid_set's rmse: 2.75584
[10000]	valid_set's rmse: 2.75552


	-2.7555	 = Validation score (-root_mean_squared_error)
	722.76s	 = Training runtime
	0.62s	 = Validation runtime
Fitting model: LightGBM ...


[1000]	valid_set's rmse: 2.79461
[2000]	valid_set's rmse: 2.77581
[3000]	valid_set's rmse: 2.76911
[4000]	valid_set's rmse: 2.76665
[5000]	valid_set's rmse: 2.76656


	-2.7665	 = Validation score (-root_mean_squared_error)
	455.92s	 = Training runtime
	0.25s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-3.0041	 = Validation score (-root_mean_squared_error)
	5707.16s	 = Training runtime
	0.29s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-3.0281	 = Validation score (-root_mean_squared_error)
	1414.74s	 = Training runtime
	0.24s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-2.7646	 = Validation score (-root_mean_squared_error)
	158.74s	 = Training runtime
	0.24s	 = Validation runtime
Fitting model: NeuralNetTorch ...
	-2.7368	 = Validation score (-root_mean_squared_error)
	132.61s	 = Training runtime
	0.27s	 = Validation runtime
Fitting model: LightGBMLarge ...


[1000]	valid_set's rmse: 2.76306
[2000]	valid_set's rmse: 2.75877
[3000]	valid_set's rmse: 2.75837
[4000]	valid_set's rmse: 2.75822
[5000]	valid_set's rmse: 2.75819
[6000]	valid_set's rmse: 2.75819
[7000]	valid_set's rmse: 2.75818
[8000]	valid_set's rmse: 2.75818
[9000]	valid_set's rmse: 2.75818
[10000]	valid_set's rmse: 2.75818


	-2.7582	 = Validation score (-root_mean_squared_error)
	2648.19s	 = Training runtime
	1.43s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	Ensemble Weights: {'NeuralNetTorch': 0.375, 'NeuralNetFastAI': 0.333, 'LightGBMLarge': 0.167, 'LightGBM': 0.125}
	-2.6075	 = Validation score (-root_mean_squared_error)
	0.03s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 11264.22s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 1140.4 rows/s (2500 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("multilabel_predictor_source\Predictor_X18_mean")
Verbosity: 2 (Standard Logging)
AutoGluon Version: 1.1.1
Python Version: 3.10.11
Operating System: Windows
Platform Machine: AMD64
Platform Version: 10.0.22631
CPU Count: 12
Memory Avail: 7.60 GB / 15.79 GB (48.1%)
Disk Space Avail: 74.16 GB / 150.79 GB (49.2%)
No presets specified! To achieve strong results with AutoGluon, it is recommende

Fitting TabularPredictor for label: X26_mean ...


Fitting AutoMLPipelineFeatureGenerator...
	Available Memory: 7763.00 MB
	Train Data (Original) Memory Usage: 182.29 MB (2.3% of available memory)
	Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
	Stage 1 Generators:
		Fitting AsTypeFeatureGenerator...
	Stage 2 Generators:
		Fitting FillNaFeatureGenerator...
	Stage 3 Generators:
		Fitting IdentityFeatureGenerator...
	Stage 4 Generators:
		Fitting DropUniqueFeatureGenerator...
	Stage 5 Generators:
		Fitting DropDuplicatesFeatureGenerator...
	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 813 | ['WORLDCLIM_BIO1_annual_mean_temperature', 'WORLDCLIM_BIO12_annual_precipitation', 'WORLDCLIM_BIO13.BIO14_delta_precipitation_of_wettest_and_dryest_month', 'WORLDCLIM_BIO15_precipitation_seasonality', 'WORLDCLIM_BIO4_temperature_seasonality', ...]
		('int', []) : 122 | ['id', 'SOIL_bdod_0.5cm_mean_0.01_deg', 'SOIL_bdod_100.200c

[1000]	valid_set's rmse: 53.3837


	-53.3795	 = Validation score (-root_mean_squared_error)
	442.04s	 = Training runtime
	0.13s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	Ensemble Weights: {'LightGBMLarge': 0.84, 'NeuralNetFastAI': 0.16}
	-53.1964	 = Validation score (-root_mean_squared_error)
	0.03s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 12390.51s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 7137.6 rows/s (2500 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("multilabel_predictor_source\Predictor_X26_mean")
Verbosity: 2 (Standard Logging)
AutoGluon Version: 1.1.1
Python Version: 3.10.11
Operating System: Windows
Platform Machine: AMD64
Platform Version: 10.0.22631
CPU Count: 12
Memory Avail: 7.35 GB / 15.79 GB (46.5%)
Disk Space Avail: 72.47 GB / 150.79 GB (48.1%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the available presets.
	Recommended 

Fitting TabularPredictor for label: X50_mean ...


Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory: 7495.31 MB
	Train Data (Original) Memory Usage: 182.62 MB (2.4% of available memory)
	Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
	Stage 1 Generators:
		Fitting AsTypeFeatureGenerator...
	Stage 2 Generators:
		Fitting FillNaFeatureGenerator...
	Stage 3 Generators:
		Fitting IdentityFeatureGenerator...
	Stage 4 Generators:
		Fitting DropUniqueFeatureGenerator...
	Stage 5 Generators:
		Fitting DropDuplicatesFeatureGenerator...
	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 814 | ['WORLDCLIM_BIO1_annual_mean_temperature', 'WORLDCLIM_BIO12_annual_precipitation', 'WORLDCLIM_BIO13.BIO14_delta_precipitation_of_wettest_and_dryest_month', 'WORLDCLIM_BIO15_precipitation_seasonality', 'WORLDCLIM_BIO4_temperature_seasonality', ...]
		('int', []) : 122 | ['id', 

[1000]	valid_set's rmse: 0.361925
[2000]	valid_set's rmse: 0.357162
[3000]	valid_set's rmse: 0.355106
[4000]	valid_set's rmse: 0.353916
[5000]	valid_set's rmse: 0.353093
[6000]	valid_set's rmse: 0.352683
[7000]	valid_set's rmse: 0.352526
[8000]	valid_set's rmse: 0.352398
[9000]	valid_set's rmse: 0.352323
[10000]	valid_set's rmse: 0.352234


	-0.3522	 = Validation score (-root_mean_squared_error)
	744.88s	 = Training runtime
	0.8s	 = Validation runtime
Fitting model: LightGBM ...


[1000]	valid_set's rmse: 0.352549
[2000]	valid_set's rmse: 0.349969
[3000]	valid_set's rmse: 0.348952
[4000]	valid_set's rmse: 0.348591
[5000]	valid_set's rmse: 0.348339
[6000]	valid_set's rmse: 0.348147
[7000]	valid_set's rmse: 0.348034
[8000]	valid_set's rmse: 0.347988
[9000]	valid_set's rmse: 0.347937
[10000]	valid_set's rmse: 0.347919


	-0.3479	 = Validation score (-root_mean_squared_error)
	921.95s	 = Training runtime
	0.8s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-0.344	 = Validation score (-root_mean_squared_error)
	3068.82s	 = Training runtime
	0.21s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-0.3735	 = Validation score (-root_mean_squared_error)
	1075.89s	 = Training runtime
	0.21s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-0.397	 = Validation score (-root_mean_squared_error)
	161.54s	 = Training runtime
	0.25s	 = Validation runtime
Fitting model: NeuralNetTorch ...
	-0.3914	 = Validation score (-root_mean_squared_error)
	251.87s	 = Training runtime
	0.53s	 = Validation runtime
Fitting model: LightGBMLarge ...


[1000]	valid_set's rmse: 0.330805
[2000]	valid_set's rmse: 0.329588
[3000]	valid_set's rmse: 0.329333
[4000]	valid_set's rmse: 0.329259
[5000]	valid_set's rmse: 0.329238
[6000]	valid_set's rmse: 0.329229
[7000]	valid_set's rmse: 0.329227
[8000]	valid_set's rmse: 0.329226
[9000]	valid_set's rmse: 0.329226
[10000]	valid_set's rmse: 0.329226


	-0.3292	 = Validation score (-root_mean_squared_error)
	2505.43s	 = Training runtime
	1.29s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	Ensemble Weights: {'LightGBMLarge': 0.857, 'NeuralNetFastAI': 0.095, 'RandomForestMSE': 0.048}
	-0.3284	 = Validation score (-root_mean_squared_error)
	0.02s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 8758.55s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 1436.0 rows/s (2500 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("multilabel_predictor_source\Predictor_X50_mean")
Verbosity: 2 (Standard Logging)
AutoGluon Version: 1.1.1
Python Version: 3.10.11
Operating System: Windows
Platform Machine: AMD64
Platform Version: 10.0.22631
CPU Count: 12
Memory Avail: 6.87 GB / 15.79 GB (43.5%)
Disk Space Avail: 70.62 GB / 150.79 GB (46.8%)
No presets specified! To achieve strong results with AutoGluon, it is recommended to use the availa

Fitting TabularPredictor for label: X3112_mean ...


	Available Memory: 7019.43 MB
	Train Data (Original) Memory Usage: 182.95 MB (2.6% of available memory)
	Inferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.
	Stage 1 Generators:
		Fitting AsTypeFeatureGenerator...
	Stage 2 Generators:
		Fitting FillNaFeatureGenerator...
	Stage 3 Generators:
		Fitting IdentityFeatureGenerator...
	Stage 4 Generators:
		Fitting DropUniqueFeatureGenerator...
	Stage 5 Generators:
		Fitting DropDuplicatesFeatureGenerator...
	Types of features in original data (raw dtype, special dtypes):
		('float', []) : 815 | ['WORLDCLIM_BIO1_annual_mean_temperature', 'WORLDCLIM_BIO12_annual_precipitation', 'WORLDCLIM_BIO13.BIO14_delta_precipitation_of_wettest_and_dryest_month', 'WORLDCLIM_BIO15_precipitation_seasonality', 'WORLDCLIM_BIO4_temperature_seasonality', ...]
		('int', []) : 122 | ['id', 'SOIL_bdod_0.5cm_mean_0.01_deg', 'SOIL_bdod_100.200cm_mean_0.01_deg', 'SOIL_bdod_15.30cm_mean_

[1000]	valid_set's rmse: 1470.67
[2000]	valid_set's rmse: 1460.77
[3000]	valid_set's rmse: 1453.2
[4000]	valid_set's rmse: 1449.16
[5000]	valid_set's rmse: 1448
[6000]	valid_set's rmse: 1447.65
[7000]	valid_set's rmse: 1447.57
[8000]	valid_set's rmse: 1446.92
[9000]	valid_set's rmse: 1446.78
[10000]	valid_set's rmse: 1446.71


	-1446.6537	 = Validation score (-root_mean_squared_error)
	680.41s	 = Training runtime
	0.54s	 = Validation runtime
Fitting model: LightGBM ...


[1000]	valid_set's rmse: 1401.6
[2000]	valid_set's rmse: 1389.58
[3000]	valid_set's rmse: 1386.45
[4000]	valid_set's rmse: 1385.03
[5000]	valid_set's rmse: 1384.81
[6000]	valid_set's rmse: 1384.61
[7000]	valid_set's rmse: 1384.48
[8000]	valid_set's rmse: 1384.34
[9000]	valid_set's rmse: 1384.35


	-1384.3118	 = Validation score (-root_mean_squared_error)
	820.56s	 = Training runtime
	0.42s	 = Validation runtime
Fitting model: RandomForestMSE ...
	-1349.2685	 = Validation score (-root_mean_squared_error)
	4440.72s	 = Training runtime
	0.21s	 = Validation runtime
Fitting model: ExtraTreesMSE ...
	-1451.9243	 = Validation score (-root_mean_squared_error)
	1308.72s	 = Training runtime
	0.22s	 = Validation runtime
Fitting model: NeuralNetFastAI ...
	-1514.4165	 = Validation score (-root_mean_squared_error)
	158.34s	 = Training runtime
	0.24s	 = Validation runtime
Fitting model: NeuralNetTorch ...
	-1537.7455	 = Validation score (-root_mean_squared_error)
	143.11s	 = Training runtime
	0.53s	 = Validation runtime
Fitting model: LightGBMLarge ...


[1000]	valid_set's rmse: 1327.67
[2000]	valid_set's rmse: 1325.67
[3000]	valid_set's rmse: 1325.22
[4000]	valid_set's rmse: 1325.1
[5000]	valid_set's rmse: 1325.06
[6000]	valid_set's rmse: 1325.05
[7000]	valid_set's rmse: 1325.04
[8000]	valid_set's rmse: 1325.04
[9000]	valid_set's rmse: 1325.04
[10000]	valid_set's rmse: 1325.04


	-1325.0433	 = Validation score (-root_mean_squared_error)
	2420.99s	 = Training runtime
	1.04s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	Ensemble Weights: {'LightGBMLarge': 0.571, 'RandomForestMSE': 0.333, 'NeuralNetFastAI': 0.095}
	-1313.9254	 = Validation score (-root_mean_squared_error)
	0.03s	 = Training runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 9995.55s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 1683.5 rows/s (2500 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("multilabel_predictor_source\Predictor_X3112_mean")


MultilabelPredictor saved to disk. Load with: MultilabelPredictor.load('multilabel_predictor_source')


In [3]:
# This cell depends on names defined by the training cell: `test_csv_path`, `output_path`,
# `ancillary_columns` and the fitted `multi_predictor`.
test_df = pd.read_csv(test_csv_path)
# The embeddings are not recomputed here: the preprocess_images() call is disabled and a
# previously pickled result is loaded instead (the progress message still says "Extracting").
print("Extracting image embeddings for test data...")
# test_image_embeddings = preprocess_images(test_df, test_image_dir)
# NOTE(security): pickle.load can execute arbitrary code; only load cache files you produced.
with open('test_image_embeddings.pkl', 'rb') as f:
 test_image_embeddings = pickle.load(f)

# Same feature layout as training minus the targets; the concat aligns on the index, so the
# embeddings must share test_df's index.
test_combined = pd.concat([test_df[ancillary_columns], test_image_embeddings], axis=1)

# Make predictions on test data
# Returns a DataFrame with one column per target label.
print("Making predictions on test data...")
predictions = multi_predictor.predict(test_combined)

# Save predictions to CSV
# insert() modifies `predictions` in place, placing 'id' as the first column; re-running this
# part without re-running predict() raises because the column already exists.
print(f"Saving predictions to {output_path}...")
predictions.insert(0, 'id', test_df['id'])
predictions.to_csv(output_path, index=False)
print("Predictions saved successfully!")

Extracting image embeddings for test data...
Making predictions on test data...
Predicting with TabularPredictor for label: X4_mean ...
Predicting with TabularPredictor for label: X11_mean ...
Predicting with TabularPredictor for label: X18_mean ...
Predicting with TabularPredictor for label: X26_mean ...
Predicting with TabularPredictor for label: X50_mean ...
Predicting with TabularPredictor for label: X3112_mean ...
Saving predictions to prediction.csv...
Predictions saved successfully!
