def run(paths, args):
    """Run the LAC / APS / RAPS conformal-prediction evaluation and save the results.

    The image folder under ``paths["dataset_dir"] / "Images"`` is split per
    class into a calibration subset (first 200 shuffled samples) and a test
    subset (the tail of the remaining samples).  For every name in
    ``model_names`` a model is obtained from ``calibrate.get_model``, three
    kinds of prediction sets are produced on the test subset, and coverage,
    size-stratified coverage, mean set width and Spearman correlations are
    accumulated into result tables.

    Args:
        paths: Mapping with the keys ``"dataset_dir"`` and ``"results_dir"``.
            Both values must support the ``/`` operator; ``"results_dir"``
            must additionally provide ``mkdir``, ``parent`` and ``name``
            (presumably ``pathlib.Path`` objects - confirm with the caller).
        args: Not used by this function.

    Side effects:
        * Seeds the global ``random`` module with 42.
        * Creates ``results_dir`` and one sibling directory
          ``<results_dir.name>_<model name>`` per model, and writes the
          cumulative CSV tables into each of them after that model finishes.
        * Writes ``prediction_sets_MLRSNet.pkl`` and the final CSV tables into
          ``results_dir``.
    """
    import csv
    import glob
    import pickle
    import random
    import warnings

    import numpy as np
    import pandas as pd
    import torch
    from torch.utils.data import Subset, DataLoader
    from torchvision import datasets, transforms
    from scipy.stats import spearmanr

    from src import calibrate

    # ------------------------------------------------------------------ data
    imagenet_mean = [0.485, 0.456, 0.406]
    imagenet_std = [0.229, 0.224, 0.225]
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
    ])

    ds_dir = paths["dataset_dir"]
    img_dir = ds_dir / "Images"
    lbl_dir = ds_dir / "labels"
    # ImageFolder already exposes `classes` in sorted order, consistent with
    # the integer labels in `samples`, so no re-sorting is done here.
    dataset = datasets.ImageFolder(str(img_dir), transform=transform)

    # Group sample indices by their integer class label.
    class_to_indices = {}
    for idx, (_, label) in enumerate(dataset.samples):
        class_to_indices.setdefault(label, []).append(idx)

    calib_indices, test_indices = [], []
    random.seed(42)
    calib_data_cutoff = 200
    for indices in class_to_indices.values():
        random.shuffle(indices)
        remaining = indices[calib_data_cutoff:]
        n = len(remaining)
        n_train = int(0.7 * n)
        n_val = int(0.1 * n)
        # The first 70% + 10% of `remaining` are skipped (presumably the
        # train/validation portions used elsewhere - confirm); only the tail
        # is evaluated here.
        calib_indices.extend(indices[:calib_data_cutoff])
        test_indices.extend(remaining[n_train + n_val:])

    calib_set = Subset(dataset, calib_indices)
    test_set = Subset(dataset, test_indices)

    test_loader = DataLoader(test_set, batch_size=32, shuffle=False)
    calib_loader = DataLoader(calib_set, batch_size=32, shuffle=False)

    # NOTE(review): this drops the first three '/'-separated components of the
    # sample path, so the result depends on how deep `dataset_dir` is and on
    # '/' being the separator. Confirm it matches the CSV 'image' column.
    test_files = [dataset.samples[i][0].split('/', 3)[-1] for i in test_indices]

    # Sorted so that the concatenation order (and therefore which duplicate
    # row counts as "first") does not depend on filesystem listing order.
    csv_files = sorted(glob.glob(str(lbl_dir / "*.csv")))
    multiple_labels = pd.concat([pd.read_csv(f) for f in csv_files], ignore_index=True)

    # Build the image -> positive-label-names lookup once instead of scanning
    # the whole label frame for every test file. Only the first row per image
    # is used, matching a first-match lookup.
    label_columns = [c for c in multiple_labels.columns if c != 'image']
    first_rows = multiple_labels.drop_duplicates(subset='image', keep='first')
    first_rows = first_rows[first_rows['image'].isin(set(test_files))]
    positive = (first_rows[label_columns] == 1).to_numpy()
    column_names = np.array(label_columns, dtype=object)
    names_by_image = {
        image: column_names[mask].tolist()
        for image, mask in zip(first_rows['image'], positive)
    }

    if test_files and not names_by_image:
        warnings.warn(
            "No test image matched any row of the label CSV files; every "
            "sample falls back to the default ground-truth label."
        )

    set_groundtruth_test = []
    for path in test_files:
        class_names = names_by_image.get(path)
        if not class_names:
            # Fallback used both for images without a label row and for rows
            # without any positive label.
            class_names = ['eroded farmland']
        set_groundtruth_test.append(class_names)

    groundtruth_length = [len(i) for i in set_groundtruth_test]
    # The test loader is not shuffled, so the labels it would yield are the
    # stored sample labels in `test_indices` order; reading them directly
    # avoids decoding every test image just to collect targets.
    y_true = np.asarray([dataset.samples[i][1] for i in test_indices], dtype=np.int64)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model_names = ['resnet18', 'resnet34', 'resnet50', 'vgg16', 'vgg19', 'densenet121', 'densenet161', 'mobilenet_v2']

    # ------------------------------------------------------------- metrics
    sets_eval = {'cc': [], 'ssc': [], 'width_average': [], 'spearman_corr': [], 'p_value': []}

    def compute_metrics(prediction_sets):
        """Compute coverage, size-stratified coverage, mean width and the
        Spearman correlation between ground-truth label counts and set sizes;
        also logs all of them (plus the p-value) into ``sets_eval``."""
        cc = calibrate.compute_coverage(prediction_sets, y_true)
        ssc = calibrate.compute_size_stratified_coverage(prediction_sets, y_true)
        width_average = calibrate.compute_average_width(prediction_sets)
        prediction_set_length = [len(i) for i in prediction_sets]
        spearman_corr, p_value = spearmanr(groundtruth_length, prediction_set_length)
        sets_eval['cc'].append(cc)
        sets_eval['ssc'].append(ssc)
        sets_eval['width_average'].append(width_average)
        sets_eval['spearman_corr'].append(spearman_corr)
        sets_eval['p_value'].append(p_value)
        return cc, ssc, width_average, spearman_corr

    def cp_cond_entropy_corr(cond_entropy, prediction_sets):
        """Return the Spearman correlation between ``cond_entropy`` and the
        sizes of ``prediction_sets``."""
        prediction_set_length = [len(i) for i in prediction_sets]
        spearman_corr, _ = spearmanr(cond_entropy, prediction_set_length)
        return spearman_corr

    performance_table = {'acc': [], 'lac_cc': [], 'lac_ssc': [], 'lac_mean_width': [],
                         'aps_cc': [], 'aps_ssc': [], 'aps_mean_width': [],
                         'raps_cc': [], 'raps_ssc': [], 'raps_mean_width': []}
    correlation_table = {'lac_spearman': [], 'aps_spearman': [], 'raps_spearman': []}
    ece_table = []
    correlation_table_entropy = {'lac': [], 'aps': [], 'raps': []}
    prediction_sets_holder = []
    alpha = 0.05

    def _record(tag, prediction_sets, cond_entropy):
        """Append the metrics of one method (``tag`` is 'lac', 'aps' or
        'raps') for the current model to all result tables."""
        correlation_table_entropy[tag].append(cp_cond_entropy_corr(cond_entropy, prediction_sets))
        cc, ssc, width_average, spearman_corr = compute_metrics(prediction_sets)
        performance_table[f'{tag}_cc'].append(cc)
        performance_table[f'{tag}_ssc'].append(ssc)
        performance_table[f'{tag}_mean_width'].append(width_average)
        correlation_table[f'{tag}_spearman'].append(spearman_corr)

    # -------------------------------------------------------------- output
    base_results = paths["results_dir"]
    base_results.mkdir(parents=True, exist_ok=True)

    def _dict_to_csv(d, out_name, directory):
        """Write a dict of equally long columns as a CSV with a header row."""
        rows = zip(*d.values())
        with open(directory / out_name, "w", newline="") as csvfile:
            w = csv.writer(csvfile)
            w.writerow(list(d.keys()))
            w.writerows(rows)

    def _save_all(directory):
        """Dump every result table accumulated so far into ``directory``."""
        with open(directory / "ece_table_MLRSNet.csv", "w", newline="") as f:
            csv.writer(f).writerows([[item] for item in ece_table])
        _dict_to_csv(performance_table, "performance_table_MLRSNet.csv", directory)
        _dict_to_csv(correlation_table, "correlation_table_MLRSNet.csv", directory)
        _dict_to_csv(correlation_table_entropy, "correlation_table_entropy_MLRSNet.csv", directory)

    # ---------------------------------------------------------- evaluation
    for name in model_names:
        # per-model results folder: <results_dir name>_<modelname>
        results_dir = base_results.parent / f"{base_results.name}_{name}"
        results_dir.mkdir(parents=True, exist_ok=True)

        model = calibrate.get_model(name, dataset, device)
        ece = calibrate.compute_ece(model, test_loader, device)
        ece_table.append(ece)

        # LAC
        cal_nonconformity_scores, _ = calibrate.compute_nonconformity_scores(model, calib_loader, device)
        threshold = calibrate.determine_threshold(cal_nonconformity_scores, alpha)
        prediction_sets = calibrate.predict_with_lac(model, test_loader, threshold, device)
        prediction_sets_holder.append(prediction_sets)

        # Computed once per model and reused for all three methods.
        cond_entropy = calibrate.compute_conditional_entropy_per_image(model, test_loader, device)

        acc = calibrate.compute_accuracy(model, test_loader, device)
        performance_table['acc'].append(acc)
        _record('lac', prediction_sets, cond_entropy)

        # APS
        aps_scores = calibrate.compute_aps_scores(model, calib_loader, device)
        threshold = calibrate.determine_threshold(aps_scores, alpha)
        aps_prediction_sets = calibrate.predict_with_aps(model, test_loader, threshold, device)
        prediction_sets_holder.append(aps_prediction_sets)
        _record('aps', aps_prediction_sets, cond_entropy)

        # RAPS
        raps_scores = calibrate.compute_raps_scores(
            model, calib_loader, device,
            lambda_reg=0.05, k_reg=5, beta=1.0,
            calibration_fraction=0.9, random_state=42
        )
        threshold = calibrate.determine_threshold(raps_scores, alpha)
        raps_sets = calibrate.predict_with_raps(model, test_loader, threshold, device)
        prediction_sets_holder.append(raps_sets)
        _record('raps', raps_sets, cond_entropy)

        # save partial results after each model into the model-specific folder
        _save_all(results_dir)
        del model
        if device.type == 'cuda':
            # Hand cached GPU memory back before the next model is loaded.
            torch.cuda.empty_cache()

    # Pickle layout: [y_true, groundtruth_length, then per model: LAC, APS, RAPS sets].
    prediction_sets_holder.insert(0, y_true)
    prediction_sets_holder.insert(1, groundtruth_length)
    with open(base_results / "prediction_sets_MLRSNet.pkl", "wb") as f:
        pickle.dump(prediction_sets_holder, f)

    _save_all(base_results)
