import pickle
import gzip
import zipfile
import os
import time
import numpy as np

# Step 1: Load the safe pickle file
file_name = input("Enter the path to the safe pickle file (e.g., for_codalab_XXXXXX.pkl.gz): ").strip()

# If a .zip archive is provided, extract its .pkl.gz member next to the archive.
if file_name.endswith('.zip'):
    # Keep the archive path separate from the member name: the extraction
    # directory must come from where the archive lives, not from the member.
    zip_path = file_name
    extract_dir = os.path.dirname(zip_path) or os.curdir
    with zipfile.ZipFile(zip_path, 'r') as zf:
        # Pick the first .pkl.gz member instead of blindly taking entry 0,
        # which may be a directory entry or an unrelated file.
        pkl_members = [name for name in zf.namelist() if name.endswith('.pkl.gz')]
        if not pkl_members:
            raise ValueError("The provided file must be a .pkl.gz file or a .zip containing a .pkl.gz file.")
        # extract() returns the path of the file it actually created.
        file_name = zf.extract(pkl_members[0], path=extract_dir)
if not file_name.endswith('.pkl.gz'):
    raise ValueError("The provided file must be a .pkl.gz file or a .zip containing a .pkl.gz file.")

# NOTE: pickle.load can execute arbitrary code while deserializing; only run
# this script on files from a trusted source.
with gzip.open(file_name, 'rb') as f:
    safe_data = pickle.load(f)

# Step 2: Convert lists back to numpy arrays
def to_numpy(obj):
    """Recursively convert the lists inside ``obj`` into NumPy arrays.

    A list is handed to ``np.array`` as a whole. If NumPy rejects it, each
    element is converted on its own and a plain list of the converted
    elements is returned instead. Dict values and tuple items are converted
    recursively (dict keys are left untouched). Anything else is returned
    unchanged.
    """
    if isinstance(obj, list):
        try:
            return np.array(obj)
        except Exception:
            # Best-effort fallback when the list cannot become one array.
            # Catch Exception rather than using a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still abort the conversion.
            return [to_numpy(v) for v in obj]
    elif isinstance(obj, dict):
        return {k: to_numpy(v) for k, v in obj.items()}
    elif isinstance(obj, tuple):
        return tuple(to_numpy(v) for v in obj)
    else:
        return obj
    
def check_and_convert(obj, prefix=""):
    """Return a copy of ``obj`` with every NumPy array replaced by a list.

    Dicts, lists and tuples are walked recursively and rebuilt as a dict,
    list or tuple respectively; arrays are converted with ``tolist()``; any
    other value is returned as is. ``prefix`` is extended with the current
    key or index on each recursive call but is not otherwise used here.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()

    if isinstance(obj, dict):
        converted = {}
        for key, value in obj.items():
            converted[key] = check_and_convert(value, prefix=f"{prefix}{key}->")
        return converted

    if isinstance(obj, list):
        items = []
        for idx, item in enumerate(obj):
            items.append(check_and_convert(item, prefix=f"{prefix}[{idx}]->"))
        return items

    if isinstance(obj, tuple):
        return tuple(
            check_and_convert(item, prefix=f"{prefix}({idx})->")
            for idx, item in enumerate(obj)
        )

    # Scalars, strings, None, etc. need no conversion.
    return obj

which = input("Do you want to convert the data back to numpy arrays? (yes/no): ").strip().lower()
# Only the exact answer 'yes' selects the NumPy conversion; anything else
# produces the plain-list form.
if which == 'yes':
    outputs_to_write = to_numpy(safe_data)
else:
    outputs_to_write = check_and_convert(safe_data)

# Step 3: Save using pickle protocol=2 (safe for Codalab Python 3 + NumPy 1.18)
# The rebuilt file deliberately takes the place of the input file.
output_fname = os.path.basename(file_name)
final_output_path = os.path.join(os.path.dirname(file_name), output_fname)

# Write to a temporary sibling first and swap it in only after the dump has
# succeeded, so a failure (e.g. disk full) does not destroy the original data.
temp_output_path = final_output_path + '.tmp'
try:
    # Pass the final name to GzipFile so the gzip header records it rather
    # than the temporary name.
    with open(temp_output_path, 'wb') as raw, \
            gzip.GzipFile(filename=final_output_path, mode='wb', fileobj=raw) as f:
        pickle.dump(outputs_to_write, f, protocol=2)
    os.replace(temp_output_path, final_output_path)
finally:
    # After a successful replace the temp file no longer exists; this only
    # cleans up after a failed write.
    if os.path.exists(temp_output_path):
        os.remove(temp_output_path)

# Step 4: Zip it
if which == 'yes':
    # Swap only the trailing '.pkl.gz' suffix; str.replace would also rewrite
    # any earlier occurrence in the path (e.g. inside a directory name).
    zip_output_path = final_output_path[:-len('.pkl.gz')] + '.zip'
    with zipfile.ZipFile(zip_output_path, 'w') as zf:
        zf.write(final_output_path, arcname=os.path.basename(final_output_path))
    print("Rebuilt and zipped using NumPy 1.18:", zip_output_path)
    # The rebuilt .pkl.gz now lives inside the zip; remove the loose copy.
    os.remove(final_output_path)
else:
    print("Saved as list format:", final_output_path)