import os
import json
import re

def extract_frame_number(frame_name):
    """
    Return the numeric frame index embedded in a frame name.

    The first run of digits that follows the literal text ``frame`` is
    parsed as an integer, so '_0q5WkK91xU_13_1_frame0002' yields 2 (leading
    zeros are dropped). Returns None when the name has no ``frame<digits>``
    part.
    """
    match = re.search(r'frame(\d+)', frame_name)
    return int(match.group(1)) if match else None


# Subject IDs (the text before the first "_" of a clip directory name)
# that belong to each split.
_SPLIT_IDS = {
    "training": frozenset([
        '0041', '0048', '0094', '0100', '0116', '0168',
        '0175', '0189', '0232', '0259', '0250',
    ]),
    "testing": frozenset([
        '0278', '0290', '0295', '0297', '0026',
        '0156', '0195', '0099', '0262',
    ]),
}

# A clip with fewer usable frames than this is rejected.
_MIN_FRAMES = 10


def _frame_sort_key(frame_name):
    """Order names by frame number; names without one sort last, by name."""
    number = extract_frame_number(frame_name)
    # None cannot be compared with int, so it is mapped to a flag + filler.
    return (number is None, number or 0, frame_name)


def _collect_clip_paths(vid_path):
    """Return (input image paths, shape image paths) of one clip, in frame order."""
    frame_path_list = []
    shapes_list = []
    for frame_name in sorted(os.listdir(vid_path), key=_frame_sort_key):
        frame_path = os.path.join(vid_path, frame_name)
        if not os.path.isdir(frame_path):
            continue  # stray files next to the frame directories

        inputs_path = os.path.join(frame_path, f"{frame_name}_inputs.jpg")
        shapes_path = os.path.join(frame_path, f"{frame_name}_shape_detail_images.jpg")
        if os.path.exists(inputs_path):
            frame_path_list.append(inputs_path)
        if os.path.exists(shapes_path):
            shapes_list.append(shapes_path)
    return frame_path_list, shapes_list


def generate_new_json(data_dir, output_json_path, training_testing, data_dict=None):
    """
    Index the per-frame images below ``data_dir`` and write them to a JSON file.

    The directory layout read by this function is::

        data_dir/<video_name>/<vid>/<frame_name>/<frame_name>_inputs.jpg
        data_dir/<video_name>/<vid>/<frame_name>/<frame_name>_shape_detail_images.jpg

    Only ``<vid>`` directories whose name starts (before the first "_") with
    an ID of the selected split are used. A clip is rejected when its number
    of input images differs from its number of shape images, or when it has
    fewer than 10 input images.

    Args:
        data_dir: Root directory to scan.
        output_json_path: Path of the JSON file to write (overwritten).
        training_testing: "training" or "testing"; selects the ID list.
        data_dict: Optional dict to extend; a fresh dict is used when omitted.
            An existing entry for a scanned ``video_name`` is replaced.

    Returns:
        The resulting dict, ``{video_name: {'clip_data_list': [clip, ...]}}``,
        where each clip has the keys 'frame_path_list' and 'shapes_list'.

    Raises:
        ValueError: If ``training_testing`` names an unknown split.
    """
    if training_testing not in _SPLIT_IDS:
        raise ValueError(
            f"training_testing must be one of {sorted(_SPLIT_IDS)}, "
            f"got {training_testing!r}"
        )
    allowed_ids = _SPLIT_IDS[training_testing]

    # A literal {} default would be shared (and keep growing) across calls.
    if data_dict is None:
        data_dict = {}

    # os.listdir order is arbitrary; sorting keeps the output reproducible.
    for video_name in sorted(os.listdir(data_dir)):
        video_path = os.path.join(data_dir, video_name)
        print(f"process {video_name}")
        if not os.path.isdir(video_path):
            continue  # skip plain files in the data root

        clip_data_list = []
        for vid in sorted(os.listdir(video_path)):
            if vid.split("_")[0] not in allowed_ids:
                continue
            vid_path = os.path.join(video_path, vid)
            if not os.path.isdir(vid_path):
                continue

            frame_path_list, shapes_list = _collect_clip_paths(vid_path)
            if len(frame_path_list) != len(shapes_list) or len(frame_path_list) < _MIN_FRAMES:
                print(f"!!! invalid {video_name}/{vid} !!!")
                continue

            clip_data_list.append({
                'frame_path_list': frame_path_list,
                'shapes_list': shapes_list
            })

        # Keep every valid clip of this video instead of only the last one.
        if clip_data_list:
            data_dict[video_name] = {'clip_data_list': clip_data_list}

    # Written once, after the scan, so the file exists even for an empty root.
    with open(output_json_path, 'w', encoding='utf-8') as f:
        json.dump(data_dict, f, indent=2)

    return data_dict

# Configure the run and generate the JSON file.
training_testing = "training"
# Root directory of the input data.
data_directory = '/mnt/znzz/jus/RenderMe-360_release_data_20id_full/multiview_flame_0811'
# Destination path of the generated JSON file.
output_json_file = '/root/users/jusjus/AniPortrait/dataset/renderme_multi_detailed_v2_celebv_mix.json'
generate_new_json(
    data_dir=data_directory,
    output_json_path=output_json_file,
    training_testing=training_testing,
)

# Disabled snippet: it is wrapped in a bare string literal, so none of it runs.
# If enabled, it would reload the generated JSON file and print the number of
# top-level entries it contains.
"""def count_root_entries(json_path):
    with open(json_path, 'r') as f:
        data = json.load(f)
    return len(data)

num_entries = count_root_entries(output_json_file)
print(f"根级条目数量: {num_entries}")
"""