import os
import json
import re

def extract_frame_number(frame_name):
    """Return the frame index embedded in *frame_name* as an integer.

    The index is the first run of digits that directly follows the literal
    text ``frame``; leading zeros are dropped by the integer conversion.
    For example, ``'_0q5WkK91xU_13_1_frame0002'`` yields ``2``.

    Returns ``None`` when the name contains no ``frame<digits>`` part.
    """
    found = re.search(r'frame(\d+)', frame_name)
    if found is None:
        return None
    digits = found.group(1)
    return int(digits)

def generate_new_json(data_dir, output_json_path, min_frames=10):
    """Index the per-frame images under *data_dir* and write the index as JSON.

    Layout read by this function::

        data_dir/<video_name>/<frame_name>/orig_<frame_name>_inputs.jpg
        data_dir/<video_name>/<frame_name>/orig_<frame_name>_shape_images.jpg

    For every video directory the frame directories are visited in ascending
    frame-number order (see ``extract_frame_number``) and the paths of the
    two images are collected into parallel lists.  A video is reported as
    invalid and left out of the output when any frame has only one of the two
    images, or when fewer than *min_frames* complete frames were found.

    The file written to *output_json_path* has the form::

        {video_name: {"clip_data_list": [{"frame_path_list": [...],
                                          "shapes_list": [...]}]}}

    Args:
        data_dir: Directory containing one sub-directory per video.
        output_json_path: Path of the JSON file to create or overwrite.
        min_frames: Minimum number of complete frames a video needs in order
            to be kept.  Defaults to 10.

    Returns:
        None.  The result is written to *output_json_path*; progress and
        rejected videos are reported on standard output.
    """
    data_dict = {}

    # os.listdir returns entries in arbitrary order; sorting makes the key
    # order of the generated JSON reproducible between runs.
    for video_name in sorted(os.listdir(data_dir)):
        video_path = os.path.join(data_dir, video_name)
        if not os.path.isdir(video_path):
            continue  # only directories are videos
        print(f"process {video_name}")

        frame_path_list = []
        shapes_list = []
        has_unpaired_frame = False

        for frame_name in sorted(os.listdir(video_path), key=_frame_sort_key):
            frame_path = os.path.join(video_path, frame_name)
            if not os.path.isdir(frame_path):
                continue  # only directories are frames

            inputs_path = os.path.join(frame_path, f"orig_{frame_name}_inputs.jpg")
            shapes_path = os.path.join(frame_path, f"orig_{frame_name}_shape_images.jpg")
            has_inputs = os.path.exists(inputs_path)
            has_shapes = os.path.exists(shapes_path)

            if has_inputs != has_shapes:
                # Exactly one of the two images is missing.  Comparing list
                # lengths alone would miss this when another frame lacks the
                # opposite image, leaving the two lists silently misaligned.
                has_unpaired_frame = True
                break
            if has_inputs:
                frame_path_list.append(inputs_path)
                shapes_list.append(shapes_path)

        if has_unpaired_frame or len(frame_path_list) < min_frames:
            print(f"!!! invalid {video_name} !!!")
            continue

        clip_data = {
            'frame_path_list': frame_path_list,
            'shapes_list': shapes_list
        }
        data_dict[video_name] = {'clip_data_list': [clip_data]}

    with open(output_json_path, 'w', encoding='utf-8') as f:
        json.dump(data_dict, f, indent=2)


def _frame_sort_key(frame_name):
    """Sort key: ascending frame number, names without a number last."""
    number = extract_frame_number(frame_name)
    # None cannot be ordered against an int (or another None), so the key is
    # a tuple whose first item separates numbered from un-numbered names; the
    # name itself breaks ties deterministically.
    return (number is None, 0 if number is None else number, frame_name)

# Script driver: build the JSON index for the dataset configured below.
data_directory = '/mnt/znzz/kkk/datasets/aniportrait/deca_head1000_yolo'  # input data directory
output_json_file = '/root/users/jusjus/AniPortrait/dataset/celebvtext_head1000_yolo_detailed.json'  # output JSON file path
generate_new_json(
    data_dir=data_directory,
    output_json_path=output_json_file,
)

"""def count_root_entries(json_path):
    with open(json_path, 'r') as f:
        data = json.load(f)
    return len(data)

json_file_path = 'dataset/celebvtext_head1000.json'  # 替换为你的 JSON 文件路径
num_entries = count_root_entries(json_file_path)
print(f"根级条目数量: {num_entries}")"""
