#!/bin/bash
#
# Stage 1: DPO training launch.
#
# model_name is passed to the trainer as --run_name and is reused by the
# evaluation commands later in this file to locate
# /data/jcy/open_flamingo/<model_name>/checkpoint_final.pt and to name the
# results-<model_name>-*.json files.
model_name="OpenFlamingo-3B-DPO-70symbol-gqa-10k-noinstruct"
# JSON file handed to the trainer through --data_path.
data_path="/data/jcy/data/data/DPO/dpo_gqa/gqa_dpo_2shot_of_70sym_noinstruct.json"

# Trainer arguments are collected in an array so that every value stays a
# single word (the variable expansions are quoted) and the launch line below
# remains readable.
train_args=(
  --lm_path /data/jcy/ckpt/anas-awadalla/mpt-1b-redpajama-200b-dolly
  --tokenizer_path /data/jcy/ckpt/anas-awadalla/mpt-1b-redpajama-200b-dolly
  --model_path /data/jcy/ckpt/openflamingo/OpenFlamingo-3B-vitl-mpt1b-langinstruct/checkpoint.pt
  --data_path "$data_path"
  --image_folder /data/jcy/data/data
  --cross_attn_every_n_layers 1
  --dataset_resampled
  --run_name "$model_name"
  --report_to_wandb
  --train_num_samples 10000
  --learning_rate 5e-6
  --num_train_epochs 1
  --save_strategy steps
  --save_steps 1250
  --logging_steps 10
)

# Single node, 8 worker processes. Stop the whole script when training fails:
# the commands that follow evaluate this run's checkpoint_final.pt, which
# would then be missing or left over from an earlier run with the same name.
torchrun --nnodes=1 --nproc_per_node=8 open_flamingo/train/train_dpo.py \
  "${train_args[@]}" || {
  train_status=$?
  printf 'DPO training failed (exit %d); skipping evaluation.\n' "$train_status" >&2
  exit "$train_status"
}

#!/bin/bash
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=2
#SBATCH --gpus-per-task=1
# NOTE(review): the shebang above is not on the first line of the file and the
# #SBATCH lines come after executable commands, so the shell treats all four
# lines as ordinary comments; sbatch presumably ignores the directives at this
# position as well -- confirm against the sbatch documentation before relying
# on them.

# Example Slurm evaluation script.
# Notes:
# - VQAv2 test-dev and test-std annotations are not publicly available.
#   To evaluate on these splits, please follow the VQAv2 instructions and submit to EvalAI.
#   This script will evaluate on the val split.

# Environment shared by every evaluation launch that follows.
PYTHONFAULTHANDLER=1
CUDA_LAUNCH_BLOCKING=0
# export HOSTNAMES=hostname -I | awk '{print $1}'
# NOTE(review): the address below is hard-coded -- confirm it matches the host
# this script runs on.
MASTER_ADDR=172.17.0.2
MASTER_PORT=29968
export PYTHONFAULTHANDLER CUDA_LAUNCH_BLOCKING MASTER_ADDR MASTER_PORT

# Stage 2: benchmark evaluation.
#
# Each benchmark gets its own torchrun launch of open_flamingo/eval/evaluate.py.
# The launches are identical except for the --eval_* switch and the suffix of
# the results file, so the shared command line lives in run_eval and the
# dataset locations live in EVAL_DATASET_ARGS.

export PYTHONPATH="$PYTHONPATH:open_flamingo"

# Dataset locations passed unchanged to every launch, whichever benchmark is
# selected.
EVAL_DATASET_ARGS=(
  # COCO
  --coco_train_image_dir_path "/data/jcy/data/data/coco/train2014"
  --coco_val_image_dir_path "/data/jcy/data/data/coco/val2014"
  --coco_karpathy_json_path "/data/jcy/data/data/karpathy/dataset_coco.json"
  --coco_annotations_json_path "/data/jcy/data/data/coco/annotations/captions_val2014.json"
  # VQAv2
  --vqav2_train_image_dir_path "/data/jcy/data/data/VQAv2/train2014"
  --vqav2_train_annotations_json_path "/data/jcy/data/data/VQAv2/v2_mscoco_train2014_annotations.json"
  --vqav2_train_questions_json_path "/data/jcy/data/data/VQAv2/v2_OpenEnded_mscoco_train2014_questions.json"
  --vqav2_test_image_dir_path "/data/jcy/data/data/VQAv2/val2014"
  --vqav2_test_annotations_json_path "/data/jcy/data/data/VQAv2/v2_mscoco_val2014_annotations.json"
  --vqav2_test_questions_json_path "/data/jcy/data/data/VQAv2/v2_OpenEnded_mscoco_val2014_questions.json"
  # Flickr30k
  --flickr_image_dir_path "/data/jcy/data/data/Flickr_30K/flickr30k-images"
  --flickr_karpathy_json_path "/data/jcy/data/data/karpathy/dataset_flickr30k.json"
  --flickr_annotations_json_path "/data/jcy/data/data/Flickr_30K/dataset_flickr30k_coco_style.json"
  # OK-VQA
  --ok_vqa_train_image_dir_path "/data/jcy/data/data/coco/train2014"
  --ok_vqa_train_annotations_json_path "/data/jcy/data/data/OKVQA/mscoco_train2014_annotations.json"
  --ok_vqa_train_questions_json_path "/data/jcy/data/data/OKVQA/OpenEnded_mscoco_train2014_questions.json"
  --ok_vqa_test_image_dir_path "/data/jcy/data/data/coco/val2014"
  --ok_vqa_test_annotations_json_path "/data/jcy/data/data/OKVQA/mscoco_val2014_annotations.json"
  --ok_vqa_test_questions_json_path "/data/jcy/data/data/OKVQA/OpenEnded_mscoco_val2014_questions.json"
  # TextVQA
  --textvqa_image_dir_path "/data/jcy/data/data/textvqa/train_images/"
  --textvqa_train_questions_json_path "/data/jcy/data/data/textvqa/train_questions_vqa_format.json"
  --textvqa_train_annotations_json_path "/data/jcy/data/data/textvqa/train_annotations_vqa_format.json"
  --textvqa_test_questions_json_path "/data/jcy/data/data/textvqa/val_questions_vqa_format.json"
  --textvqa_test_annotations_json_path "/data/jcy/data/data/textvqa/val_annotations_vqa_format.json"
  # VizWiz
  --vizwiz_train_image_dir_path "/data/jcy/data/data/Vizwiz/train"
  --vizwiz_test_image_dir_path "/data/jcy/data/data/Vizwiz/val"
  --vizwiz_train_questions_json_path "/data/jcy/data/data/Vizwiz/train_questions_vqa_format.json"
  --vizwiz_train_annotations_json_path "/data/jcy/data/data/Vizwiz/train_annotations_vqa_format.json"
  --vizwiz_test_questions_json_path "/data/jcy/data/data/Vizwiz/val_questions_vqa_format.json"
  --vizwiz_test_annotations_json_path "/data/jcy/data/data/Vizwiz/val_annotations_vqa_format.json"
  # Hateful Memes
  --hateful_memes_image_dir_path "/data/jcy/data/data/hatefulmemes/data/img"
  --hateful_memes_train_annotations_json_path "/data/jcy/data/data/hatefulmemes/data/train.jsonl"
  --hateful_memes_test_annotations_json_path "/data/jcy/data/data/hatefulmemes/data/dev.jsonl"
)

#######################################
# Launch one benchmark evaluation with torchrun (1 node, 8 processes).
# Globals:
#   model_name (read)        - selects the checkpoint directory and is embedded
#                              in the results file name
#   EVAL_DATASET_ARGS (read) - dataset location arguments
# Arguments:
#   $1 - tag appended to the results file: results-<model_name>-<tag>.json
#   $2 - evaluate.py switch that selects the benchmark, e.g. --eval_coco
# Returns:
#   exit status of torchrun
#######################################
run_eval() {
  local tag=$1
  local eval_flag=$2
  # The same directory provides both the language model and its tokenizer.
  local lm_dir="/data/jcy/ckpt/anas-awadalla/mpt-1b-redpajama-200b-dolly"

  torchrun --nnodes=1 --nproc_per_node=8 open_flamingo/eval/evaluate.py \
    --vision_encoder_path ViT-L-14 \
    --vision_encoder_pretrained openai \
    --lm_path "$lm_dir" \
    --lm_tokenizer_path "$lm_dir" \
    --cross_attn_every_n_layers 1 \
    --checkpoint_path "/data/jcy/open_flamingo/${model_name}/checkpoint_final.pt" \
    --results_file "results-${model_name}-${tag}.json" \
    --precision amp_bf16 \
    --batch_size 32 \
    "$eval_flag" \
    "${EVAL_DATASET_ARGS[@]}"
}

# The launches are independent: a failing benchmark does not stop the ones
# after it.
run_eval coco --eval_coco
run_eval flickr --eval_flickr30
run_eval okvqa --eval_ok_vqa
run_eval textvqa --eval_textvqa

# vqav2 (currently disabled)
# run_eval vqav2 --eval_vqav2