GigaBrain Challenge 2026: Workshop on World Models Empowering Vision Language Action Model
Kinema4D: Kinematic 4D World Modeling for Spatiotemporal Embodied Simulation
[pdf]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Mutian and Zhang, Tianbao and Liu, Tianqi and Chen, Zhaoxi and Han, Xiaoguang and Liu, Ziwei},
  title     = {{Kinema4D}: Kinematic {4D} World Modeling for Spatiotemporal Embodied Simulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4495--4500},
}
FineGrasp: Towards Robust Grasping for Delicate Objects
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2026_CVPR,
  author    = {Du, Yun and Zhao, Mengao and Lin, Tianwei and Jin, Yiwei and Huang, Chaodong and Su, Zhizhong},
  title     = {{FineGrasp}: Towards Robust Grasping for Delicate Objects},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4428--4435},
}
RoboWM-Bench: A Benchmark for Evaluating World Models in Robotic Manipulation
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Feng and Chen, Yang and Xu, Kyle and Liu, Yuchen and Wang, Haifeng and Shen, Zhenhao and Lu, Jasper and Huang, Shengze and Wang, Yuanfei and Xie, Chen and Wu, Ruihai},
  title     = {{RoboWM-Bench}: A Benchmark for Evaluating World Models in Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4455--4460},
}
ProDrive: Proactive Planning for Autonomous Driving via Ego-Environment Co-Evolution
[pdf]
[arXiv]
[bibtex]@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Chuyao and Gan, Shengzhe and Ouyang, Zhuoli and Rui, Yuhan and Chi, Xiaowei and Han, Sirui and Wang, Jiankun and Zhang, Hong},
  title     = {{ProDrive}: Proactive Planning for Autonomous Driving via Ego-Environment Co-Evolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4436--4445},
}
Realtime-VLA V2: Learning to Run VLAs Fast, Smooth, and Accurate
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Chen and Hu, Yucheng and Ma, Yunchao and Yang, Yunhuan and Tan, Jing and Fan, Haoqiang},
  title     = {{Realtime-VLA} {V2}: Learning to Run {VLAs} Fast, Smooth, and Accurate},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4501--4509},
}
ARM: Advantage Reward Modeling for Long-Horizon Manipulation
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Yiming and Yu, Zixi and Mao, Weixin and Li, Yinhao and Hu, Qirui and Lan, Zihan and Zhu, Minzhao and Chen, Hua},
  title     = {{ARM}: Advantage Reward Modeling for Long-Horizon Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4468--4477},
}
Table30 V2: Evaluating Generalized Models by Real Robots at Scale
[pdf]
[bibtex]@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Yunchao and Chen, Ze and Zhou, Erjin and Liu, Ziming and Zhang, Haowei and Liu, Kai and Fan, Haoqiang},
  title     = {{Table30} {V2}: Evaluating Generalized Models by Real Robots at Scale},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4461--4467},
}
WA-RL: World-Action Model Reinforcement Learning with Reconstruction Rewards and Online Video SFT
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2026_CVPR,
  author    = {Qian, Zezhong and Chi, Xiaowei and Qi, Yu and Li, Haozhan and Chen, Zhi Yang and Zhang, Shanghang},
  title     = {{WA-RL}: World-Action Model Reinforcement Learning with Reconstruction Rewards and Online Video {SFT}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4478--4485},
}
A Latent-Centric Perspective on World Models for Autonomous Driving: Taxonomy, Evaluation, and Challenges
[pdf]
[bibtex]@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Rongxiang and Dong, Yongqi and Shao, Zhida},
  title     = {A Latent-Centric Perspective on World Models for Autonomous Driving: Taxonomy, Evaluation, and Challenges},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4510--4519},
}
CrossZero: Zero-Shot of Unseen Embodiment Manipulation via Object-Centric Hierarchical Diffusion Models
[pdf]
[bibtex]@InProceedings{Ge_2026_CVPR,
  author    = {Ge, Yiheng and Yan, Yongqi and Han, Mishuo and Li, Changxian and Ding, Zizhong and Li, Jiaxiang and Yang, Fan and Yang, Tianshuo and Shen, Tong and Ouyang, Changle and Shen, Weijie and Deng, Weiliang and Liu, Shuhao and Qi, Tian and Mu, Yao},
  title     = {{CrossZero}: Zero-Shot of Unseen Embodiment Manipulation via Object-Centric Hierarchical Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4446--4454},
}
LiteMVS: Efficient Multi-View Stereo with Foundation Distillation and Expert Aggregation
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Tianbao and Liu, Zeyu and Wu, Shuyu and Li, Fanxing and Fan, Zhaoxin and Wu, Wenjun and Zou, Danping},
  title     = {{LiteMVS}: Efficient Multi-View Stereo with Foundation Distillation and Expert Aggregation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4520--4529},
}
EchoArena: Learning World Models for Reliable VLA Policy Evaluation
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yu-Kai and Zhang, Kevin and Chi, Xiaowei and Chen, Tianxing and Huang, Siqiao and Fu, Chuyao and Guo, Tiecheng and Jia, Peidong and Qin, Yan and Ge, Kuangzhi and Qian, Siyuan and Mi, Weishi and Qian, Zezhong and Li, Jiajun and Wuwu, Qingpo and Ju, Xiaozhu and Tang, Jian and Zhang, Shanghang},
  title     = {{EchoArena}: Learning World Models for Reliable {VLA} Policy Evaluation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {4486--4494},
}

