The 2nd Workshop on Test-time Scaling for Computer Vision
EmbodiedEval: Evaluate Multimodal LLMs as Embodied Agents
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Zhili and Li, Ran and Hu, Jinyi and Tu, Yuge and Dai, Shiqi and Hu, Shengding and Shi, Yang and Shi, Lei and Sun, Maosong},
  title     = {{EmbodiedEval}: Evaluate Multimodal {LLMs} as Embodied Agents},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11420--11432},
}
SA-TTS: Stress-Aware Test-Time Scaling for Vision Models
[pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Youla},
  title     = {{SA-TTS}: Stress-Aware Test-Time Scaling for Vision Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11480--11487},
}
MetaWorld: Skill Transfer and Composition in a Hierarchical World Model for Grounding High-Level Instructions
[pdf]
[arXiv]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Yutong and Liu, Hangxu and Pei, Kailin and Xia, Ruizhe and Feng, Tongtong},
  title     = {{MetaWorld}: Skill Transfer and Composition in a Hierarchical World Model for Grounding High-Level Instructions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11470--11479},
}
TreeReasoner: Reinforcing Tool-Augmented Tree-of-Videos Reasoning
[pdf]
[supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Hongcheng and Tang, Jingyi and Huang, Zihao and Li, Liang and Su, Li and Huang, Qingming},
  title     = {{TreeReasoner}: Reinforcing Tool-Augmented Tree-of-Videos Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11457--11469},
}
Understanding the Limits of Vision Test-Time Scaling: Path Redundancy, Instance Difficulty, and Adaptive Compute
[pdf]
[bibtex]
% Key carries a _b suffix because Yang_2026_CVPR is already taken by the SA-TTS
% entry in this listing; a repeated key makes BibTeX reject the later entry.
@InProceedings{Yang_2026_CVPR_b,
  author    = {Yang, Youla},
  title     = {Understanding the Limits of Vision Test-Time Scaling: Path Redundancy, Instance Difficulty, and Adaptive Compute},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11450--11456},
}
Rethinking Dense Optical Flow without Test-Time Scaling
[pdf]
[arXiv]
[bibtex]
@InProceedings{Chanda_2026_CVPR,
  author    = {Chanda, Praroop and Kumar, Suryansh},
  title     = {Rethinking Dense Optical Flow without Test-Time Scaling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11410--11419},
}
Mind over Space: Can Multimodal Large Language Models Mentally Navigate?
[pdf]
[arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Qihui and Ruan, Shouwei and Yang, Xiao and Jiang, Hao and Huang, Yao and Zhao, Shiji and Fan, Hanwei and Su, Hang and Wei, Xingxing},
  title     = {Mind over Space: Can Multimodal Large Language Models Mentally Navigate?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11488--11497},
}
IMA & TMA: Efficient Test-Time Adaptation for VLMs via Linear Transformation in Embedding Space
[pdf]
[supp]
[bibtex]
@InProceedings{Vempati_2026_CVPR,
  author    = {Vempati, Rishik Vamshi Rohith and Kadava, Eswar Venkata Sai and Mopuri, Konda Reddy},
  title     = {{IMA} \& {TMA}: Efficient Test-Time Adaptation for {VLMs} via Linear Transformation in Embedding Space},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11439--11449},
}
ProFuse: Efficient Open-Vocabulary 3D Gaussian Splatting with Early-Saturating Semantic Uplifting
[pdf]
[bibtex]
@InProceedings{Chiou_2026_CVPR,
  author    = {Chiou, Yen-Jen},
  title     = {{ProFuse}: Efficient Open-Vocabulary {3D} {Gaussian} Splatting with Early-Saturating Semantic Uplifting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
  month     = jun,
  year      = {2026},
  pages     = {11433--11438},
}

