Papers
- Back
DirectFisheye-GS: Enabling Native Fisheye Input in Gaussian Splatting with Cross-View Joint Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zhengxian and Xie, Fei and Xue, Xutao and Zhang, Rui and Huang, Taicheng and Liu, Yang and Ji, Mengqi and Yu, Tao},
  title     = {{DirectFisheye-GS}: Enabling Native Fisheye Input in {Gaussian} Splatting with Cross-View Joint Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4942--4952},
}
CompBench: Benchmarking Complex Instruction-guided Image Editing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Bohan and Huang, Wenxuan and Tang, Yuntian and Qiao, Junbo and Liao, Jincheng and Cao, Shaosheng and Zhao, Fei and Feng, Zhaopeng and Gu, Zhouhong and Yin, Zhenfei and Bai, Lei and Ouyang, Wanli and Chen, Lin and Zhao, Fei and Wang, Zihan and Xie, Yuan and Lin, Shaohui},
  title     = {{CompBench}: Benchmarking Complex Instruction-guided Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1112--1122},
}
Spk2VidNet: A Hierarchical Recurrent Architecture for High-Fidelity Video Reconstruction from Long Spike-Camera Streams-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yuanlin and Xiong, Ruiqin and Xie, Jiyu and Zhu, Zhenkun and Yu, Zhaofei and Fan, Xiaopeng and Huang, Tiejun},
  title     = {{Spk2VidNet}: A Hierarchical Recurrent Architecture for High-Fidelity Video Reconstruction from Long Spike-Camera Streams},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12140--12149},
}
Continual Distillation of Teachers from Different Domains-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Michel_2026_CVPR,
  author    = {Michel, Nicolas and Wang, Maorong and He, Jiangpeng and Yamasaki, Toshihiko},
  title     = {Continual Distillation of Teachers from Different Domains},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10810--10819},
}
GT-SVJ: Generative-Transformer-Based Self-Supervised Video Judge For Efficient Video Reward Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shekhar_2026_CVPR,
  author    = {Shekhar, Shivanshu and Bhattacharya, Uttaran and Addanki, Raghavendra and Tanjim, Mehrab and Sarkhel, Somdeb and Zhang, Tong},
  title     = {{GT-SVJ}: Generative-Transformer-Based Self-Supervised Video Judge For Efficient Video Reward Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9847--9858},
}
Beyond Euclidean Gossip: KL-Barycentric Consensus on Heterogeneous and Imbalanced Images-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Lu and Yin, Guosheng},
  title     = {Beyond {Euclidean} Gossip: {KL}-Barycentric Consensus on Heterogeneous and Imbalanced Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6167--6175},
}
Training-free, Perceptually Consistent Low-Resolution Previews with High-Resolution Image for Efficient Workflows of Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2026_CVPR,
  author    = {Jeong, Wongi and Seo, Hoigi and Chun, Se Young},
  title     = {Training-free, Perceptually Consistent Low-Resolution Previews with High-Resolution Image for Efficient Workflows of Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4547--4557},
}
Quantized Residuals to Continuous Prompts for Few-Shot Class Incremental Learning in Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Sinha_2026_CVPR,
  author    = {Sinha, Abhishek Kumar and Dube, Nitant and Biswas, Soma},
  title     = {Quantized Residuals to Continuous Prompts for Few-Shot Class Incremental Learning in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3856--3865},
}
AD-GBC: Anisotropic Granular-Ball Skip-Connection Refiner for UNet-Based Medical Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Xiya and Zhao, Qinglin and Feng, Li},
  title     = {{AD-GBC}: Anisotropic Granular-Ball Skip-Connection Refiner for {UNet}-Based Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1418--1427},
}
White-Balance First, Adjust Later: Cross-Camera Color Constancy via Vision-Language Evaluation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Shuwei and Tan, Lei and Tan, Robby T.},
  title     = {White-Balance First, Adjust Later: Cross-Camera Color Constancy via Vision-Language Evaluation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1331--1341},
}
Reallocating Attention Across Layers to Reduce Multimodal Hallucination-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Haolang and Chu, Bolun and Fu, WeiYe and Nan, Guoshun and Liu, Junning and Pan, Minghui and Li, Qiankun and Yu, Yi and Wang, Hua and Wang, Kun},
  title     = {Reallocating Attention Across Layers to Reduce Multimodal Hallucination},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4157--4167},
}
PanoEnv: Exploring 3D Spatial Intelligence in Panoramic Environments with Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Zekai and Zheng, Xu},
  title     = {{PanoEnv}: Exploring {3D} Spatial Intelligence in Panoramic Environments with Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9647--9657},
}
Erasing Thousands of Concepts: Towards Scalable and Practical Concept Erasure for Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seo_2026_CVPR,
  author    = {Seo, Hoigi and Lee, Byung Hyun and Cho, Jaehyun and Lim, Sungjin and Chun, Se Young},
  title     = {Erasing Thousands of Concepts: Towards Scalable and Practical Concept Erasure for Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10262--10272},
}
REArtGS++: Generalizable Articulation Reconstruction with Temporal Geometry Constraint via Planar Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Di and Liu, Liu and Huang, Anran and Liu, Yuyan and Yu, Qiaojun and Liu, Shaofan and Song, Liangtu and Lu, Cewu},
  title     = {{REArtGS++}: Generalizable Articulation Reconstruction with Temporal Geometry Constraint via Planar {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1177--1186},
}
Efficient and High-Fidelity Omni Modality Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huynh_2026_CVPR,
  author    = {Huynh, Chuong and Luong, Manh and Shrivastava, Abhinav},
  title     = {Efficient and High-Fidelity Omni Modality Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8770--8780},
}
What Is the Optimal Ranking Score Between Precision and Recall? We Can Always Find It and It Is Rarely F1-
[pdf]
[supp]
[bibtex]@InProceedings{Pierard_2026_CVPR,
  author    = {Pi{\'e}rard, S{\'e}bastien and Deli{\`e}ge, Adrien and Van Droogenbroeck, Marc},
  title     = {What Is the Optimal Ranking Score Between Precision and Recall? {We} Can Always Find It and It Is Rarely {F1}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9722--9731},
}
RetouchIQ: MLLM Agents for Instruction-Based Image Retouching with Generalist Reward-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Qiucheng and Shi, Jing and Jenni, Simon and Kafle, Kushal and Wang, Tianyu and Chang, Shiyu and Zhao, Handong},
  title     = {{RetouchIQ}: {MLLM} Agents for Instruction-Based Image Retouching with Generalist Reward},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12279--12288},
}
LongStream: Long-Sequence Streaming Autoregressive Visual Geometry-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Chong and Chen, Xianda and Xie, Tao and Yin, Wei and Ren, Weiqiang and Zhang, Qian and Guo, Xiaoyang and Wang, Hao},
  title     = {{LongStream}: Long-Sequence Streaming Autoregressive Visual Geometry},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {272--283},
}
Ultra Diffusion Poser: Diffusion-Based Human Motion Tracking from Sparse Inertial Sensors and Ranging-based Between-sensor Distances-
[pdf]
[supp]
[bibtex]@InProceedings{Hollidt_2026_CVPR,
  author    = {Hollidt, Dominik and Bendinelli, Tommaso and Holz, Christian},
  title     = {Ultra Diffusion Poser: Diffusion-Based Human Motion Tracking from Sparse Inertial Sensors and Ranging-based Between-sensor Distances},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7036--7046},
}
APPO: Attention-guided Perception Policy Optimization for Video Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2026_CVPR,
  author    = {Du, Henghui and Zhou, Chang and Chen, Xi and Hu, Di},
  title     = {{APPO}: Attention-guided Perception Policy Optimization for Video Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12269--12278},
}
An Efficient Token Compression Framework for Visual Object Tracking-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Weijing and Liang, Qihua and Zhong, Bineng and Xia, Haiying and Mo, Zhiyi and Song, Shuxiang},
  title     = {An Efficient Token Compression Framework for Visual Object Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6857--6867},
}
GraphVLM: Benchmarking Vision Language Models for Multimodal Graph Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jiajin and Fan, Dongzhe and Ji, Chuanhao and Zha, Daochen and Tan, Qiaoyu},
  title     = {{GraphVLM}: Benchmarking Vision Language Models for Multimodal Graph Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9491--9500},
}
MovieRecapsQA: A Multimodal Open-Ended Video Question-Answering Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shaar_2026_CVPR,
  author    = {Shaar, Shaden and Thymes, Bradon and Chaixanien, Sirawut and Cardie, Claire and Hariharan, Bharath},
  title     = {{MovieRecapsQA}: A Multimodal Open-Ended Video Question-Answering Benchmark},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4537--4546},
}
PhysHead: Simulation-Ready Gaussian Head Avatars-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kabadayi_2026_CVPR,
  author    = {Kabadayi, Berna and Sklyarova, Vanessa and Zielonka, Wojciech and Thies, Justus and Pons-Moll, Gerard},
  title     = {{PhysHead}: Simulation-Ready {Gaussian} Head Avatars},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4109--4121},
}
Physical Simulator In-the-Loop Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Foo_2026_CVPR,
  author    = {Foo, Lin Geng and Huang, Mark He and Lattas, Alexandros and Moschoglou, Stylianos and Beeler, Thabo and Theobalt, Christian},
  title     = {Physical Simulator In-the-Loop Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4301--4311},
}
JoPPO: Hierarchical Photography Assessment via Contrastive Joint Conditional Probabilistic Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yifan and Wang, Juntuo and Qiao, Yuming and Zhang, Xudong and Yu, Chunyang and Li, Yan and Lin, Xiao and Luo, Liang and Meng, Dan},
  title     = {{JoPPO}: Hierarchical Photography Assessment via Contrastive Joint Conditional Probabilistic Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11684--11693},
}
Functional Mean Flow in Hilbert Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhiqi and Sun, Yuchen and Turk, Greg and Zhu, Bo},
  title     = {Functional Mean Flow in {Hilbert} Space},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1928--1938},
}
One Patch to Caption Them All: A Unified Zero-Shot Captioning Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bianchi_2026_CVPR,
  author    = {Bianchi, Lorenzo and Pacini, Giacomo and Carrara, Fabio and Messina, Nicola and Amato, Giuseppe and Falchi, Fabrizio},
  title     = {One Patch to Caption Them All: A Unified Zero-Shot Captioning Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5532--5542},
}
Fine-Grained Post-Training Quantization for Large Vision Language Models with Quantization-Aware Integrated Gradients-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Ziwei and Zeng, Fanhu and Fang, Hongjian and Wang, Rui-Qi and Chen, Renxing and Zhu, Yanan and Chen, Yi and Yang, Peipei and Zhang, Xu-Yao},
  title     = {Fine-Grained Post-Training Quantization for Large Vision Language Models with Quantization-Aware Integrated Gradients},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3500--3510},
}
GR-Gauge: Cost-efficient Training Configuration By Gauging the Gradient Redundancy-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Guanjie and Chen, Chen},
  title     = {{GR-Gauge}: Cost-efficient Training Configuration By Gauging the Gradient Redundancy},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12934--12943},
}
SPEGC: Continual Test-Time Adaptation via Semantic-Prompt-Enhanced Graph Clustering for Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2026_CVPR,
  author    = {Du, Xiaogang and Zhang, Jiawei and Liu, Tongfei and Lei, Tao and Wang, Yingbo},
  title     = {{SPEGC}: Continual Test-Time Adaptation via Semantic-Prompt-Enhanced Graph Clustering for Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8481--8491},
}
Vocabulary Scaling Law: Tuning Open-vocabulary Predictors for Their Openness-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Ziliang and Li, Yulu and Fang, Liangda and Zhang, Jusheng and Zheng, Yongsen and Guan, Quanlong and Chen, Xipeng},
  title     = {Vocabulary Scaling Law: Tuning Open-vocabulary Predictors for Their Openness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3091--3100},
}
The Consistency Critic: Correcting Inconsistencies in Generated Images via Reference-Guided Attentive Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ouyang_2026_CVPR,
  author    = {Ouyang, Ziheng and Song, Yiren and Liu, Yaoli and Zhu, Shihao and Hou, Qibin and Cheng, Ming-Ming and Shou, Mike Zheng},
  title     = {The Consistency Critic: Correcting Inconsistencies in Generated Images via Reference-Guided Attentive Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2035--2046},
}
Where Does Vision Meet Language? Understanding and Refining Visual Fusion in MLLMs via Contrastive Attention-
[pdf]
[bibtex]@InProceedings{Song_2026_CVPR,
  author    = {Song, Shezheng and Li, Shasha and Zhao, Shan and Li, Xiaopeng and Wan, Qian and Wang, Chengyu and Yan, Tianwei and Jun, Ma and Yu, Jie},
  title     = {Where Does Vision Meet Language? {Understanding} and Refining Visual Fusion in {MLLMs} via Contrastive Attention},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10051--10060},
}
Taming Preference Mode Collapse via Directional Decoupling Alignment in Diffusion Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Chubin and Hu, Sujie and Zhu, Jiashu and Wu, Meiqi and Chen, Jintao and Li, Yanxun and Huang, Nisha and Fang, Chengyu and Wu, Jiahong and Chu, Xiangxiang and Li, Xiu},
  title     = {Taming Preference Mode Collapse via Directional Decoupling Alignment in Diffusion Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12775--12786},
}
DocPrune: Efficient Document Question Answering via Background, Question, and Comprehension-aware Token Pruning-
[pdf]
[supp]
[bibtex]@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Joonmyung and Lee, Sanghyeok and Kim, Jongha and Kim, Sehyung and Ko, Dohwan and Kil, Jihyung and Kim, Hyunwoo J.},
  title     = {{DocPrune}: Efficient Document Question Answering via Background, Question, and Comprehension-aware Token Pruning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3543--3552},
}
POUR: A Provably Optimal Method for Unlearning Representation via Neural Collapse-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Le_2026_CVPR,
  author    = {Le, Anjie and Peng, Can and Liu, Yuyuan and Noble, J. Alison},
  title     = {{POUR}: A Provably Optimal Method for Unlearning Representation via Neural Collapse},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10273--10282},
}
Skyra: AI-Generated Video Detection via Grounded Artifact Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Yifei and Zheng, Wenzhao and Zhang, Yanran and Sun, Runze and Zheng, Yu and Chen, Lei and Zhou, Jie and Lu, Jiwen},
  title     = {Skyra: {AI}-Generated Video Detection via Grounded Artifact Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4482--4493},
}
MAMMA: Markerless Accurate Multi-person Motion Acquisition-
[pdf]
[supp]
[bibtex]@InProceedings{Velasquez_2026_CVPR,
  author    = {Velasquez, Hanz Cuevas and Yiannakidis, Anastasios and Shin, Soyong and Becherini, Giorgio and H{\"o}schle, Markus and Tesch, Joachim and Obersat, Taylor and Alexiadis, Tsvetelina and Halilaj, Eni and Black, Michael J.},
  title     = {{MAMMA}: Markerless Accurate Multi-person Motion Acquisition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7175--7186},
}
First Frame Is the Place to Go for Video Content Customization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jingxi and Li, Zongxia and Liu, Zhichao and Shi, Guangyao and Wu, Xiyang and Liu, Fuxiao and Ferm{\"u}ller, Cornelia and Feng, Brandon Y. and Aloimonos, Yiannis},
  title     = {First Frame Is the Place to Go for Video Content Customization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9243--9252},
}
HeroGS: Hierarchical Guidance for Robust 3D Gaussian Splatting under Sparse Views-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Jiashu and Han, Xumeng and Wei, Zhaoyang and Wang, Zipeng and Wang, Kuiran and Li, Guorong and Han, Zhenjun and Jiao, Jianbin},
  title     = {{HeroGS}: Hierarchical Guidance for Robust {3D} {Gaussian} Splatting under Sparse Views},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11788--11797},
}
Bidirectional Query-Driven Generation of Parametric CAD Sketch-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yang and Ren, Daxuan and Ding, Yijie and Zheng, Jianmin and Deng, Fang},
  title     = {Bidirectional Query-Driven Generation of Parametric {CAD} Sketch},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3176--3185},
}
SRGCD: Stability-Driven Region Growth Framework for 3D Change Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yue and Peng, Tao and Yuan, Yongzhe and Feng, Kaiyuan and Li, Hao and Gong, Maoguo and Miao, Qiguang and Ma, Wenping},
  title     = {{SRGCD}: Stability-Driven Region Growth Framework for {3D} Change Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7546--7555},
}
Dual-Estimator: Decoupling Global and Local Semantic Shift for Drift Compensation in Class-Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Fankang and Jin, Lu and Sun, Yanpeng and Xuan, Shiyu and Li, Zechao},
  title     = {Dual-Estimator: Decoupling Global and Local Semantic Shift for Drift Compensation in Class-Incremental Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10799--10809},
}
SAM 3D Body: Robust Full-Body Human Mesh Recovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Xitong and Kukreja, Devansh and Pinkus, Don and Fan, Taosha and Park, Jinhyung and Shin, Soyong and Cao, Jinkun and Liu, Jia-Wei and Ugrinovic, Nicol{\'a}s and Sagar, Anushka and Malik, Jitendra and Feiszli, Matt and Doll{\'a}r, Piotr and Kitani, Kris},
  title     = {{SAM 3D Body}: Robust Full-Body Human Mesh Recovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7209--7219},
}
GuardTrace-VL: Detecting Unsafe Multimodel Reasoning via Iterative Safety Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Yuxiao and Chen, Junchi and Jin, Zhenchao and Miao, Changtao and Yuan, Haojie and Chu, Qi and Gong, Tao and Yu, Nenghai},
  title     = {{GuardTrace-VL}: Detecting Unsafe Multimodel Reasoning via Iterative Safety Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11912--11922},
}
Tackling Alignment Ambiguity in Person Retrieval through Conversational Attribute Mining-
[pdf]
[supp]
[bibtex]@InProceedings{Zou_2026_CVPR,
  author    = {Zou, Hao and Zhang, Runqing and Ding, Jin and Zhou, Xue and Zou, Jianxiao and Cai, Mingzhu},
  title     = {Tackling Alignment Ambiguity in Person Retrieval through Conversational Attribute Mining},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9689--9698},
}
Gastric-X: A Multimodal Multi-Phase Benchmark Dataset for Advancing Vision-Language Models in Gastric Cancer Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Yuanzhe and Chen, Hao and Yin, Rui and Ba, Juyan and Zhang, Yu and Lu, Sheng},
  title     = {{Gastric-X}: A Multimodal Multi-Phase Benchmark Dataset for Advancing Vision-Language Models in Gastric Cancer Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2490--2501},
}
AVION: Aerial Vision-Language Instruction from Offline Teacher to Prompt-Tuned Network-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Yu and Gu, Jianyang and Liu, Hao and Cao, Yue and Hamari, Jozsef and Liu, Zheng and Zardadi, Mohsen},
  title     = {{AVION}: Aerial Vision-Language Instruction from Offline Teacher to Prompt-Tuned Network},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10106--10115},
}
OSA: Echocardiography Video Segmentation via Orthogonalized State Update and Anatomical Prior-aware Feature Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Rui and Wu, Huisi and Qin, Jing},
  title     = {{OSA}: Echocardiography Video Segmentation via Orthogonalized State Update and Anatomical Prior-aware Feature Enhancement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1428--1438},
}
VLM-Guided Group Preference Alignment for Diffusion-based Human Mesh Recovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Wenhao and Wang, Hao and Yin, Wanqi and Liu, Fayao and Yang, Xulei and Liang, Chao and Cai, Zhongang and Lin, Guosheng},
  title     = {{VLM}-Guided Group Preference Alignment for Diffusion-based Human Mesh Recovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13918--13929},
}
PhysIR-Splat: Physically Consistent Thermal Infrared Radiative Transfer in 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Jingyuan and Hu, Yumeng and Gao, Fei and Zhang, Mingjin},
  title     = {{PhysIR-Splat}: Physically Consistent Thermal Infrared Radiative Transfer in {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11818--11828},
}
Generalized-CVO: Fast and Correspondence-Free Local Point Cloud Registration with Second Order Riemannian Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Ray and Greiff, Marcus and Lew, Thomas and Subosits, John},
  title     = {{Generalized-CVO}: Fast and Correspondence-Free Local Point Cloud Registration with Second Order {Riemannian} Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2948--2958},
}
MultiAnimate: Pose-Guided Image Animation Made Extensible-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Yingcheng and Gong, Haowen and Yang, Chuanguang and An, Zhulin and Xu, Yongjun and Liu, Songhua},
  title     = {{MultiAnimate}: Pose-Guided Image Animation Made Extensible},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9306--9316},
}
AnyLift: Scaling Motion Reconstruction from Internet Videos via 2D Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Hongjie and Yu, Heng and Li, Jiaman and Yu, Hong-Xing and Adeli, Ehsan and Liu, C. Karen and Wu, Jiajun},
  title     = {{AnyLift}: Scaling Motion Reconstruction from Internet Videos via {2D} Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13876--13886},
}
Accelerating Diffusion via Hybrid Data-Pipeline Parallelism Based on Conditional Guidance Scheduling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2026_CVPR,
  author    = {Jung, Euisoo and Kim, Byunghyun and Kim, Hyunjin and Cho, Seonghye and Lee, Jae-Gil},
  title     = {Accelerating Diffusion via Hybrid Data-Pipeline Parallelism Based on Conditional Guidance Scheduling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9374--9383},
}
The Missing Point in Vision Transformers for Universal Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shahabodini_2026_CVPR,
  author    = {Shahabodini, Sajjad and Mansoori, Mobina and Bayatmakou, Farnoush and Abouei, Jamshid and Plataniotis, Konstantinos and Mohammadi, Arash},
  title     = {The Missing Point in Vision Transformers for Universal Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6314--6324},
}
AeroAgent: A Vision-Physics-Decision Framework for Aerodynamic Vehicle Design-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Ye and Liu, Shouyi and Yang, Huiyu and Gu, Jianghang and Fan, Wenhao and Yang, Zhongxin and Wang, Ding and Chen, Simeng and Jiang, Zirun and Bin, Yuanwei and Chen, Shiyi and Chen, Yuntian},
  title     = {{AeroAgent}: A Vision-Physics-Decision Framework for Aerodynamic Vehicle Design},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11694--11703},
}
StreamAvatar: Streaming Diffusion Models for Real-Time Interactive Human Avatars-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Zhiyao and Peng, Ziqiao and Ma, Yifeng and Chen, Yi and Zhou, Zhengguang and Zhou, Zixiang and Zhang, Guozhen and Zhang, Youliang and Zhou, Yuan and Lu, Qinglin and Liu, Yong-Jin},
  title     = {{StreamAvatar}: Streaming Diffusion Models for Real-Time Interactive Human Avatars},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10887--10897},
}
Dynamics-Aware Preference Optimization for Vision-Language Models-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jusheng and Cai, Kaitong and Yang, Jing and Wang, Jian and Wang, Keze},
  title     = {Dynamics-Aware Preference Optimization for Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11610--11620},
}
CLAY: Conditional Visual Similarity Modulation in Vision-Language Embedding Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lim_2026_CVPR,
  author    = {Lim, Sohwi and Hyoseok, Lee and Park, Jungjoon and Oh, Tae-Hyun},
  title     = {{CLAY}: Conditional Visual Similarity Modulation in Vision-Language Embedding Space},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9679--9688},
}
The Surprising Effectiveness of Noise Pretraining for Implicit Neural Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vyas_2026_CVPR,
  author    = {Vyas, Kushal and Kayabasi, Alper and Kim, Daniel and Saragadam, Vishwanath and Veeraraghavan, Ashok and Balakrishnan, Guha},
  title     = {The Surprising Effectiveness of Noise Pretraining for Implicit Neural Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6116--6125},
}
SGDrive: Scene-to-Goal Hierarchical World Cognition for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jingyu and Wu, Junjie and Hu, Dongnan and Huang, Xiangkai and Sun, Bin and Hao, Zhihui and Lang, Xianpeng and Zhu, Xiatian and Zhang, Li}, title = {SGDrive: Scene-to-Goal Hierarchical World Cognition for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4032-4042} }
Are Image-to-Video Models Good Zero-Shot Image Editors?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zechuan and Chen, Zhenyuan and Yang, Zongxin and Yang, Yi}, title = {Are Image-to-Video Models Good Zero-Shot Image Editors?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2090-2103} }
RT-Splatting: Joint Reflection-Transmission Modeling with Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Ji and Ying, Xianghua and Xing, Bowei and Guo, Ruohao and Yue, Wenzhen}, title = {RT-Splatting: Joint Reflection-Transmission Modeling with Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4870-4880} }
From Observation to Action: Latent Action-based Primitive Segmentation for VLA Pre-training in Industrial Settings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jiajie and Schwertfeger, S{\"o}ren and Kleiner, Alexander}, title = {From Observation to Action: Latent Action-based Primitive Segmentation for VLA Pre-training in Industrial Settings}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6750-6759} }
Shoe Style-Invariant and Ground-Aware Learning for Dense Foot Contact Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2026_CVPR, author = {Jung, Daniel Sungho and Lee, Kyoung Mu}, title = {Shoe Style-Invariant and Ground-Aware Learning for Dense Foot Contact Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7058-7067} }
One-Shot Flow, Any-Time Frame: A Bidirectional Warping Framework for Event-Based Video Frame Interpolation-
[pdf]
[supp]
[bibtex]@InProceedings{Fu_2026_CVPR, author = {Fu, Linghui and Liu, Yuhan and Chen, Hao and Yang, Zhen and Deng, Yongjian}, title = {One-Shot Flow, Any-Time Frame: A Bidirectional Warping Framework for Event-Based Video Frame Interpolation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2832-2842} }
SEASON: Mitigating Temporal Hallucination in Video Large Language Models via Self-Diagnostic Contrastive Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Chang-Hsun and Chang, Kai-Po and Sheng, Yu-Yang and Chung, Hung-Kai and Wang, Kuei-Chun and Wang, Yu-Chiang Frank}, title = {SEASON: Mitigating Temporal Hallucination in Video Large Language Models via Self-Diagnostic Contrastive Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11096-11105} }
A Training-Free Style-Personalization via SVD-Based Feature Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Kyoungmin and Park, Jihun and Gim, Jongmin and Choi, Wonhyeok and Hwang, Kyumin and Kim, Jaeyeul and Im, Sunghoon}, title = {A Training-Free Style-Personalization via SVD-Based Feature Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {506-516} }
Unlocking Token Rewards via Training-Free Reward Attribution-
[pdf]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Sitong and Tan, Haoru and Xia, Bin and Zhang, Xichen and Li, Jingyao and Zhang, Shaofeng and Qi, Xiaojuan and Yu, Bei and Jia, Jiaya}, title = {Unlocking Token Rewards via Training-Free Reward Attribution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5082-5091} }
FreeArtGS: Articulated Gaussian Splatting Under Free-moving Scenario-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dai_2026_CVPR, author = {Dai, Hang and Fan, Hongwei and Zhang, Han and Wu, Duojin and Zhang, Jiyao and Dong, Hao}, title = {FreeArtGS: Articulated Gaussian Splatting Under Free-moving Scenario}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11777-11787} }
HyperNAS: Enhancing Architecture Representation for NAS Predictor via Hypernetwork-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lv_2026_CVPR, author = {Lv, Jindi and Zhou, Yuhao and Tian, Yuxin and Ye, Qing and Feng, Wentao and Lv, Jiancheng}, title = {HyperNAS: Enhancing Architecture Representation for NAS Predictor via Hypernetwork}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12955-12965} }
Act Like a Pathologist: Tissue-Aware Whole Slide Image Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Wentao and Lyu, Weimin and Lou, Peiliang and Hu, Qingqiao and Hu, Xiaoling and Abousamra, Shahira and Han, Wenchao and Guo, Ruifeng and Zhou, Jiawei and Chen, Chao and Wang, Chen}, title = {Act Like a Pathologist: Tissue-Aware Whole Slide Image Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6972-6981} }
Does YOLO Really Need to See Every Training Image in Every Epoch?-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Xingxing and Dong, Jiahua and Han, Junwei and Cheng, Gong}, title = {Does YOLO Really Need to See Every Training Image in Every Epoch?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {135-144} }
Learning What Helps: Task-Aligned Context Selection for Vision Tasks-
[pdf]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Jingyu and Konuk, Emir and Strand, Fredrik and Matsoukas, Christos and Smith, Kevin}, title = {Learning What Helps: Task-Aligned Context Selection for Vision Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11632-11642} }
Scaling Spatial Intelligence with Multimodal Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Zhongang and Wang, Ruisi and Gu, Chenyang and Pu, Fanyi and Xu, Junxiang and Wang, Yubo and Yin, Wanqi and Yang, Zhitao and Wei, Chen and Zhou, Tongxi and Sun, Qingping and Pang, Hui En and Li, Jiaqi and Qian, Oscar and Lin, Zhiqian and Shi, Xuanke and Deng, Kewang and Han, Xiaoyang and Chen, Zukai and Fan, Xiangyu and Deng, Hanming and Lu, Lewei and Pan, Liang and Li, Bo and Liu, Ziwei and Wang, Quan and Lin, Dahua and Yang, Lei}, title = {Scaling Spatial Intelligence with Multimodal Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7879-7890} }
MonoSAOD: Monocular 3D Object Detection with Sparsely Annotated Label-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2026_CVPR, author = {Jung, Junyoung and Kim, Seokwon and Kim, Jung Uk}, title = {MonoSAOD: Monocular 3D Object Detection with Sparsely Annotated Label}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4718-4727} }
Direct Segmentation without Logits Optimization for Training-Free Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jiahao and Lu, Yang and Zhang, Yachao and Wang, Fangyong and Xie, Yuan and Qu, Yanyun}, title = {Direct Segmentation without Logits Optimization for Training-Free Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13168-13178} }
CrossHOI: Learning Cross-View Representations for Monocular 3D Human-Object Interaction Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Geng_2026_CVPR, author = {Geng, Pei and Zhang, Shanshan and Yang, Jian}, title = {CrossHOI: Learning Cross-View Representations for Monocular 3D Human-Object Interaction Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7121-7130} }
ANTS: Adaptive Negative Textual Space Shaping for OOD Detection via Test-Time MLLM Understanding and Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Wenjie and Zhang, Yabin and Jin, Xin and Zeng, Wenjun and Zhang, Lei}, title = {ANTS: Adaptive Negative Textual Space Shaping for OOD Detection via Test-Time MLLM Understanding and Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20-30} }
Basis-Oriented Low-rank Transfer for Few-Shot and Test-Time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Junghwan and Cho, Woojin and Heo, Junhyuk and Kwon, Darongsae and Lee, Kookjin}, title = {Basis-Oriented Low-rank Transfer for Few-Shot and Test-Time Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {860-870} }
GauMVC: Generative Decoupled Gaussian Representation for Human-centric Multi-view Video Compression-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Ruoke and Yang, Mingjia and Zhang, Xinfeng and Tang, Haocheng and Yin, Qian and Deng, Zhipin and Zhang, Kai and Zhang, Li and Ma, Siwei}, title = {GauMVC: Generative Decoupled Gaussian Representation for Human-centric Multi-view Video Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4963-4972} }
Rethinking Occlusion Modeling for UAV Tracking-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jian and Yu, Xincheng and Lin, Yi}, title = {Rethinking Occlusion Modeling for UAV Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13563-13573} }
LRDUN: A Low-Rank Deep Unfolding Network for Efficient Spectral Compressive Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, He and Guo, Yujun and He, Wei}, title = {LRDUN: A Low-Rank Deep Unfolding Network for Efficient Spectral Compressive Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10556-10566} }
Exploring Spatial Intelligence from a Generative Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Muzhi and Jiang, Shunyao and Zheng, Huanyi and Luo, Zekai and Zhong, Hao and Li, Anzhou and Wang, Kaijun and Rong, Jintao and Liu, Yang and Chen, Hao and Lin, Tao and Shen, Chunhua}, title = {Exploring Spatial Intelligence from a Generative Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2582-2592} }
The Blind Spot of Adaptation: Quantifying and Mitigating Forgetting in Fine-tuned Driving Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Runhao and Wang, Hanshi and Yang, Yixiang and Ma, Qianli and Zhou, Jingmeng and Zhang, Zhipeng}, title = {The Blind Spot of Adaptation: Quantifying and Mitigating Forgetting in Fine-tuned Driving Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10621-10631} }
CLP: A Real-World Dataset of Contaminated Lens Protectors for Robust Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Sungyong and Choi, Sooyoung and Koh, Hyunsuh and Choi, Youngjae and Kim, Heewon}, title = {CLP: A Real-World Dataset of Contaminated Lens Protectors for Robust Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3794-3804} }
InstAP: Instance-Aware Vision-Language Pre-Train for Spatial-Temporal Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kumar_2026_CVPR, author = {Kumar, Ashutosh and Saini, Rajat and Pan, Jingjing and Erdogan, Mustafa and Zhang, Mingfang and Le Dem, Betty and Kobori, Norimasa and Kong, Quan}, title = {InstAP: Instance-Aware Vision-Language Pre-Train for Spatial-Temporal Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3079-3090} }
WaDi: Weight Direction-aware Distillation for One-step Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Lei and Cheng, Yang and Li, Senmao and Wu, Ge and Wang, Yaxing and Yang, Jian}, title = {WaDi: Weight Direction-aware Distillation for One-step Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5574-5584} }
Polarization State Tracing for Reflection Removal and Color-Consistent Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Dongyue and Lu, Yang and Tian, Jiandong}, title = {Polarization State Tracing for Reflection Removal and Color-Consistent Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5680-5689} }
PPISP: Physically-Plausible Compensation and Control of Photometric Variations in Radiance Field Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Deutsch_2026_CVPR, author = {Deutsch, Isaac and Mo{\"e}nne-Loccoz, Nicolas and State, Gavriel and Gojcic, Zan}, title = {PPISP: Physically-Plausible Compensation and Control of Photometric Variations in Radiance Field Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7289-7298} }
MiniCPM-V 4.5: Cooking Efficient MLLMs via Architecture, Data, and Training Recipe-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Tianyu and Wang, Zefan and Wang, Chongyi and Huang, Fuwei and Ma, Wenshuo and He, Zhihui and Cai, Tianchi and Chen, Weize and Huang, Yuxiang and Zhao, Ranchi and Xu, Bokai and Cui, Junbo and Xu, Yingjing and Ruan, Liqing and Zhang, Luoyuan and Liu, Hanyu and Tang, Jingkun and Liu, Hongyuan and Guo, Qining and Hu, Wenhao and He, Bingxiang and Zhou, Jie and Cai, Jie and Qi, Ji and Guo, Zonghao and Chen, Chi and Zeng, Guoyang and Li, Yuxuan and Cui, Ganqu and Ding, Ning and Han, Xu and Yao, Yuan and Liu, Zhiyuan and Sun, Maosong}, title = {MiniCPM-V 4.5: Cooking Efficient MLLMs via Architecture, Data, and Training Recipe}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11704-11715} }
Multi-level Causal LLM-based Text-to-Motion Generation with Human Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xiaodong and Bao, Qian and Liu, Xudong and Fang, Jianping and Fang, Jintao and Zhang, Yongdong and Mei, Tao and Liu, Wu}, title = {Multi-level Causal LLM-based Text-to-Motion Generation with Human Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9342-9351} }
ForeHOI: Feed-forward 3D Object Reconstruction from Daily Hand-Object Interaction Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yuantao and Chang, Jiahao and Ye, Chongjie and Zhang, Chaoran and Fang, Zhaojie and Li, Chenghong and Han, Xiaoguang}, title = {ForeHOI: Feed-forward 3D Object Reconstruction from Daily Hand-Object Interaction Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8868-8879} }
Learning Generalizable 3D Medical Image Representations from Mask-Guided Self-Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Yunhe and Zhang, Yabin and Wang, Chong and Liu, Jiaming and Varma, Maya and Delbrouck, Jean-Benoit and Chaudhari, Akshay and Langlotz, Curtis}, title = {Learning Generalizable 3D Medical Image Representations from Mask-Guided Self-Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13744-13754} }
Beyond Matching to Tiles: Bridging Unaligned Aerial and Satellite Views for Vision-Only UAV Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Kejia and Zhou, Haoyang and Xu, Ruoyu and Wang, Peicheng and Song, Mingli and Zhang, Haofei}, title = {Beyond Matching to Tiles: Bridging Unaligned Aerial and Satellite Views for Vision-Only UAV Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5359-5368} }
TRM-VLA: Temporal-Aware Chain-of-Thought Reasoning and Memorization for Vision-Language-Action Models-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xiang and Li, Ya-Li and Wang, Yuan and Wang, Shengjin}, title = {TRM-VLA: Temporal-Aware Chain-of-Thought Reasoning and Memorization for Vision-Language-Action Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10943-10953} }
Make it SING: Analyzing Semantic Invariants in Classifiers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yadid_2026_CVPR, author = {Yadid, Harel and Levi, Meir Yossef and Betser, Roy and Gilboa, Guy}, title = {Make it SING: Analyzing Semantic Invariants in Classifiers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9911-9920} }
PRISM: Prototype-based Reasoning with Inter-modal Semantic Mining for Interpretable Image Recognition-
[pdf]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Anni and Yang, Yu-Bin}, title = {PRISM: Prototype-based Reasoning with Inter-modal Semantic Mining for Interpretable Image Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2853-2863} }
Event Structural Valley: A Unified Theoretical and Practical Framework for Event Camera Autofocus-
[pdf]
[supp]
[bibtex]@InProceedings{Xiang_2026_CVPR, author = {Xiang, Xijie and Zhu, Lin and Zhang, Wei and Tian, Yonghong}, title = {Event Structural Valley: A Unified Theoretical and Practical Framework for Event Camera Autofocus}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {936-945} }
BiMotion: B-spline Motion for Text-guided Dynamic 3D Character Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Miaowei and Yan, Qingxuan and Cao, Zhi and Li, Yayuan and Mac Aodha, Oisin and Corso, Jason J. and Vaxman, Amir}, title = {BiMotion: B-spline Motion for Text-guided Dynamic 3D Character Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10152-10164} }
JRM: Joint Reconstruction Model for Multiple Objects without Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Qirui and Siddiqui, Yawar and Frost, Duncan and Aroudj, Samir and Avetisyan, Armen and Newcombe, Richard and Chang, Angel X. and Engel, Jakob and Howard-Jenkins, Henry}, title = {JRM: Joint Reconstruction Model for Multiple Objects without Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {307-316} }
CAPT: Confusion-Aware Prompt Tuning for Reducing Vision-Language Misalignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2026_CVPR, author = {Shao, Maoyuan and Gao, Yutong and Huang, Xinyang and Sun, Lijuan and Nan, Guoshun and Zhu, Chuang}, title = {CAPT: Confusion-Aware Prompt Tuning for Reducing Vision-Language Misalignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3154-3164} }
EG-3DVG: Expression and Geometry Aware Grounding Decoder for 3D Visual Grounding-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, GwangWook and Lee, Hyo-Jun and Baek, Jong-Hyeon and Kim, Hanul and Koh, Yeong Jun}, title = {EG-3DVG: Expression and Geometry Aware Grounding Decoder for 3D Visual Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2625-2634} }
LLaDA-V: Large Language Diffusion Models with Visual Instruction Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{You_2026_CVPR, author = {You, Zebin and Nie, Shen and Zhang, Xiaolu and Zhou, Jun and Lu, Zhiwu and Wen, Ji-Rong and Li, Chongxuan}, title = {LLaDA-V: Large Language Diffusion Models with Visual Instruction Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10093-10105} }
C-GenReg: Training-Free 3D Point Cloud Registration by Multi-View-Consistent Geometry-to-Image Generation with Probabilistic Modalities Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Haitman_2026_CVPR, author = {Haitman, Yuval and Efraim, Amit and Francos, Joseph M.}, title = {C-GenReg: Training-Free 3D Point Cloud Registration by Multi-View-Consistent Geometry-to-Image Generation with Probabilistic Modalities Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3004-3013} }
Synergistic Bleeding Region and Point Detection in Laparoscopic Surgical Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pei_2026_CVPR, author = {Pei, Jialun and Zhou, Zhangjun and Guo, Diandian and Li, Zhixi and Qin, Jing and Du, Bo and Heng, Pheng-Ann}, title = {Synergistic Bleeding Region and Point Detection in Laparoscopic Surgical Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1396-1405} }
CoIn: Coverage and Informativeness-Guided Token Reduction for Efficient Large Multimodal Models-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2026_CVPR, author = {Du, Chenxi and Deng, Yongheng and Liu, Jiani and Zhang, Yujia and Chen, Xi and Ren, Ju}, title = {CoIn: Coverage and Informativeness-Guided Token Reduction for Efficient Large Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10492-10501} }
EVA: Efficient Reinforcement Learning for End-to-End Video Agent-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yaolun and Wang, Ruohui and Wang, Jiahao and Tang, Yepeng and Zheng, Xuanyu and Duan, Haonan and Lu, Hao and Deng, Hanming and Lu, Lewei}, title = {EVA: Efficient Reinforcement Learning for End-to-End Video Agent}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12289-12299} }
When Local Rules Create Global Order: Self-Organized Representation Learning for Latent Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Lian_2026_CVPR, author = {Lian, Junrong and Deng, Weijian and Wei, Pengxu and Chen, Yaqin and Ye, Qixiang and Lin, Liang}, title = {When Local Rules Create Global Order: Self-Organized Representation Learning for Latent Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9445-9454} }
MeshFlow: Efficient Artistic Mesh Generation via MeshVAE and Flow-based Diffusion Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Weiyu and Toisoul, Antoine and Monnier, Tom and Shapovalov, Roman and Ranjan, Rakesh and Tan, Ping and Vedaldi, Andrea}, title = {MeshFlow: Efficient Artistic Mesh Generation via MeshVAE and Flow-based Diffusion Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5849-5858} }
CrackSSM: Reviving SSMs for Crack Segmentation via Dynamic Scanning-
[pdf]
[bibtex]@InProceedings{Gu_2026_CVPR, author = {Gu, Yubin and Hou, Boyang and Meng, Yuan and Luo, Wenting and Ji, Jiayi and Sun, Xiaoshuai}, title = {CrackSSM: Reviving SSMs for Crack Segmentation via Dynamic Scanning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10721-10730} }
Rectifying Latent Space for Generative Single-Image Reflection Removal-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Mingjia and Hu, Jin and Wang, Hainuo and Hu, Qiming and Wang, Jiarui and Guo, Xiaojie}, title = {Rectifying Latent Space for Generative Single-Image Reflection Removal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8397-8407} }
Unstitching the Chimera: Frame-Level Risk and Train-Free Mitigation for Video Hallucination-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Songyuan and Tang, Guijian and Hu, Kun and Wang, Haotian and Liu, Shixuan and Yang, Wenjing and Lan, Long and Tan, Huibin}, title = {Unstitching the Chimera: Frame-Level Risk and Train-Free Mitigation for Video Hallucination}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4188-4198} }
Refracting Reality: Generating Images with Realistic Transparent Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Yue and Tao, Enze and Campbell, Dylan}, title = {Refracting Reality: Generating Images with Realistic Transparent Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4312-4321} }
Refining Few-Step Text-to-Multiview Diffusion via Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Ziyi and Shen, Li and Ye, Deheng and Luo, Yong and Zhao, Huangxuan and Liu, Meng and Yu, Wei and Zhang, Lefei}, title = {Refining Few-Step Text-to-Multiview Diffusion via Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2401-2411} }
Tri-Modal Fusion Transformers for UAV-based Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Iaboni_2026_CVPR, author = {Iaboni, Craig and Abichandani, Pramod}, title = {Tri-Modal Fusion Transformers for UAV-based Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4373-4382} }
RetFormer: Multimodal Retrieval for Enhancing Image Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Tianrui and Liang, Xiubo and Wang, Hongzhi}, title = {RetFormer: Multimodal Retrieval for Enhancing Image Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2704-2714} }
Twin-T & TwintVQA: A Reliable Structure-Detail Separating VLM and a Comprehensive Benchmark for Chart and Table Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Bao_2026_CVPR, author = {Bao, Jiahua and Cheng, Siyao and Du, Jiaxing and Xia, Qingtao and He, Changjiang and Lang, Zeming and Liu, Jie}, title = {Twin-T \& TwintVQA: A Reliable Structure-Detail Separating VLM and a Comprehensive Benchmark for Chart and Table Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4850-4859} }
HTNav: A Hybrid Navigation Framework with Tiered Structure for Urban Aerial Vision-and-Language Navigation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Chengjie and Pan, Cong and Liu, Zijian and Liu, Ningzhong and Qin, Jie}, title = {HTNav: A Hybrid Navigation Framework with Tiered Structure for Urban Aerial Vision-and-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10976-10985} }
NanoSD: Edge Efficient Foundation Model for Real Time Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sanyal_2026_CVPR, author = {Sanyal, Subhajit and Miriyala, Srinivas Soumitri and Bankar, Akshay Janardan and Arveti, Manjunath and Vajrala, Sowmya and Pandith, Shreyas and Kodavanti, Sravanth and Ameta, Abhishek and Harshit, Harshit and Unde, Amit Satish}, title = {NanoSD: Edge Efficient Foundation Model for Real Time Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8449-8459} }
Teacher-Guided Routing for Sparse Vision Mixture-of-Experts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kada_2026_CVPR, author = {Kada, Masahiro and Yoshihashi, Ryota and Ikehata, Satoshi and Kawakami, Rei and Sato, Ikuro}, title = {Teacher-Guided Routing for Sparse Vision Mixture-of-Experts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6198-6208} }
AstraNav-Memory: Contexts Compression for Long Memory-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Junjun and Xue, Xinda and Ren, Botao and Luo, Minghua and Chen, Jintao and Bai, Haochen and You, Liangliang and Xu, Mu}, title = {AstraNav-Memory: Contexts Compression for Long Memory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8097-8109} }
Globally Optimal Pose from Orthographic Silhouettes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sengupta_2026_CVPR, author = {Sengupta, Agniva and Kus, Dilara and Li, Jianning and Zachow, Stefan}, title = {Globally Optimal Pose from Orthographic Silhouettes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11029-11038} }
Computation and Communication Efficient Federated Unlearning via On-server Gradient Conflict Mitigation and Expression-
[pdf]
[supp]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Minh-Duong and Wanasekara, Senura and Nguyen, Le-Tuan and Pham, Quoc-Viet and Yong, Ken-Tye and Tran, Nguyen H. and Le, Dung D.}, title = {Computation and Communication Efficient Federated Unlearning via On-server Gradient Conflict Mitigation and Expression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3347-3357} }
Mind the Gap: Transferring Labels to Align Object Detection Datasets-
[pdf]
[supp]
[bibtex]@InProceedings{Kennerley_2026_CVPR, author = {Kennerley, Mikhail and Aviles-Rivero, Angelica I. and Sch{\"o}nlieb, Carola-Bibiane and Tan, Robby T.}, title = {Mind the Gap: Transferring Labels to Align Object Detection Datasets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4353-4362} }
Decoupled Generative Modeling for Human-Object Interaction Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2026_CVPR, author = {Jung, Hwanhee and Lee, Seunggwan and Yoon, Jeongyoon and Kim, SeungHyeon and Nam, Giljoo and Huang, Qixing and Kim, Sangpil}, title = {Decoupled Generative Modeling for Human-Object Interaction Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2253-2263} }
SpatialReward: Verifiable Spatial Reward Modeling for Fine-Grained Spatial Consistency in Text-to-Image Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Sashuai and Zhou, Qiang and Ma, Junpeng and Cao, Yue and Hu, Ruofan and Zhang, Ziang and Yang, Xiaoda and Wang, Zhibin and Song, Jun and Yu, Cheng and Zheng, Bo and Zhao, Zhou}, title = {SpatialReward: Verifiable Spatial Reward Modeling for Fine-Grained Spatial Consistency in Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {647-658} }
Feed-Forward One-Shot Animatable Textured Mesh Avatar Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Yisheng}, title = {Feed-Forward One-Shot Animatable Textured Mesh Avatar Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4145-4156} }
Tokenization Allows Multimodal Large Language Models to Understand, Generate and Edit Architectural Floor Plans-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qin_2026_CVPR, author = {Qin, Sizhong and Weber, Ramon Elias and Lu, Xinzheng}, title = {Tokenization Allows Multimodal Large Language Models to Understand, Generate and Edit Architectural Floor Plans}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10430-10440} }
RetimeGS: Continuous-Time Reconstruction of 4D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xuezhen and Ma, Li and Shen, Yulin and Wang, Zeyu and Sander, Pedro V.}, title = {RetimeGS: Continuous-Time Reconstruction of 4D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7340-7350} }
Trainable Log-linear Sparse Attention for Efficient Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Yifan and Xiao, Zeqi and Wei, Tianyi and Yang, Shuai and Pan, Xingang}, title = {Trainable Log-linear Sparse Attention for Efficient Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9424-9433} }
Think with 3D: Geometric Imagination Grounded Spatial Reasoning from Limited Views-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zhangquan and Zhang, Manyuan and Yu, Xinlei and Luo, Xufang and Sun, Mingze and Pan, Zihao and An, Xiang and Feng, Yan and Pei, Peng and Cai, Xunliang and Huang, Ruqi}, title = {Think with 3D: Geometric Imagination Grounded Spatial Reasoning from Limited Views}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2613-2624} }
Neural Field-Based 3D Surface Reconstruction of Microstructures from Multi-Detector Signals in Scanning Electron Microscopy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Shuo and Li, Yijin and Zheng, Xi and Zhang, Guofeng}, title = {Neural Field-Based 3D Surface Reconstruction of Microstructures from Multi-Detector Signals in Scanning Electron Microscopy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7266-7277} }
RAVEN: Erasing Invisible Watermarks via Novel View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shamshad_2026_CVPR, author = {Shamshad, Fahad and Lukas, Nils and Nandakumar, Karthik}, title = {RAVEN: Erasing Invisible Watermarks via Novel View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {82-91} }
Prune Wisely, Reconstruct Sharply: Compact 3D Gaussian Splatting via Adaptive Pruning and Difference-of-Gaussian Primitives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Haoran and Huang, Guoxi and Zhang, Fan and Bull, David and Anantrasirichai, Nantheera}, title = {Prune Wisely, Reconstruct Sharply: Compact 3D Gaussian Splatting via Adaptive Pruning and Difference-of-Gaussian Primitives}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11716-11725} }
Paparazzo: Active Mapping of Moving 3D Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Allegro_2026_CVPR, author = {Allegro, Davide and Li, Shiyao and Ghidoni, Stefano and Lepetit, Vincent}, title = {Paparazzo: Active Mapping of Moving 3D Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12586-12594} }
FlowFM: Advancing Dark Optical Flow Estimation with Flow Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Zuo_2026_CVPR, author = {Zuo, Fengyuan and Jin, Haiyan and Zhang, Yuanlin and Xiao, Zhaolin and Wang, Bin and Mu, Yuerong}, title = {FlowFM: Advancing Dark Optical Flow Estimation with Flow Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6837-6846} }
Residual Diffusion Bridge Model for Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Hebaixu and Zhang, Jing and Chen, Haoyang and Guo, Haonan and Wang, Di and Ma, Jiayi and Du, Bo}, title = {Residual Diffusion Bridge Model for Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8375-8386} }
Rewis3d: Reconstruction Improves Weakly-Supervised Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ernst_2026_CVPR, author = {Ernst, Jonas and Boettcher, Wolfgang and Hoyer, Lukas and Lenssen, Jan Eric and Schiele, Bernt}, title = {Rewis3d: Reconstruction Improves Weakly-Supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13091-13101} }
Keep it SymPL: Symbolic Projective Layout for Allocentric Spatial Reasoning in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2026_CVPR, author = {Jang, Jaeyun and Shin, Seunghui and Park, Taeho and Hwang, Hyoseok}, title = {Keep it SymPL: Symbolic Projective Layout for Allocentric Spatial Reasoning in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9604-9614} }
QueryMe: Query-Driven Open-Vocabulary 3D Object Affordances Grounding from Multimodal Evidence-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Weiyu and Li, Ru and Liu, Jiaqi and Zhao, Sizhe and Liu, Qinglin and Zhang, Shengping}, title = {QueryMe: Query-Driven Open-Vocabulary 3D Object Affordances Grounding from Multimodal Evidence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2603-2612} }
DREAM: Document Recognition with Explicit Adaptive Memory-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Tianqi and Wu, Di and Peng, Liangrui and Huang, Yifan and Zhao, Kemeng and Li, Shuo and Li, Zhiyu and Wang, Yizhu and Jiang, Borui and Li, Yuyang}, title = {DREAM: Document Recognition with Explicit Adaptive Memory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2715-2724} }
RHINO: Reconstructing Human Interactions with Novel Objects from Monocular Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2026_CVPR, author = {Xue, Lixin and Zheng, Chengwei and Paschalidis, Georgios and Guo, Chen and Kaufmann, Manuel and Zarate, Juan and Tzionas, Dimitrios}, title = {RHINO: Reconstructing Human Interactions with Novel Objects from Monocular Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13835-13845} }
Geoint-R1: Formalizing Multimodal Geometric Reasoning with Dynamic Auxiliary Constructions-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Jingxuan and Jia, Caijun and Chen, Qi and He, Honghao and Sun, Linzhuang and He, Conghui and Wu, Lijun and Yu, Bihui and Tan, Cheng}, title = {Geoint-R1: Formalizing Multimodal Geometric Reasoning with Dynamic Auxiliary Constructions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2547-2556} }
Geometric Neural Distance Fields for Learning Human Motion Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Zhengdi and Foti, Simone and Zhang, Linguang and Zhao, Amy and Keskin, Cem and Zafeiriou, Stefanos and Birdal, Tolga}, title = {Geometric Neural Distance Fields for Learning Human Motion Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2232-2242} }
Tell2Adapt: A Unified Framework for Source Free Unsupervised Domain Adaptation via Vision Foundation Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Yulong and Li, Shijie and Li, Ziyi and Qi, Lin}, title = {Tell2Adapt: A Unified Framework for Source Free Unsupervised Domain Adaptation via Vision Foundation Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6941-6950} }
Probing and Bridging Geometry-Interaction Cues for Affordance Reasoning in Vision Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Qing and Li, Xuesong and Zhang, Jing}, title = {Probing and Bridging Geometry-Interaction Cues for Affordance Reasoning in Vision Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2526-2536} }
The Devil Is in Gradient Entanglement: Energy-Aware Gradient Coordinator for Robust Generalized Category Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Haiyang and Pu, Nan and Cai, Yaqi and Long, Teng and Li, Wenjing and Sebe, Nicu and Zhong, Zhun}, title = {The Devil Is in Gradient Entanglement: Energy-Aware Gradient Coordinator for Robust Generalized Category Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3563-3573} }
IF-Bench: Benchmarking and Enhancing MLLMs for Infrared Images with Generative Visual Prompting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Tao and Hong, Yuyang and Xia, Yang and Ding, Kun and Zhang, Zeyu and Wang, Ying and Xiang, Shiming and Pan, Chunhong}, title = {IF-Bench: Benchmarking and Enhancing MLLMs for Infrared Images with Generative Visual Prompting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8205-8215} }
DENALI: A Dataset Enabling Non-Line-of-Sight Spatial Reasoning with Low-Cost LiDARs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Behari_2026_CVPR, author = {Behari, Nikhil and Rivero, Diego and Apostolides, Luke and Ghosh, Suman and Liang, Paul Pu and Raskar, Ramesh}, title = {DENALI: A Dataset Enabling Non-Line-of-Sight Spatial Reasoning with Low-Cost LiDARs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3046-3055} }
FoV-Net: Rotation-Invariant CAD B-rep Learning via Field-of-View Ray Casting-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ballegeer_2026_CVPR, author = {Ballegeer, Matteo and Benoit, Dries F.}, title = {FoV-Net: Rotation-Invariant CAD B-rep Learning via Field-of-View Ray Casting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3024-3034} }
RE-VLM: Event-Augmented Vision-Language Model for Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Hanqing and Liu, Mingjie and Cui, Luoping and Lin, Endian and Jiang, Donghong and Zhu, Chuang}, title = {RE-VLM: Event-Augmented Vision-Language Model for Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10377-10386} }
Neural Dynamic GI: Random-Access Neural Compression for Temporal Lightmaps in Dynamic Lighting Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Jianhui and Zhou, Jian and Zhou, Zhi and Huang, Zhangjin and Li, Chao}, title = {Neural Dynamic GI: Random-Access Neural Compression for Temporal Lightmaps in Dynamic Lighting Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5316-5325} }
HAD: Heterogeneity-Aware Distillation for Lifelong Heterogeneous Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xuerui and Wang, Xuehao and Zhuang, Zhan and Zhao, Linglan and Li, Ziyue and Zhang, Xinmin and Song, Zhihuan and Zhang, Yu}, title = {HAD: Heterogeneity-Aware Distillation for Lifelong Heterogeneous Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10863-10873} }
Cross-Modal Emotion Transfer for Emotion Editing in Talking Face Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, Chanhyuk and Kim, Taesoo and Lee, Donggyu and Jung, Siyeol and Kim, Taehwan}, title = {Cross-Modal Emotion Transfer for Emotion Editing in Talking Face Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1759-1770} }
Harnessing the Power of Foundation Models for Accurate Material Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Qingran and Yang, Fengwei and Zhu, Chaolun}, title = {Harnessing the Power of Foundation Models for Accurate Material Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3636-3645} }
OnlineHMR: Video-based Online World-Grounded Human Mesh Recovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yiwen and Zheng, Ce and Wang, Yufu and Yang, Hsueh-Han Daniel and Wen, Liting and Jeni, L{\'a}szl{\'o} A.}, title = {OnlineHMR: Video-based Online World-Grounded Human Mesh Recovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13951-13961} }
I'm a Map! Interpretable Motion-Attentive Maps: Spatio-Temporally Localizing Concepts in Video Diffusion Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Jun_2026_CVPR, author = {Jun, Youngjun and Kang, Seil and Han, Woojung and Hwang, Seong Jae}, title = {I'm a Map! Interpretable Motion-Attentive Maps: Spatio-Temporally Localizing Concepts in Video Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11525-11535} }
Parallel Jacobi Decoding for Fast Autoregressive Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Liao_2026_CVPR, author = {Liao, Boya and Li, Ying and Jian, Siyong and Wang, Huan}, title = {Parallel Jacobi Decoding for Fast Autoregressive Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9008-9018} }
HamiPose: Hamiltonian Optimization for Unsupervised Domain Adaptive Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jiawen and Jiang, Fei and Zhu, Dandan and Zhou, Aimin}, title = {HamiPose: Hamiltonian Optimization for Unsupervised Domain Adaptive Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13856-13865} }
CGU-Bayes: Causal Graph Uncertainty-Guided Bayesian Inference for Domain Generalization-
[pdf]
[supp]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Naiyu and Wang, Hanjing and Yu, Yue and Gao, Tian and Dhurandhar, Amit and Lee, Chung-Hao and Ji, Qiang}, title = {CGU-Bayes: Causal Graph Uncertainty-Guided Bayesian Inference for Domain Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10522-10532} }
MSCD-GS: Motion-Separated Cooperative Deblurring Dynamic Reconstruction via Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Liao_2026_CVPR, author = {Liao, Yongjian and Zou, Xu and Chen, Wenjun and Li, Huixuan and Xie, Xiaoen and Li, Chunxi and Huang, Shixiang and Zhang, Gang and Zhou, Jiahuan and Zhong, Sheng and Yan, Luxin}, title = {MSCD-GS: Motion-Separated Cooperative Deblurring Dynamic Reconstruction via Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11726-11735} }
3D-Aware Multi-Task Learning with Cross-View Correlations for Dense Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xiaoye and Tang, Chen and Yue, Xiangyu and Li, Wei-Hong}, title = {3D-Aware Multi-Task Learning with Cross-View Correlations for Dense Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5793-5803} }
Multigrain-aware Semantic Prototype Scanning and Tri-Token Prompt Learning Embraced High-Order RWKV for Pan-Sharpening-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Junfeng and Zhou, Wenyang and Li, Xueheng and He, Xuanhua and Gan, Jianhou and Ren, Wenqi}, title = {Multigrain-aware Semantic Prototype Scanning and Tri-Token Prompt Learning Embraced High-Order RWKV for Pan-Sharpening}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13234-13243} }
Repurposing 3D Generative Model for Autoregressive Layout Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Haoran and Niu, Yifan and Huang, Zehuan and Sun, Yang-Tian and Guo, Chunchao and Peng, Yuxin and Sheng, Lu}, title = {Repurposing 3D Generative Model for Autoregressive Layout Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3231-3243} }
RnG: A Unified Transformer for Complete 3D Modeling from Partial Observations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2026_CVPR, author = {Xiang, Mochu and Shen, Zhelun and Li, Xuesong and Ren, Jiahui and Zhang, Jing and Zhao, Chen and Liu, Shanshan and Feng, Haocheng and Wang, Jingdong and Dai, Yuchao}, title = {RnG: A Unified Transformer for Complete 3D Modeling from Partial Observations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {406-416} }
AKCMamba-YOLO: Selective State Space Models For Real-Time Object Detection-
[pdf]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Long and Wang, Hui and Xu, Man and Li, Zexuan and Fan, Zizhu}, title = {AKCMamba-YOLO: Selective State Space Models For Real-Time Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4438-4447} }
Global Information Thresholding for Sufficient and Necessary Circuits-
[pdf]
[bibtex]@InProceedings{Cho_2026_CVPR, author = {Cho, Jegyeong}, title = {Global Information Thresholding for Sufficient and Necessary Circuits}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3264-3273} }
GeoRK2: Geometry-Guided Runge-Kutta Integration for Diffusion Transformer Acceleration-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Chaoqun and Fu, Zongjing and Chang, Powei and Zhang, Jinpeng and Xiang, Jianxiang and Gao, Yukang and Wang, Chenyu}, title = {GeoRK2: Geometry-Guided Runge-Kutta Integration for Diffusion Transformer Acceleration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9404-9413} }
WonderZoom: Multi-Scale 3D World Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Jin and Yu, Hong-Xing and Wu, Jiajun}, title = {WonderZoom: Multi-Scale 3D World Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5859-5869} }
Concept-Aware Batch Sampling Improves Language-Image Pretraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ghosh_2026_CVPR, author = {Ghosh, Adhiraj and Udandarao, Vishaal and Nguyen, Thao and Farina, Matteo and Cherti, Mehdi and Jitsev, Jenia and Oh, Sewoong and Ricci, Elisa and Schmidt, Ludwig and Bethge, Matthias}, title = {Concept-Aware Batch Sampling Improves Language-Image Pretraining}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3056-3068} }
GeoFree-CoSeg: Unsupervised Point Cloud-Image Cross-Modal Co-Segmentation Without Geometric Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Duan_2026_CVPR, author = {Duan, Xin and Liu, Xiabi and Pan, Liyuan}, title = {GeoFree-CoSeg: Unsupervised Point Cloud-Image Cross-Modal Co-Segmentation Without Geometric Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10778-10788} }
Any Resolution Any Geometry: From Multi-View To Multi-Patch-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cui_2026_CVPR, author = {Cui, Wenqing and Li, Zhenyu and Lavreniuk, Mykola and Shi, Jian and Idoughi, Ramzi and Tang, Xiangjun and Wonka, Peter}, title = {Any Resolution Any Geometry: From Multi-View To Multi-Patch}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12576-12585} }
Ultrasound-CLIP: Semantic-Aware Contrastive Pre-training for Ultrasound Image-Text Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2026_CVPR, author = {Jin, Jiayun and Chai, Haolong and Huang, Xueying and Guo, Xiaoqing and Zheng, Zengwei and Zhou, Zhan and Wang, Junmei and Wang, Xinyu and Liu, Jie and Zhou, Binbin}, title = {Ultrasound-CLIP: Semantic-Aware Contrastive Pre-training for Ultrasound Image-Text Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6962-6971} }
Mitigating Error Amplification in Fast Adversarial Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Mengnan and Zhang, Lihe and Wang, Bo and Zheng, Tianhang and Zhong, Hong and Min, Geyong}, title = {Mitigating Error Amplification in Fast Adversarial Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13346-13355} }
Complet4R: Geometric Complete 4D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Weibang and Li, Kenan and Chen, Zhuoguang and Yuan, Yijun and Zhao, Hang}, title = {Complet4R: Geometric Complete 4D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {341-351} }
Cross-Scale Pansharpening via ScaleFormer and the PanScale Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Ke and He, Xuanhua and Li, Xueheng and Zhu, Lingting and Wang, Yingying and Ma, Ao and Zhang, Zhanjie and Zhou, Man and Xie, Chengjun and Zhang, Jie}, title = {Cross-Scale Pansharpening via ScaleFormer and the PanScale Benchmark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13211-13221} }
MatLat: Material Latent Space for PBR Texture Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yeo_2026_CVPR, author = {Yeo, Kyeongmin and Min, Yunhong and Kim, Jaihoon and Sung, Minhyuk}, title = {MatLat: Material Latent Space for PBR Texture Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4602-4612} }
LoG3D: Ultra-High-Resolution 3D Shape Modeling via Local-to-Global Partitioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Xinran and Lai, Shuichang and Lyu, Jiangjing and Li, Hongjie and Pan, Bowen and Li, Yuanqi and Guo, Jie and Zhou, Zhengkang and Guo, Yanwen}, title = {LoG3D: Ultra-High-Resolution 3D Shape Modeling via Local-to-Global Partitioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5945-5955} }
StaR-KVQA: Structured Reasoning Traces for Implicit-Knowledge Visual Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2026_CVPR, author = {Wen, Zhihao and Wei, Wenkang and Fang, Yuan and Yu, Xingtong and Zhang, Hui and Zhu, Weicheng and Zhang, Xin}, title = {StaR-KVQA: Structured Reasoning Traces for Implicit-Knowledge Visual Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5114-5124} }
A Causal Marriage between VLM and IRM from Understanding to Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Ziliang and Xiao, Tianang and Zhang, Jusheng and Zheng, Yongsen and Liu, Yang and Lai, Zhao-rong and Lin, Liang}, title = {A Causal Marriage between VLM and IRM from Understanding to Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4749-4760} }
Revisiting the Necessity of Lengthy Chain-of-Thought in Vision-centric Reasoning Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2026_CVPR, author = {Du, Yifan and Zhou, Kun and Min, Yingqian and Ling, Yue and Zhao, Wayne Xin and Wu, Youbin and Wen, Ji-Rong}, title = {Revisiting the Necessity of Lengthy Chain-of-Thought in Vision-centric Reasoning Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12019-12029} }
Leveraging Multispectral Sensors for Color Correction in Mobile Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cogo_2026_CVPR, author = {Cogo, Luca and Buzzelli, Marco and Bianco, Simone and Vazquez-Corral, Javier and Schettini, Raimondo}, title = {Leveraging Multispectral Sensors for Color Correction in Mobile Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12438-12447} }
Real-Time Generation of Streamable Talking Portrait Video with Reference-Guided Deep Compression VAEs-
[pdf]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Sicheng and Deng, Yu and Hu, Shoukang and Wang, Yichuan and Zhang, Yizhong and Chen, Zhan and Yang, Jiaolong and Guo, Baining}, title = {Real-Time Generation of Streamable Talking Portrait Video with Reference-Guided Deep Compression VAEs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9284-9295} }
Anchoring and Rescaling Attention for Semantically Coherent Inbetweening-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, Tae Eun and Shim, Sumin and Kim, Junhyeok and Hwang, Seong Jae}, title = {Anchoring and Rescaling Attention for Semantically Coherent Inbetweening}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8976-8985} }
Beyond Graph Model: Reliable VLM Fine-Tuning via Random Graph Adapter-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Bo and Ze, Xueyang and Wang, Beibei and Wang, Xixi and Wan, Xixi and Luo, Bin}, title = {Beyond Graph Model: Reliable VLM Fine-Tuning via Random Graph Adapter}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11664-11673} }
HulluEdit: Single-Pass Evidence-Consistent Subspace Editing for Mitigating Hallucinations in Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Yangguang and Fang, Quan and Li, Yufei and Sun, Jiachen and Gao, Junyu and Sang, Jitao}, title = {HulluEdit: Single-Pass Evidence-Consistent Subspace Editing for Mitigating Hallucinations in Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11086-11095} }
LiveGesture: Streamable Co-Speech Gesture Generation Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saleem_2026_CVPR, author = {Saleem, Muhammad Usama and Patel, Mayur Jagdishbhai and Pinyoanuntapong, Ekkasit and Qin, Zhongxing and Yang, Li and Xue, Hongfei and Helmy, Ahmed and Chen, Chen and Wang, Pu}, title = {{LiveGesture}: Streamable Co-Speech Gesture Generation Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2264--2273} }
PercHead: Perceptual Head Model for Single-Image 3D Head Reconstruction & Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Oroz_2026_CVPR, author = {Oroz, Antonio and Nie{\ss}ner, Matthias and Kirschstein, Tobias}, title = {{PercHead}: Perceptual Head Model for Single-Image {3D} Head Reconstruction \& Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4097--4108} }
Affine Perspective-Three-Point Problem-
[pdf]
[supp]
[bibtex]@InProceedings{Nakano_2026_CVPR, author = {Nakano, Gaku}, title = {Affine Perspective-Three-Point Problem}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12217--12226} }
Refacade: Editing Object with Given Reference Texture-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Youze and Ruan, Penghui and Zi, Bojia and Qi, Xianbiao and Wang, Jianan and Xiao, Rong}, title = {Refacade: Editing Object with Given Reference Texture}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1961--1972} }
Fine-grained Image Aesthetic Assessment: Learning Discriminative Scores from Relative Ranks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zhichao and Wang, Jianjie and Zhang, Zhixianhe and Xie, Pangu and Sheng, Xiangfei and Chen, Pengfei and Li, Leida}, title = {Fine-grained Image Aesthetic Assessment: Learning Discriminative Scores from Relative Ranks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {145--155} }
LiteSense: Lifting Lightweight ToF with RGB for High-Resolution Metric Depth Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yusheng and Lou, Lizhi and Tang, Yan and Miao, Zekai and Zhang, Shaoming and Wang, Jianmei}, title = {{LiteSense}: Lifting Lightweight {ToF} with {RGB} for High-Resolution Metric Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5783--5792} }
COG: Confidence-aware Optimal Geometric Correspondence for Unsupervised Single-reference Novel Object Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Che_2026_CVPR, author = {Che, Yuchen and Wu, Jingtu and Zheng, Hao and Kanezaki, Asako}, title = {{COG}: Confidence-aware Optimal Geometric Correspondence for Unsupervised Single-reference Novel Object Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {11567--11578} }
Long-Tail Internet Photo Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yuan and Xiangli, Yuanbo and Averbuch-Elor, Hadar and Snavely, Noah and Cai, Ruojin}, title = {Long-Tail Internet Photo Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {417--426} }
Write Where It Matters: Policy-Guided Watermarks for 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Nan and Zeng, Yike and Zhang, Qian and Zhang, Qi and Pan, Zhiyi and Feng, Wei and Wan, Liang}, title = {Write Where It Matters: Policy-Guided Watermarks for {3D} {Gaussian} Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6580--6590} }
WikiCLIP: An Efficient Contrastive Baseline for Open-domain Visual Entity Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ning_2026_CVPR, author = {Ning, Shan and Qiu, Longtian and Sun, Jiaxuan and He, Xuming}, title = {{WikiCLIP}: An Efficient Contrastive Baseline for Open-domain Visual Entity Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1596--1605} }
RaPA: Enhancing Transferable Targeted Attacks via Random Parameter Pruning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2026_CVPR, author = {Su, Tongrui and Li, Qingbin and Zhu, Shengyu and Chen, Wei and Cheng, Xueqi}, title = {{RaPA}: Enhancing Transferable Targeted Attacks via Random Parameter Pruning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6538--6548} }
X-band Radar Non-Line-of-Sight Imaging-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2026_CVPR, author = {Du, Dongyu and Zhao, Mingkun and Yang, Yutong and Scheuble, Dominik and Huang, Xiaolong and Shao, Zijian and Bijelic, Mario and Sengupta, Kaushik and Heide, Felix}, title = {X-band Radar Non-Line-of-Sight Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5647--5658} }
DiverseGRPO: Mitigating Mode Collapse in Image Generation via Diversity-Aware GRPO-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Henglin and Huang, Huijuan and Wang, Jing and Liu, Chang and Li, Xiu and Ji, Xiangyang}, title = {{DiverseGRPO}: Mitigating Mode Collapse in Image Generation via Diversity-Aware {GRPO}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1864--1873} }
Breaking Smooth-Motion Assumptions: A UAV Benchmark for Multi-Object Tracking in Complex and Adverse Conditions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Jingtao and Zhang, Kexin and Ma, Xunchi and Li, Yuechan and Zhu, Guangming and Shen, Peiyi and Jiang, Linhua and Zhang, Xiangdong and Zhang, Liang}, title = {Breaking Smooth-Motion Assumptions: A {UAV} Benchmark for Multi-Object Tracking in Complex and Adverse Conditions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {13594--13603} }
Unleashing the Intrinsic Visual Representation Capability of Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Hengzhuang and Zhang, Xinsong and Peng, Qiming and Luo, Bin and Hu, Han and Jiang, Dengyang and Ye, Han-Jia and Zhang, Teng and Jin, Hai}, title = {Unleashing the Intrinsic Visual Representation Capability of Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1771--1786} }
MotionEdit: Benchmarking and Learning Motion-Centric Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wan_2026_CVPR, author = {Wan, Yixin and Ke, Lei and Yu, Wenhao and Chang, Kai-Wei and Yu, Dong}, title = {{MotionEdit}: Benchmarking and Learning Motion-Centric Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {9263--9272} }
OMG-Avatar: One-shot Multi-LOD Gaussian Head Avatar-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2026_CVPR, author = {Ren, Jianqiang and Liu, Lin and Hoi, Steven}, title = {{OMG-Avatar}: One-shot Multi-{LOD} {Gaussian} Head Avatar}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {11017--11028} }
Reflection Separation from a Single Image via Joint Latent Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Zheng-Hui and Wang, Zhixiang and Liu, Yu-Lun and Chuang, Yung-Yu}, title = {Reflection Separation from a Single Image via Joint Latent Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4569--4579} }
Envisioning the Future, One Step at a Time-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Baumann_2026_CVPR, author = {Baumann, Stefan Andreas and Wiese, Jannik and Martorella, Tommaso and Kalayeh, Mahdi M. and Ommer, Bj{\"o}rn}, title = {Envisioning the Future, One Step at a Time}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6823--6836} }
MeToM: Metadata-Guided Token Merging for Efficient Video LLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Zhuojie and Wang, Shijie and Yu, Xin}, title = {{MeToM}: Metadata-Guided Token Merging for Efficient Video {LLMs}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {10441--10450} }
Breaking Spurious Correlations: Uncertainty-Driven Causal Transformers for AU Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yuru and Zhou, Yue}, title = {Breaking Spurious Correlations: Uncertainty-Driven Causal Transformers for {AU} Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7165--7174} }
Self-Evaluation Unlocks Any-Step Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Xin and Qi, Xiaojuan and Li, Zhengqi and Zhang, Kai and Zhang, Richard and Lin, Zhe and Shechtman, Eli and Wang, Tianyu and Nitzan, Yotam}, title = {Self-Evaluation Unlocks Any-Step Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7816--7826} }
LuxRemix: Lighting Decomposition and Remixing for Indoor Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Ruofan and M{\"u}ller, Norman and Weber, Ethan and Zauss, Duncan and Vijaykumar, Nandita and Kontschieder, Peter and Richardt, Christian}, title = {{LuxRemix}: Lighting Decomposition and Remixing for Indoor Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1100--1111} }
AVATAR: Reinforcement Learning to See, Hear, and Reason Over Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kulkarni_2026_CVPR, author = {Kulkarni, Yogesh and Fazli, Pooyan}, title = {{AVATAR}: Reinforcement Learning to See, Hear, and Reason Over Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7912--7922} }
VGGDrive: Empowering Vision-Language Models with Cross-View Geometric Grounding for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Jie and Li, Guang and Huang, Zhijian and Dang, Chenxu and Ye, Hangjun and Han, Yahong and Chen, Long}, title = {{VGGDrive}: Empowering Vision-Language Models with Cross-View Geometric Grounding for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {10954--10964} }
FBTA: Enabling Single-GPU End-to-End Gigapixel WSI Classification with Feature Bridging and Translation Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2026_CVPR, author = {Dong, Jiuyang and Li, Jiahan and Jiang, Junjun and Zhang, Yongbing}, title = {{FBTA}: Enabling Single-{GPU} End-to-End Gigapixel {WSI} Classification with Feature Bridging and Translation Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7026--7035} }
Agile Deliberation: Concept Deliberation for Subjective Visual Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Leijie and Stretcu, Otilia and Qiao, Wei and Denby, Thomas and Viswanathan, Krishnamurthy and Luo, Enming and Lu, Chun-Ta and Dogra, Tushar and Krishna, Ranjay and Fuxman, Ariel}, title = {Agile Deliberation: Concept Deliberation for Subjective Visual Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4794--4804} }
EMMA: Extracting Multiple physical parameters from Multimodal Data-
[pdf]
[supp]
[bibtex]@InProceedings{Shaikh_2026_CVPR, author = {Shaikh, Farhat and Banerjee, Ayan and Gupta, Sandeep}, title = {{EMMA}: Extracting Multiple physical parameters from Multimodal Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1716--1725} }
MS-Temba: Multi-Scale Temporal Mamba for Understanding Long Untrimmed Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sinha_2026_CVPR, author = {Sinha, Arkaprava and Raj, Monish Soundar and Wang, Pu and Helmy, Ahmed and Le, Hieu and Das, Srijan}, title = {{MS-Temba}: Multi-Scale Temporal {Mamba} for Understanding Long Untrimmed Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {9815--9826} }
HAVE-Bench: Hierarchical Audio-Visual Evaluation from Perception to Interaction-
[pdf]
[supp]
[bibtex]@InProceedings{Zhong_2026_CVPR, author = {Zhong, Muyan and Cui, Erfei and Xing, Sen and Wang, Weiyun and Wu, Wen and Hu, Yuchen and Zhang, Yanting and Hu, Xiaowei and Wang, Wenhai and Zhang, Chao and Dai, Jifeng}, title = {{HAVE-Bench}: Hierarchical Audio-Visual Evaluation from Perception to Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8801--8812} }
UAST: Unified Active Search and Tracking for Arbitrary Targets with UAVs-
[pdf]
[bibtex]@InProceedings{Qin_2026_CVPR, author = {Qin, Liang and Wang, Min and Lu, Xingyu and Qiu, Aowen and Zhou, Wengang and Li, Houqiang}, title = {{UAST}: Unified Active Search and Tracking for Arbitrary Targets with {UAVs}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {13464--13473} }
TerraSeg: Self-Supervised Ground Segmentation for Any LiDAR-
[pdf]
[supp]
[bibtex]@InProceedings{Lentsch_2026_CVPR, author = {Lentsch, Ted and Montiel-Mar{\'\i}n, Santiago and Caesar, Holger and Gavrila, Dariu M.}, title = {{TerraSeg}: Self-Supervised Ground Segmentation for Any {LiDAR}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {10040--10050} }
META: Meta Evolution of Tool Trajectory Adaptation for Long-Video Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Jing and Chen, Luyuan and Xu, Zhijie and Li, Yadong and Xu, Xingzhong and Chen, Siye and Liu, Jie and Kong, Ming and Zhu, Qiang}, title = {{META}: Meta Evolution of Tool Trajectory Adaptation for Long-Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {9837--9846} }
Wanderland: Geometrically Grounded Simulation for Open-World Embodied AI-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xinhao and Li, Jiaqi and Deng, Youming and Chen, Ruxin and Zhang, Yingjia and Ma, Yifei and Guo, Li and Li, Yiming and Zhang, Jing and Feng, Chen}, title = {Wanderland: Geometrically Grounded Simulation for Open-World Embodied {AI}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1041--1052} }
Rethinking Box Supervision: Bias-Free Weakly Supervised Medical Segmentation-
[pdf]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Jun and Huang, Hui}, title = {Rethinking Box Supervision: Bias-Free Weakly Supervised Medical Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8525--8534} }
Visual Document Understanding and Reasoning: A Multi-Agent Collaboration Framework with Agent-Wise Adaptive Test-Time Scaling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Xinlei and Xu, Chengming and Chen, Zhangquan and Zhang, Yudong and Lu, Shilin and Yang, Cheng and Zhang, Jiangning and Yan, Shuicheng and Hu, Xiaobin}, title = {Visual Document Understanding and Reasoning: A Multi-Agent Collaboration Framework with Agent-Wise Adaptive Test-Time Scaling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12300--12311} }
Bridging Human Evaluation to Infrared and Visible Image Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Jinyuan and Li, Xingyuan and Mei, Qingyun and Xu, Haoyuan and Jiang, Zhiying and Ma, Long and Liu, Risheng and Fan, Xin}, title = {Bridging Human Evaluation to Infrared and Visible Image Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12322--12333} }
Speeding Up the Learning of 3D Gaussians with Much Shorter Gaussian Lists-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Jiaqi and Han, Zhizhong}, title = {Speeding Up the Learning of {3D} {Gaussians} with Much Shorter {Gaussian} Lists}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1231--1240} }
Guiding a Diffusion Transformer with the Internal Dynamics of Itself-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Xingyu and Li, Qifan and Hu, Xiaobin and Chen, Hai and Gu, Shuhang}, title = {Guiding a Diffusion Transformer with the Internal Dynamics of Itself}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {11536--11545} }
DiG: Differential Grounding for Enhancing Fine-Grained Perception in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tao_2026_CVPR, author = {Tao, Zhou and Wang, Shida and Hua, YongXiang and Cao, Haoyu and Xu, Linli}, title = {{DiG}: Differential Grounding for Enhancing Fine-Grained Perception in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1695--1705} }
From Failure to Feedback: Group Revision Unlocks Hard Cases in Object-Level Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yuyuan and Ji, Yiping and Le, Anjie and Zhu, Jiayuan and Pan, Jiazhen and Peng, Can and Deng, Jiajun and Liu, Fengbei and Wu, Junde}, title = {From Failure to Feedback: Group Revision Unlocks Hard Cases in Object-Level Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4816--4828} }
Understanding Counting Mechanisms in Large Language and Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hasani_2026_CVPR, author = {Hasani, Hosein and Izadi, Amirmohammad and Askari, Fatemeh and Bagherian, Mobin and Mohammadian, Sadegh and Izadi, Mohammad and Baghshah, Mahdieh Soleymani}, title = {Understanding Counting Mechanisms in Large Language and Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5125--5133} }
MMDIR: Multimodal Instruction-Driven Framework for Mixed-Degradation Document Image Restoration-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Heng and Wang, Xingyuan and Fan, Yang and Zhang, Yunan and Wu, Xiangping and Chen, Qingcai}, title = {{MMDIR}: Multimodal Instruction-Driven Framework for Mixed-Degradation Document Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8387--8396} }
ID-Sim: An Identity-Focused Similarity Metric-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chae_2026_CVPR, author = {Chae, Julia and Kolkin, Nicholas and Wang, Jui-Hsien and Zhang, Richard and Beery, Sara and Ham, Cusuh}, title = {{ID-Sim}: An Identity-Focused Similarity Metric}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {11250--11262} }
Free-Lunch Long Video Generation via Layer-Adaptive O.O.D Correction-
[pdf]
[supp]
[bibtex]@InProceedings{Tian_2026_CVPR, author = {Tian, Jiahao and Song, Chenxi and Cheng, Wei and Zhang, Chi}, title = {Free-Lunch Long Video Generation via Layer-Adaptive {O.O.D} Correction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1973--1982} }
Black-box Membership Inference Attacks on the Pre-training Data of Image-generation Models-
[pdf]
[supp]
[bibtex]@InProceedings{Qi_2026_CVPR, author = {Qi, Tao and Wang, Huili and Huang, Yuanhong and Wang, Wendan and Zhao, Lianchao and Wang, Jinrui and Qin, Zichen and Wang, Shangguang and Huang, Yongfeng}, title = {Black-box Membership Inference Attacks on the Pre-training Data of Image-generation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {62--71} }
Inconsistency-aware Multimodal Schrodinger Bridge for Deepfake Localization-
[pdf]
[bibtex]@InProceedings{Xiong_2026_CVPR, author = {Xiong, Jiayu and Wang, Jing and Zhang, Qi and Wang, Wanlong and Xue, Jun}, title = {Inconsistency-aware Multimodal {Schrodinger} Bridge for Deepfake Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8697--8706} }
ViTPrompt: Training-Free Prompt Refinement with Visual Tokens for Open-Vocabulary Detection-
[pdf]
[bibtex]@InProceedings{Qin_2026_CVPR, author = {Qin, Yitong and Zhou, Lihua and Wei, Jiwei and Ran, Ran and He, Shiyuan and Ma, Zeyu and Li, Shuaifeng and Li, Nianxin and Shen, Heng Tao}, title = {{ViTPrompt}: Training-Free Prompt Refinement with Visual Tokens for Open-Vocabulary Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3111--3121} }
HumanBA: Human-Aware Bundle Adjustment via Global Human-Camera Decoupling-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Fengyuan and Sur, Tanuj and Tse, Tze Ho Elden and Yao, Angela}, title = {{HumanBA}: Human-Aware Bundle Adjustment via Global Human-Camera Decoupling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {13846--13855} }
Beyond Heuristic Prompting: A Concept-Guided Bayesian Framework for Zero-Shot Image Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Hui and Chen, Kecheng and Wang, Jialiang and Liu, Xianming and Wang, Wenya and Li, Haoliang}, title = {Beyond Heuristic Prompting: A Concept-Guided {Bayesian} Framework for Zero-Shot Image Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5521--5531} }
Beyond Endpoints: Path-Centric Reasoning for Vectorized Off-Road Network Extraction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guan_2026_CVPR, author = {Guan, Wenfei and Mei, Jilin and Shen, Tong and Wu, Xumin and Wang, Shuo and Min, Chen and Hu, Yu}, title = {Beyond Endpoints: Path-Centric Reasoning for Vectorized Off-Road Network Extraction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {13254--13263} }
Scaling Self-Supervised and Cross-Modal Pretraining for Volumetric CT Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Claessens_2026_CVPR, author = {Claessens, Cris and Viviers, Christiaan and D'Amicantonio, Giacomo and Bondarev, Egor and van der Sommen, Fons}, title = {Scaling Self-Supervised and Cross-Modal Pretraining for Volumetric {CT} Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {13636--13647} }
SDDF: Specificity-Driven Dynamic Focusing for Open-Vocabulary Camouflaged Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Jiaming and Zhan, Yifeng and Liu, Chunlin and Zheng, Weihua and Peng, Bingye and Liang, Qiwei and Cai, Boyang and Mai, Xiaochun and Nie, Qiang}, title = {{SDDF}: Specificity-Driven Dynamic Focusing for Open-Vocabulary Camouflaged Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {13049--13058} }
Lens Component Deletion based on Differentiable Ray Tracing-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Wenguan and Zhang, Qirun and Sun, Tuo and He, Jiajian and Xu, Jiahui and Feng, Huajun and Li, Qi}, title = {Lens Component Deletion based on Differentiable Ray Tracing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5637--5646} }
MR. Illuminate: Zero-Shot Low-Light Image Enhancement with Diffusion Prior-
[pdf]
[supp]
[bibtex]@InProceedings{Cho_2026_CVPR, author = {Cho, Joshua and Aghajanzadeh, Sara and Zhu, Zhen and Forsyth, David}, title = {{MR. Illuminate}: Zero-Shot Low-Light Image Enhancement with Diffusion Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8460--8470} }
GlyphPrinter: Region-Grouped Direct Preference Optimization for Glyph-Accurate Visual Text Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shuai_2026_CVPR, author = {Shuai, Xincheng and Li, Ziye and Ding, Henghui and Tao, Dacheng}, title = {{GlyphPrinter}: Region-Grouped Direct Preference Optimization for Glyph-Accurate Visual Text Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7674--7683} }
F2Net: A Frequency-Fused Network for Ultra-High Resolution Remote Sensing Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Hengzhi and Feng, Liqian and Wu, Wenhua and Zhu, Xiaogang and Wu, Qiuxia and Shan, Lianlei and Hu, Kun}, title = {{F2Net}: A Frequency-Fused Network for Ultra-High Resolution Remote Sensing Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {13275--13284} }
Adversarial Style Optimization: Enhancing VLM Jailbreaks by GRPO-based Stylistic Triggers Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Bingjun and Guo, Jialin and Yao, Yue and Ding, Xinpeng}, title = {Adversarial Style Optimization: Enhancing {VLM} Jailbreaks by {GRPO}-based Stylistic Triggers Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {11--19} }
Evo-1: Lightweight Vision-Language-Action Model with Preserved Semantic Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Tao and Zhong, Yilei and Du, Yuxin and Zhang, Jingjing and Liu, Jiting and Chen, Yinxinyu and Gu, Encheng and Liu, Ziyan and Cai, Hongyi and Zou, Yanwen and Zou, Lixing and Zhou, Zhaoye and Li, Gen and Zhao, Bo}, title = {Evo-1: Lightweight Vision-Language-Action Model with Preserved Semantic Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {13397--13406} }
Variational Graph-based Normal Integration-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Lixiong and Yu, Bohan and Prisacariu, Victor Adrian and Sato, Imari}, title = {Variational Graph-based Normal Integration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12663--12672} }
OpenVO: Open-World Visual Odometry with Temporal Dynamics Awareness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Phuc and Nhu, Anh N. and Lin, Ming C.}, title = {{OpenVO}: Open-World Visual Odometry with Temporal Dynamics Awareness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {14208--14218} }
S2D: Sparse to Dense Lifting for 3D Reconstruction with Minimal Inputs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2026_CVPR, author = {Ji, Yuzhou and Tian, Qijian and Zhu, He and Jiang, Xiaoqi and Cao, Guangzhi and Ma, Lizhuang and Xie, Yuan and Tan, Xin}, title = {{S2D}: Sparse to Dense Lifting for {3D} Reconstruction with Minimal Inputs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7491--7502} }
NexusFlow: Unifying Disparate Tasks under Partial Supervision via Invertible Flow Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Fangzhou and Wang, Yuping and Guo, Yuliang and Huang, Zixun and Huang, Xinyu and Zhang, Haichong and Yamada, Kazunori and Tu, Zhengzhong and Ren, Liu and Zhang, Ziming}, title = {{NexusFlow}: Unifying Disparate Tasks under Partial Supervision via Invertible Flow Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3761--3771} }
Exploring Spatiotemporal Feature Propagation for Video-Level Compressive Spectral Reconstruction: Dataset, Model and Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Lijing and Shi, Zhan and Huang, Chenglong and Wu, Jinyao and Li, Qiping and Huo, Zikang and Chen, Linsen and Zi, Chongde and Cao, Xun}, title = {Exploring Spatiotemporal Feature Propagation for Video-Level Compressive Spectral Reconstruction: Dataset, Model and Benchmark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12522--12532} }
InterRVOS: Interaction-Aware Referring Video Object Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2026_CVPR, author = {Jin, Woojeong and Kim, Seongchan and Lee, Jaeho and Kim, Seungryong}, title = {{InterRVOS}: Interaction-Aware Referring Video Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {10367--10376} }
TopoSlide: Topologically-Informed Histopathology Whole Slide Image Representation Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Abousamra_2026_CVPR, author = {Abousamra, Shahira and Sood, Asmita and Plevritis, Sylvia}, title = {{TopoSlide}: Topologically-Informed Histopathology Whole Slide Image Representation Learning}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {13691--13701} }
Same Content, Different Answers: Cross-Modal Inconsistency in MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{van_Sprang_2026_CVPR, author = {van Sprang, Angela and Samson, Laurens and Lucic, Ana and Acar, Erman and Ghebreab, Sennay and Asano, Yuki M.}, title = {Same Content, Different Answers: Cross-Modal Inconsistency in {MLLMs}}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {8781--8790} }
LNEM: Lunar Neural Elevation Model-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Suwan and Yim, Jo Ryeong and Park, Kibaek and Kim, Dong-Gyu and Kim, Eunhyeuk and Jeong, Minsup and Sim, Chae Kyung and Lee, Seokju}, title = {{LNEM}: Lunar Neural Elevation Model}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {6508--6517} }
Omni IIE Bench: Benchmarking the Practical Capabilities of Image Editing Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yujia and Wang, Yuanxiang and Guan, Zhenyu and Yang, Tiankun and Bao, Chenxi and Jin, Haopeng and Luo, Jinwen and Zuo, Xinyu and Duan, Lisheng and Liang, Haijin and Ma, Jin and Wang, Xinming and Tao, Ruiwen and Yi, Hongzhu}, title = {{Omni IIE Bench}: Benchmarking the Practical Capabilities of Image Editing Models}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {1089--1099} }
VibeToken: Scaling 1D Image Tokenizers and Autoregressive Models for Dynamic Resolution Generations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Patel_2026_CVPR, author = {Patel, Maitreya and Li, Jingtao and Zhuang, Weiming and Yang, Yezhou and Lv, Lingjuan}, title = {{VibeToken}: Scaling {1D} Image Tokenizers and Autoregressive Models for Dynamic Resolution Generations}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {2058--2068} }
CAD-Refiner: A Unified Framework for CAD Generation and Iterative Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Yuan_2026_CVPR, author = {Yuan, Meng and Lin, Dawei and Xie, Hongxia and Wu, Tieru and Ma, Rui}, title = {{CAD-Refiner}: A Unified Framework for {CAD} Generation and Iterative Editing}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {3244--3253} }
When LoRA Betrays: Backdooring Text-to-Image Models by Masquerading as Benign Adapters-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lyu_2026_CVPR, author = {Lyu, Liangwei and Xu, Jiaqi and Ding, Jianwei and Deng, Qiyao}, title = {When {LoRA} Betrays: Backdooring Text-to-Image Models by Masquerading as Benign Adapters}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {8577--8586} }
Bootstrapping Video Semantic Segmentation Model via Distillation-assisted Test-Time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Jihun and Kwon, Hoyong and Kweon, Hyeokjun and Yoon, Kuk-Jin}, title = {Bootstrapping Video Semantic Segmentation Model via Distillation-assisted Test-Time Adaptation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {10766--10777} }
ICTPolarReal: A Polarized Reflection and Material Dataset of Real World Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Jing and Dharanikota, Krithika and Jia, Emily and Chen, Haiwei and Zhao, Yajie}, title = {{ICTPolarReal}: A Polarized Reflection and Material Dataset of Real World Objects}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {6518--6527} }
Towards Photorealistic and Efficient Bokeh Rendering via Diffusion Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Linxiao and Zheng, Siming and Wang, Zerong and Zhang, Hao and Chen, Jinwei and Li, Bo and Chen, Shifeng and Jiang, Peng-Tao}, title = {Towards Photorealistic and Efficient Bokeh Rendering via Diffusion Framework}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {231--240} }
V2U4Real: A Real-world Large-scale Dataset for Vehicle-to-UAV Cooperative Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Weijia and Xiang, Haoen and Wang, Tianxu and Wu, Shuaibing and Xia, Qiming and Wang, Cheng and Wen, Chenglu}, title = {{V2U4Real}: A Real-world Large-scale Dataset for {Vehicle-to-UAV} Cooperative Perception}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {4728--4737} }
Hilbert-Geo: Solving Solid Geometric Problems by Neural-Symbolic Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Ruoran and Cheng, Haoyu and Dong, Bin and Wang, Qiufeng}, title = {{Hilbert-Geo}: Solving Solid Geometric Problems by Neural-Symbolic Reasoning}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {9658--9667} }
PosterReward: Unlocking Accurate Evaluation for High-Quality Graphic Design Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lai_2026_CVPR, author = {Lai, Jianyu and Chen, Sixiang and Gao, Jialin and Shi, Hengyu and Liu, Zhongying and Zhai, Fuxiang and Luo, Junfeng and Wei, Xiaoming and Wang, Lujia and Zhu, Lei}, title = {{PosterReward}: Unlocking Accurate Evaluation for High-Quality Graphic Design Generation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {7762--7772} }
STAvatar: Soft Binding and Temporal Density Control for Monocular 3D Head Avatars Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Jiankuo and Zhu, Xiangyu and Wang, Zidu and Lei, Zhen}, title = {{STAvatar}: Soft Binding and Temporal Density Control for Monocular {3D} Head Avatars Reconstruction}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {10996--11005} }
PiLoT: Neural Pixel-to-3D Registration for UAV-based Ego and Target Geo-localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Xiaoya and Wang, Long and Liu, Yan and Liu, Xinyi and Tan, Hanlin and Liu, Yu and Zhang, Maojun and Yan, Shen}, title = {{PiLoT}: Neural {Pixel-to-3D} Registration for {UAV-based} Ego and Target Geo-localization}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {5379--5388} }
Pano3DComposer: Feed-Forward Compositional 3D Scene Generation from Single Panoramic Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2026_CVPR, author = {Qiu, Zidian and Wu, Ancong}, title = {{Pano3DComposer}: Feed-Forward Compositional {3D} Scene Generation from Single Panoramic Image}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {5902--5911} }
MGDHand: Multi-Granularity Prior-to-Inertial Distillation Framework for Sequential 3D Hand Pose Estimation from Sparse IMUs-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xinyi and Ren, Pengfei and Zhang, Haoyang and Zhan, Hanling and Li, Yingxi and Xie, Liang and Gao, Yue and Yin, Erwei}, title = {{MGDHand}: Multi-Granularity Prior-to-Inertial Distillation Framework for Sequential {3D} Hand Pose Estimation from Sparse {IMUs}}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {13996--14005} }
Clinically-Grounded Counterfactual Reasoning for Medical Video Diagnosis-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Jianzhe and Wang, Churan and Zhang, Weiyi and Li, Jianghua and Li, Li-An and Wang, Wenguan and Zhu, Yixin and Wang, Yizhou}, title = {Clinically-Grounded Counterfactual Reasoning for Medical Video Diagnosis}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {7014--7025} }
Towards Holistic Modeling for Video Frame Interpolation with Auto-regressive Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Xinyu and Li, Han and Huang, Yuyang and Zheng, Ziyang and Wang, Yaoming and Chen, Xin and Dai, Wenrui and Li, Chenglin and Zou, Junni and Xiong, Hongkai}, title = {Towards Holistic Modeling for Video Frame Interpolation with Auto-regressive Diffusion Transformers}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {11448--11458} }
WildCap: Facial Albedo Capture in the Wild via Hybrid Inverse Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Yuxuan and Ming, Xin and Li, Tianxiao and Shen, Zhuofan and Zhang, Qixuan and Xu, Lan and Xu, Feng}, title = {{WildCap}: Facial Albedo Capture in the Wild via Hybrid Inverse Rendering}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {10909--10920} }
SemanticVLA: Towards Semantic Reasoning over Action Memorization via Synergistic Explicit Trace and Latent Action Planning-
[pdf]
[supp]
[bibtex]@InProceedings{Ni_2026_CVPR, author = {Ni, Fei and Chen, Zhuo and Yuan, Yifu and Dong, Zibin and Yao, Xianze and Luo, Shan and Hao, Jianye and Deng, Jiankang and Zafeiriou, Stefanos}, title = {{SemanticVLA}: Towards Semantic Reasoning over Action Memorization via Synergistic Explicit Trace and Latent Action Planning}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {12237--12247} }
Towards Stable Self-Supervised Object Representations in Unconstrained Egocentric Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Yuting and Cheng, Xilong and Qin, Yunxiao and Li, Zhengnan and Zhang, Jingjing}, title = {Towards Stable Self-Supervised Object Representations in Unconstrained Egocentric Video}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {10545--10555} }
Ultra-Low Bitrate Perceptual Image Compression with Shallow Encoder-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Tianyu and Liu, Dong and Chen, Chang Wen}, title = {Ultra-Low Bitrate Perceptual Image Compression with Shallow Encoder}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {12118--12128} }
T2SGrid: Temporal-to-Spatial Gridification for Video Temporal Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Chaohong and He, Yihan and Nie, Yongwei and Ma, Fei and Xu, Xuemiao and Long, Chengjiang}, title = {{T2SGrid}: Temporal-to-Spatial Gridification for Video Temporal Grounding}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {3443--3454} }
OVI-MAP: Open-Vocabulary Instance-Semantic Mapping-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Zilong and Tombari, Federico and Pollefeys, Marc and Wald, Johanna and Barath, Daniel}, title = {{OVI-MAP}: Open-Vocabulary Instance-Semantic Mapping}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {12606--12616} }
GDFA: Geometry-Driven Federated Unlearning with Directional Task Vector Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Weng_2026_CVPR, author = {Weng, Xiuting and Pu, Ruizhi and Yao, Yuanhang and Yue, Kun and Tang, Zhiwen and Yu, Lixing}, title = {{GDFA}: Geometry-Driven Federated Unlearning with Directional Task Vector Alignment}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {10346--10356} }
What Do Visual Tokens Really Encode? Uncovering Sparsity and Redundancy in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Yingqi and Tong, Junlong and Zhao, Anhao and Shen, Xiaoyu}, title = {What Do Visual Tokens Really Encode? {Uncovering} Sparsity and Redundancy in Multimodal Large Language Models}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {11987--11997} }
All in One: Unifying Deepfake Detection, Tampering Localization, and Source Tracing with a Robust Landmark-Identity Watermark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Junjiang and Wang, Liejun and Guo, Zhiqing}, title = {All in One: Unifying Deepfake Detection, Tampering Localization, and Source Tracing with a Robust Landmark-Identity Watermark}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {14106--14115} }
Coordinate Denoising for Non-Equilibrium Molecular Representation Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Qianwei and Xu, Baile and Zhao, Jian and Shen, Furao}, title = {Coordinate Denoising for Non-Equilibrium Molecular Representation Learning}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {3584--3593} }
Learning Cross-View Object Correspondence via Cycle-Consistent Mask Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Shannan and Zheng, Leqi and Lv, Keyu and Ni, Jingchen and Wei, Hongyang and Zhang, Jiajun and Wang, Guangting and LYU, Jing and Yuan, Chun and Rao, Fengyun}, title = {Learning Cross-View Object Correspondence via Cycle-Consistent Mask Prediction}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {6653--6663} }
Can We Build Scene Graphs, Not Classify Them? FlowSG: Progressive Image-Conditioned Scene Graph Generation with Flow Matching-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Xin and Qin, Ke and Yin, Wen and Li, Yuan-Fang and Li, Ming and He, Tao}, title = {Can We Build Scene Graphs, Not Classify Them? {FlowSG}: Progressive Image-Conditioned Scene Graph Generation with Flow Matching}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {10208--10218} }
Lifelong Imitation Learning with Multimodal Latent Replay and Incremental Adjustment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Fanqi and Tiezzi, Matteo and Apicella, Tommaso and Beyan, Cigdem and Murino, Vittorio}, title = {Lifelong Imitation Learning with Multimodal Latent Replay and Incremental Adjustment}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {6740--6749} }
A Self-Conditioned Representation Guided Diffusion Model for Realistic Text-to-LiDAR Scene Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2026_CVPR, author = {Qu, Wentao and Mei, Guofeng and Wu, Yang and Gong, YongShun and Huang, Xiaoshui and Xiao, Liang}, title = {A Self-Conditioned Representation Guided Diffusion Model for Realistic {Text-to-LiDAR} Scene Generation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {9434--9444} }
Concept Regions Matter: Benchmarking CLIP with a New Cluster-Importance Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Agarwal_2026_CVPR, author = {Agarwal, Aishwarya and Karanam, Srikrishna and Gandhi, Vineet}, title = {Concept Regions Matter: Benchmarking {CLIP} with a New Cluster-Importance Approach}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {2864--2874} }
MotionEnhancer: Leveraging Video Diffusion for Motion-Enhanced Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Yifan and Zhang, Chao and Ma, Ruifei and Gao, Fei and Yang, Zhifei and Qi, Jiaxing and Chen, Zhipeng}, title = {{MotionEnhancer}: Leveraging Video Diffusion for Motion-Enhanced Vision-Language Models}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {2778--2787} }
OrionEdit: Bridging Reference and Source Images for Generalized Cross-Image Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Zeyu and Po, Lai Man and Xu, Xuyuan and Wang, Yexin and Gong, Guoping and Wu, Haoxuan and Yan, Chenbo and Li, Kun and Liu, Yuyang}, title = {{OrionEdit}: Bridging Reference and Source Images for Generalized Cross-Image Editing}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {9127--9138} }
GeCo: Geometry-Consistent Regularization for Domain Generalized Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zang_2026_CVPR, author = {Zang, Qi and Zhao, Dong and Pu, Nan and Li, Wenjing and Zhong, Zhun and Wang, Meng}, title = {{GeCo}: Geometry-Consistent Regularization for Domain Generalized Semantic Segmentation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {871--881} }
AVGGT: Rethinking Global Attention for Accelerating VGGT-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Xianbing and Zhu, Zhikai and Lou, Zhengyu and Yang, Bo and Tang, Jinyang and Zhang, Liqing and Wang, He and Zhang, Jianfu}, title = {{AVGGT}: Rethinking Global Attention for Accelerating {VGGT}}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {251--260} }
CDICS: Delving Into Fine-Grained Attribute for In-Context Segmentation via Compositional Prompts and Phased Decoupling-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zhiyu and Sheng, Dianmo and Chu, Qi and Chen, Shilong and Gong, Tao and Wei, Zhou and Yu, Nenghai}, title = {{CDICS}: Delving Into Fine-Grained Attribute for In-Context Segmentation via Compositional Prompts and Phased Decoupling}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {13179--13188} }
Robust Remote Sensing Image-Text Retrieval with Noisy Correspondence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Qiya and Xie, Yiqiang and Sun, Yuan and Dian, Renwei and Kang, Xudong}, title = {Robust Remote Sensing Image-Text Retrieval with Noisy Correspondence}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {9732--9741} }
STAR-R1: Multi-View Spatial TrAnsformation Reasoning by Reinforcing Multimodal LLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zongzhao and Ma, Zongyang and Li, Mingze and Li, Songyou and Rong, Yu and Xu, Tingyang and Zhang, Ziqi and Zhao, Deli and Huang, Wenbing}, title = {{STAR-R1}: Multi-View Spatial {TrAnsformation} Reasoning by Reinforcing Multimodal {LLMs}}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {12041--12051} }
MRI Contrast Enhancement Kinetics World Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kong_2026_CVPR, author = {Kong, Jindi and He, Yuting and Xia, Cong and Ge, Rongjun and Li, Shuo}, title = {{MRI} Contrast Enhancement Kinetics World Model}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {1288--1299} }
FaithFusion: Harmonizing Reconstruction and Generation via Pixel-wise Information Gain-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, YuAn and Li, Xiaofan and Huang, Chi and Zhang, Wenhao and Li, Hao and Wang, Bosheng and Sun, Xun and Wang, Jun}, title = {{FaithFusion}: Harmonizing Reconstruction and Generation via Pixel-wise Information Gain}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {1198--1209} }
IR-HGP: Physically-Aware Gaussian Inverse Rendering for High-Illumination Scenes via Generative Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Qingan and Li, Wensheng and Gao, Chengying}, title = {{IR-HGP}: Physically-Aware {Gaussian} Inverse Rendering for High-Illumination Scenes via Generative Priors}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {1210--1220} }
GuideFlow: Constraint-Guided Flow Matching for Planning in End-to-End Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Lin and Jia, Caiyan and Yu, Guanyi and Song, Ziying and Li, Junqiao and Jia, Feiyang and Wu, Peiliang and Hao, Xiaoshuai and Luo, Yadan}, title = {{GuideFlow}: Constraint-Guided Flow Matching for Planning in End-to-End Autonomous Driving}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {3719--3728} }
The Image as Its Own Reward: Reinforcement Learning with Adversarial Reward for Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Weijia and Chen, Hao and Yang, Zhenheng and Shou, Mike Zheng}, title = {The Image as Its Own Reward: Reinforcement Learning with Adversarial Reward for Image Generation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {5999--6009} }
Event Stream Filtering via Probability Flux Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Jinze and Zhai, Wei and Cao, Yang and Li, Bin and Zha, Zheng-Jun}, title = {Event Stream Filtering via Probability Flux Estimation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {8023--8032} }
Layer-wise Instance Binding for Regional and Occlusion Control in Text-to-Image Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Ruidong and Bai, Yancheng and Zhang, Xuanpu and Zeng, Jianhao and Wang, Lanjun and Song, Dan and Sun, Lei and Chu, Xiangxiang and Liu, Anan}, title = {Layer-wise Instance Binding for Regional and Occlusion Control in Text-to-Image Diffusion Transformers}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {11493--11503} }
Open the Motion Door: Atomic Motion Decomposition and Recomposition for Open-Vocabulary Motion Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Ke and Zhang, Jiangning and Yi, Ran and Gong, Jingyu and Wang, Yabiao and Wang, Yating and Tan, Xin and Wang, Chengjie and Ma, Lizhuang}, title = {Open the Motion Door: Atomic Motion Decomposition and Recomposition for Open-Vocabulary Motion Generation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {9330--9341} }
Pantheon360: Taming Digital Twin Generation via 3D-Aware 360° Video Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Ting-Hsuan and Chen, Ying-Huan and Tu, Tao and Lee, Jie-Ying and Wu, Cho-Ying and Lin, Fangzhou and Zhang, Hengyuan and Paz, David and Huang, Xinyu and Guo, Yuliang and Liu, Yu-Lun and Wang, Yue and Ren, Liu}, title = {{Pantheon360}: Taming Digital Twin Generation via {3D-Aware} 360{\textdegree} Video Diffusion}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {11138--11149} }
Correspondence-Attention Alignment for Multi-View Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kwon_2026_CVPR, author = {Kwon, Minkyung and Choi, Jinhyeok and Park, Jiho and Jeon, Seonghu and Jang, Jinhyuk and Seo, Junyoung and Kwak, Minseop and Kim, Jin-Hwa and Kim, Seungryong}, title = {Correspondence-Attention Alignment for Multi-View Diffusion Models}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {2316--2326} }
CIGMA: Causal Information-Gain Mechanistic Attribution of Attention Heads in Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Maliha_2026_CVPR, author = {Maliha, Maisha and Hougen, Dean F.}, title = {{CIGMA}: Causal Information-Gain Mechanistic Attribution of Attention Heads in Vision Transformers}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {9891--9900} }
Active Perceptual Inference: A Corticothalamic-Inspired Dynamic Nested Recurrent Network for Multimodal Sentiment Analysis with Incomplete Data-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yujuan and Li, Qing and Li, Ziyu and Li, Xiuxing and Wang, Zhuo and Xu, Mengrui and Wu, Xia}, title = {Active Perceptual Inference: A Corticothalamic-Inspired Dynamic Nested Recurrent Network for Multimodal Sentiment Analysis with Incomplete Data}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {1787--1797} }
Towards Generalized Representations for Low-Light Understanding: When Signal Constancy Meets Semantic Enrichment-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yifan and Huang, Haofeng and Yang, Wenhan and Liu, Jiaying}, title = {Towards Generalized Representations for Low-Light Understanding: When Signal Constancy Meets Semantic Enrichment}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {1386--1395} }
Tri-Subspaces Disentanglement for Multimodal Sentiment Analysis-
[pdf]
[arXiv]
[bibtex]@InProceedings{Meng_2026_CVPR, author = {Meng, Chunlei and Luo, Jiabin and Yan, Zhenglin and Yu, Zhenyu and Fu, Rong and Gan, Zhongxue and Ouyang, Chun}, title = {Tri-Subspaces Disentanglement for Multimodal Sentiment Analysis}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {8791--8800} }
Event-based Visual Deformation Measurement-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Yuliang and Zhai, Wei and Cui, Yuxin and Zhao, Tiesong and Cao, Yang and Zha, Zheng-Jun}, title = {Event-based Visual Deformation Measurement}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {903--913} }
GeoDexGrasp: Geometry-aware Generation for Data-efficient and Physics-plausible Dexterous Grasping-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Bing and Liu, Weiyuan and Zhang, Changlong and Wang, Chenxi and Zhao, Zhibin and Zhai, Zhi}, title = {{GeoDexGrasp}: Geometry-aware Generation for Data-efficient and Physics-plausible Dexterous Grasping}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {6729--6739} }
Beyond Soft Label: Dataset Distillation via Orthogonal Gradient Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Bo_2026_CVPR, author = {Bo, Deyu and Wang, Xinchao}, title = {Beyond Soft Label: Dataset Distillation via Orthogonal Gradient Matching}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {5596--5605} }
Fast-ThinkAct: Efficient Vision-Language-Action Reasoning via Verbalizable Latent Planning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Chi-Pin and Man, Yunze and Yu, Zhiding and Chen, Min-Hung and Kautz, Jan and Wang, Yu-Chiang Frank and Yang, Fu-En}, title = {{Fast-ThinkAct}: Efficient Vision-Language-Action Reasoning via Verbalizable Latent Planning}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {5070--5081} }
Geometry-Guided 3D Visual Token Pruning for Video-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Han and Huang, Zehao and Fu, Jiahui and Wang, Naiyan and Liu, Si}, title = {Geometry-Guided {3D} Visual Token Pruning for Video-Language Models}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {9615--9625} }
DyFCLT: Dynamic Frequency-Decoupled Cross-Modal Learning Transformer for Multimodal Tiny Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Chaolang and Dai, Pengwen and Li, Jingyu and Yao, Siyuan and Jiang, Yuchen and Zheng, Zhuoran}, title = {{DyFCLT}: Dynamic Frequency-Decoupled Cross-Modal Learning Transformer for Multimodal Tiny Object Detection}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {11313--11323} }
Residual Primitive Fitting of 3D Shapes with SuperFrusta-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ganeshan_2026_CVPR, author = {Ganeshan, Aditya and Gadelha, Matheus and Groueix, Thibault and Chen, Zhiqin and Chaudhuri, Siddhartha and Kim, Vladimir and Yifan, Wang and Ritchie, Daniel}, title = {Residual Primitive Fitting of {3D} Shapes with {SuperFrusta}}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {7404--7413} }
SAM 3D: 3Dfy Anything in Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xingyu and CHU, FU-JEN and Gleize, Pierre and Liang, Kevin J and Sax, Alexander and Tang, Hao and Wang, Weiyao and Guo, Michelle and Hardin, Thibaut and Li, Xiang and Lin, Aohan and Liu, Jia-Wei and Ma, Ziqi and Sagar, Anushka and Song, Bowen and Wang, Xiaodong and Yang, Jianing and Zhang, Bowen and Doll{\'a}r, Piotr and Gkioxari, Georgia and Feiszli, Matt and Malik, Jitendra}, title = {{SAM 3D}: {3Dfy} Anything in Images}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})}, month = jun, year = {2026}, pages = {7220--7232} }
RNED: Rotary Number Encoding and Decoding for Medical VLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Fengbei and Kwak, Sunwoo and Nizam, Nusrat and Richter, Ilan and Beecy, Ashley and Raikhelkar, Jayant and Estrin, Deborah and Sabuncu, Mert R.}, title = {RNED: Rotary Number Encoding and Decoding for Medical VLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13722-13731} }
CogniEdit: Dense Gradient Flow Optimization for Fine-Grained Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yan and Liu, Lin and Zhang, Xiaopeng and Xue, Wei and Luo, Wenhan and Guo, Yike and Tian, Qi}, title = {CogniEdit: Dense Gradient Flow Optimization for Fine-Grained Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1145-1154} }
BiOTPrompt: Bidirectional Optimal Transport Guided Prompting for Disease Evolution-aware Radiology Report Generation-
[pdf]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Tengfei and Fan, Yijian and Wang, Boyue and Hu, Yongli and Li, Mingjie and Li, Jinghua and Gao, Junbin and Chang, Xiaojun and Li, Zhihui and Yin, Baocai}, title = {BiOTPrompt: Bidirectional Optimal Transport Guided Prompting for Disease Evolution-aware Radiology Report Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13755-13765} }
Anti-Degradation Lifelong Multi-View Clustering-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xingfeng and Pan, Hao and Yuan, Honglin and Sun, Yuan and Zhao, Xujian and Lin, Jiaqi and Ren, Zhenwen}, title = {Anti-Degradation Lifelong Multi-View Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8749-8759} }
FedSDR: Federated Graph Learning with Structural Noise Detection and Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Jiaqi and Tan, Zihan and Wan, Guancheng and Huang, Wenke and Li, He and Ye, Mang}, title = {FedSDR: Federated Graph Learning with Structural Noise Detection and Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3379-3389} }
ReScene4D: Temporally Consistent Semantic Instance Segmentation of Evolving Indoor 3D Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Steiner_2026_CVPR, author = {Steiner, Emily and Zheng, Jianhao and Howard-Jenkins, Henry and Xie, Chris and Armeni, Iro}, title = {ReScene4D: Temporally Consistent Semantic Instance Segmentation of Evolving Indoor 3D Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10710-10720} }
Thinking with Frames: Generative Video Distortion Evaluation via Frame Reward Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yuan and Liao, Borui and Huang, Huijuan and Lu, Jinda and Li, Ouxiang and Liu, Kuien and Wang, Meng and Wang, Xiang}, title = {Thinking with Frames: Generative Video Distortion Evaluation via Frame Reward Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4526-4536} }
Lipschitz Optimization for Formal Verification of Homographies-
[pdf]
[supp]
[bibtex]@InProceedings{Durand_2026_CVPR, author = {Durand, Jean-Guillaume and Kouvaros, Panagiotis and Gariel, Maxime and Lomuscio, Alessio}, title = {Lipschitz Optimization for Formal Verification of Homographies}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13306-13315} }
Glove2Hand: Synthesizing Natural Hand-Object Interaction from Multi-Modal Sensing Gloves-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xinyu and Kou, Ziyi and Qin, Chuan and Huang, Mia and Ristani, Ergys and Kumar, Ankit and Chen, Lele and He, Kun and Boularias, Abdeslam and Guan, Li}, title = {Glove2Hand: Synthesizing Natural Hand-Object Interaction from Multi-Modal Sensing Gloves}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1829-1840} }
HiFICL: High-Fidelity In-Context Learning for Multimodal Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xiaoyu and Liu, Yuhang and Kang, Xuanshuo and Luo, Zheng and Lou, Fangqi and Wu, Xiaohua and Xiong, Zihan}, title = {HiFICL: High-Fidelity In-Context Learning for Multimodal Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3069-3078} }
Ar2Can: An Architect and an Artist Leveraging a Canvas for Multi-Human Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Borse_2026_CVPR, author = {Borse, Shubhankar and Pham, Phuc and Farhadzadeh, Farzad and Choi, Seokeon and Nguyen, Phong and Tran, Anh and Yun, Sungrack and Hayat, Munawar and Porikli, Fatih}, title = {Ar2Can: An Architect and an Artist Leveraging a Canvas for Multi-Human Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {550-560} }
DICArt: Advancing Category-level Articulated Object Pose Estimation in Discrete State-Spaces-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Li and Mei, Mingyu and Wang, Ailing and Meng, Xianhui and Zhong, Yan and Song, Xinyuan and Liu, Liu and Wang, Rujing and He, Zaixing and Lu, Cewu}, title = {DICArt: Advancing Category-level Articulated Object Pose Estimation in Discrete State-Spaces}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4687-4697} }
Scone: Bridging Composition and Distinction in Subject-Driven Image Generation via Unified Understanding-Generation Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yuran and Zeng, Bohan and Tong, Chengzhuo and Liu, Wenxuan and Shi, Yang and Ma, Xiaochen and Liang, Hao and Zhang, Yuanxing and Zhang, Wentao}, title = {Scone: Bridging Composition and Distinction in Subject-Driven Image Generation via Unified Understanding-Generation Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7773-7783} }
AdaptVision: Efficient Vision-Language Models via Adaptive Visual Acquisition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Zichuan and Liu, Yicheng and Yang, Yang and Tao, Lvfang and Ye, Deheng}, title = {AdaptVision: Efficient Vision-Language Models via Adaptive Visual Acquisition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11923-11932} }
GaussianZoom: Progressive Zoom-in Generative 3D Gaussian Splatting with Geometric and Semantic Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Jiale and Hu, Jiarui and Yang, Zesong and Luan, Kaixuan and Bao, Hujun and Cui, Zhaopeng}, title = {GaussianZoom: Progressive Zoom-in Generative 3D Gaussian Splatting with Geometric and Semantic Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11850-11859} }
An Instance-Centric Panoptic Occupancy Prediction Benchmark for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Yi and E, Junwu and Guo, Zizhan and Ma, Yu and Wang, Hanli and Fan, Rui}, title = {An Instance-Centric Panoptic Occupancy Prediction Benchmark for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14219-14228} }
Learning from Itself: Mining Internal Knowledge from Vision Language Models for Continual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Gong_2026_CVPR, author = {Gong, Yizheng and Yu, Siyue and Al-Nuaimy, Waleed and Xiao, Jimin}, title = {Learning from Itself: Mining Internal Knowledge from Vision Language Models for Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10830-10839} }
Reinforcing Video Object Segmentation to Think before it Segments-
[pdf]
[supp]
[bibtex]@InProceedings{Gong_2026_CVPR, author = {Gong, Sitong and Zhuge, Yunzhi and Zhang, Lu and Yu, Jiazuo and Zhang, Pingping and Jia, Xu and Lu, Huchuan}, title = {Reinforcing Video Object Segmentation to Think before it Segments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3835-3844} }
GazeOnce360: Fisheye-Based 360deg Multi-Person Gaze Estimation with Global-Local Feature Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Zhuojiang and Sun, Zhenghui and Lu, Feng}, title = {GazeOnce360: Fisheye-Based 360deg Multi-Person Gaze Estimation with Global-Local Feature Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12312-12321} }
Scene-Centric Unsupervised Video Panoptic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Reich_2026_CVPR, author = {Reich, Christoph and Hahn, Oliver and Araslanov, Nikita and Leal-Taix{\'e}, Laura and Rupprecht, Christian and Cremers, Daniel and Roth, Stefan}, title = {Scene-Centric Unsupervised Video Panoptic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10753-10765} }
Retrieve-to-Restore: Efficient All-in-One Image Restoration with a Retrieval-Based Degradation Bank-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Chenxu and Zhang, Kai and Yang, Jian}, title = {Retrieve-to-Restore: Efficient All-in-One Image Restoration with a Retrieval-Based Degradation Bank}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1277-1287} }
MotionHiFlow: Text-to-Motion via Hierarchical Flow Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Heng and Lin, Xiaotong and Zeng, Ling-An and Kang, Yulei and Li, Shuai and Hu, Jian-Fang}, title = {MotionHiFlow: Text-to-Motion via Hierarchical Flow Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9352-9363} }
What Are You Doing? A Closer Look at Controllable Human Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bugliarello_2026_CVPR, author = {Bugliarello, Emanuele and Arnab, Anurag and Paiss, Roni and Koh, Christy and Kindermans, Pieter-Jan and Schmid, Cordelia}, title = {What Are You Doing? A Closer Look at Controllable Human Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11414-11425} }
Semantic-Adaptive Diffusion for Dynamic Spatiotemporal Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jinsong and Qu, Ying and Liao, Yuan and Qi, Hairong and Shao, Zhenzhou}, title = {Semantic-Adaptive Diffusion for Dynamic Spatiotemporal Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12344-12353} }
LoD-Loc v3: Generalized Aerial Localization in Dense Cities using Instance Silhouette Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Shuaibang and Zhu, Juelin and Li, Xia and Yang, Kun and Liu, Yu and Zhang, Maojun and Yan, Shen}, title = {LoD-Loc v3: Generalized Aerial Localization in Dense Cities using Instance Silhouette Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12193-12205} }
Flow3r: Factored Flow Prediction for Scalable Visual Geometry Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cong_2026_CVPR, author = {Cong, Zhongxiao and Zhao, Qitao and Jeon, Minsik and Tulsiani, Shubham}, title = {Flow3r: Factored Flow Prediction for Scalable Visual Geometry Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {438-447} }
Face2Scene: Using Facial Degradation as an Oracle for Diffusion-Based Scene Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Kazerouni_2026_CVPR, author = {Kazerouni, Amirhossein and Suin, Maitreya and Aumentado-Armstrong, Tristan and Honari, Sina and Walia, Amanpreet and Mohomed, Iqbal and Derpanis, Konstantinos G. and Taati, Babak and Levinshtein, Alex}, title = {Face2Scene: Using Facial Degradation as an Oracle for Diffusion-Based Scene Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8428-8438} }
Percept-WAM: Perception-Enhanced World-Awareness-Action Model for Robust End-to-End Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Jianhua and Tian, Meng and Zhu, Jiangtong and He, Fan and Zhang, Huixin and Guo, Sitong and Zhu, Dechang and Tang, Hao and Xu, Pei and Guo, Yuze and Niu, Minzhe and Zhu, Haojie and Dong, Qichao and Yan, Xuechao and Dong, Siyuan and Hou, Lu and Huang, Qingqiu and Jia, Xiaosong and Xu, Hang}, title = {Percept-WAM: Perception-Enhanced World-Awareness-Action Model for Robust End-to-End Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10642-10655} }
Neuro-Cognitive Reward Modeling for Human-Centered Autonomous Vehicle Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhuang_2026_CVPR, author = {Zhuang, Zhuoli and Chang, Yu-Cheng and Wang, Yu-Kai and Do, Thomas and Lin, Chin-Teng}, title = {Neuro-Cognitive Reward Modeling for Human-Centered Autonomous Vehicle Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10599-10609} }
EvoGraph-R1: Self-Evolving Multimodal Knowledge Hypergraphs for Agentic Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Jiashi and Jiang, Changhong and Lin, Xiangru and Zhang, Ruifei and Zhu, Xinyi and Liu, Jiyao and Tang, Cheng and Du, Ye and Gao, Shujian and Ning, Junzhi and Liu, Lihao and Huang, Ziyan and Li, Tianbin and Ye, Jin and He, Junjun}, title = {EvoGraph-R1: Self-Evolving Multimodal Knowledge Hypergraphs for Agentic Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {756-765} }
FHAvatar: Fast and High-Fidelity Reconstruction of Face-and-Hair Composable 3D Head Avatar from Few Casual Captures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Yujie and Cai, Zhuoqiang and Niu, Chaoyue and Chen, Jianchuan and Chen, Zhiwen and Lv, Chengfei and Wu, Fan}, title = {FHAvatar: Fast and High-Fidelity Reconstruction of Face-and-Hair Composable 3D Head Avatar from Few Casual Captures}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4132-4144} }
Edge-RecViT: Efficient Vision Transformer via Semantic-Refined Dynamic Recursion-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, YiZhou and Xu, Jinyi and Yin, Mingyu and Zhao, Xianyi}, title = {Edge-RecViT: Efficient Vision Transformer via Semantic-Refined Dynamic Recursion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12987-12996} }
TVHighlights: LLM-Guided Human-Free Collaborative Training for Video Highlight Detection in Movies and TV Dramas-
[pdf]
[supp]
[bibtex]@InProceedings{Qiu_2026_CVPR, author = {Qiu, Qi and Wu, Xuan and Peng, Jiawei and Miao, Yuan and Yang, Xu and Du, Yanlong}, title = {TVHighlights: LLM-Guided Human-Free Collaborative Training for Video Highlight Detection in Movies and TV Dramas}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9773-9783} }
BiPA: Bilevel Prompt Adaptation for Underwater Instance Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Long and Zheng, Haoze and Mao, Yuhang and Liu, Jinyuan and Xu, Chengpei and Xue, Xinwei and Wang, Yi and He, Xiangjian and Wang, Weimin}, title = {BiPA: Bilevel Prompt Adaptation for Underwater Instance Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10731-10740} }
Beyond Success: Refining Elegant Robot Manipulation from Mixed-Quality Data via Just-in-Time Intervention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Yanbo and Fu, Jianlong and Zhang, Ruoxuan and Xie, Hongxia and Yao, Meibao}, title = {Beyond Success: Refining Elegant Robot Manipulation from Mixed-Quality Data via Just-in-Time Intervention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13508-13518} }
GThinker: Towards General Multimodal Reasoning via Cue-Guided Rethinking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhan_2026_CVPR, author = {Zhan, Yufei and Wu, Ziheng and Zhu, Yousong and Xue, Rongkun and Zhou, Guanghao and Luo, Ruipu and Chen, Zhenghao and Zhang, Can and Li, Yifan and He, Zhentao and Yang, Zheming and Tang, Ming and Qiu, Minghui and Wang, Jinqiao}, title = {GThinker: Towards General Multimodal Reasoning via Cue-Guided Rethinking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11954-11965} }
Physically Ground Commonsense Knowledge for Articulated Object Manipulation with Analytic Concepts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Jiude and Li, Yuxuan and Lu, Cewu and Sun, Jianhua}, title = {Physically Ground Commonsense Knowledge for Articulated Object Manipulation with Analytic Concepts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13519-13528} }
CADFS: A Big CAD Program Dataset and Framework for Computer-Aided Design with Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pyatov_2026_CVPR, author = {Pyatov, Vladislav and Bobrovskikh, Gleb and Galochkin, Saveliy and Boldyrev, Nikita and Voynov, Oleg and Filippov, Alexander and Ferrer, Gonzalo and Wonka, Peter and Burnaev, Evgeny}, title = {CADFS: A Big CAD Program Dataset and Framework for Computer-Aided Design with Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10176-10186} }
Perceptual Neural Video Compression with Color Separation and Rank Chain-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Xiongzhuang and Tang, Chuanbo and Li, Zhuoyuan and Li, Li and Liu, Dong}, title = {Perceptual Neural Video Compression with Color Separation and Rank Chain}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5348-5358} }
BackSplit: The Importance of Sub-dividing the Background in Biomedical Lesion Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saluja_2026_CVPR, author = {Saluja, Rachit and Cihangir, Asli and Deng, Ruining and Paetzold, Johannes C. and Liu, Fengbei and Sabuncu, Mert R.}, title = {BackSplit: The Importance of Sub-dividing the Background in Biomedical Lesion Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8492-8502} }
FlashDecoder: Real-Time Latent-to-Pixel Streaming Decoder with Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Kang_2026_CVPR, author = {Kang, Minguk and Kwak, Suha}, title = {FlashDecoder: Real-Time Latent-to-Pixel Streaming Decoder with Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5294-5305} }
SyncMos: Scalable Motion Synchronisation for Multi-Agent Scene Interaction-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Lingxiao and Kim, Dongwon and Ruan, Lingyan and Chen, Bin and Kwon, Taesoo and Rhee, Taehyun}, title = {SyncMos: Scalable Motion Synchronisation for Multi-Agent Scene Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8174-8182} }
UZ3DVG: Unaided Zero-Shot 3D Visual Grounding with Generated Language Conditions-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Wenbin and Lin, Jiawen and Xie, Yuan and Zhang, Yachao and Qu, Yanyun}, title = {UZ3DVG: Unaided Zero-Shot 3D Visual Grounding with Generated Language Conditions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9547-9557} }
CoLoR: The Devil is in Scene Coordinate Regression for Large-Scale Visual Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Xindong and Li, Hang and Wu, Yuchen and Li, Jiahe and Bai, Xiao and Zheng, Jin}, title = {CoLoR: The Devil is in Scene Coordinate Regression for Large-Scale Visual Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12206-12216} }
$\alpha$Matte4K & $\mu$Matting: Dataset and Model for Ultra-Micro Precision Alpha Video Matting-
[pdf]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xinyi and Dong, Hang and Jiang, Baowei and Xu, Shenkun and Guan, Youqi and Shi, Kanle and Gai, Kun and Song, Haichuan}, title = {{$\alpha$}Matte4K \& {$\mu$}Matting: Dataset and Model for Ultra-Micro Precision Alpha Video Matting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12491-12500} }
Your One-Stop Solution for AI-Generated Video Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Long and Xue, Zihao and Wang, Yan and Yan, Zhiyuan and Xu, Jin and Jiang, Xiaorui and Yu, Haiyang and Liao, Yong and Bi, Zhen}, title = {Your One-Stop Solution for AI-Generated Video Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4458-4470} }
Breaking Multimodal LLM Safety via Video-Driven Prompting-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Dong and He, Xiangyu and Lyu, Xinqi and Xiao, Bin}, title = {Breaking Multimodal LLM Safety via Video-Driven Prompting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8566-8576} }
Common Inpainted Objects In-N-Out of Context-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Tianze and Jordan, Tyson and Sun, Ruitong and Liu, Ninghao and Sun, Jin}, title = {Common Inpainted Objects In-N-Out of Context}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13069-13079} }
U4D: Uncertainty-Aware 4D World Modeling from LiDAR Sequences-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Xiang and Liang, Alan and Liu, Youquan and Li, Linfeng and Kong, Lingdong and Liu, Ziwei and Liu, Qingshan}, title = {U4D: Uncertainty-Aware 4D World Modeling from LiDAR Sequences}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10027-10039} }
Eliminate Distance Differences Induced by Backdoor Attacks: Layer-Selective Training and Clipping to Mask Backdoor Models-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xuzeng and Zhang, Tao and Tang, Xiangyun and Wang, Jiacheng and Wang, Jian and Kang, Jiawen and Liu, Jiqiang and Han, Zhen and Niyato, Dusit and Kim, Dong In}, title = {Eliminate Distance Differences Induced by Backdoor Attacks: Layer-Selective Training and Clipping to Mask Backdoor Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13336-13345} }
IntroSVG: Learning from Rendering Feedback for Text-to-SVG Generation via an Introspective Generator-Critic Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Feiyu and Yang, Jiayuan and Zhao, Zhiyuan and Zhang, Da and Li, Bingyu and Liu, Peng and Gao, Junyu}, title = {IntroSVG: Learning from Rendering Feedback for Text-to-SVG Generation via an Introspective Generator-Critic Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {615-625} }
BHCast: Unlocking Black Hole Plasma Dynamics from a Single Blurry Image with Long-Term Forecasting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tu_2026_CVPR, author = {Tu, Renbo and SaraerToosi, Ali and Conroy, Nicholas S. and Pekhimenko, Gennady and Levis, Aviad}, title = {BHCast: Unlocking Black Hole Plasma Dynamics from a Single Blurry Image with Long-Term Forecasting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5606-5616} }
BinaryAttention: One-Bit QK-Attention for Vision and Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Chaodong and Zhang, Zhengqiang and Zhang, Lei}, title = {BinaryAttention: One-Bit QK-Attention for Vision and Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12106-12117} }
FlowPortal: Residual-Corrected Flow for Training-Free Video Relighting and Background Replacement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Wenshuo and Fan, Junyi and Zeng, Jiangyue and Yang, Shuai}, title = {FlowPortal: Residual-Corrected Flow for Training-Free Video Relighting and Background Replacement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2025-2034} }
ENC-Bench: A Benchmark for Evaluating Multimodal Large Language Models in Electronic Navigational Chart Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Ao and Li, Xingming and Ji, Xuanyu and He, Xixiang and Sun, Qiyao and Qiu, Chunping and Huang, Runke and Hu, Qingyong}, title = {ENC-Bench: A Benchmark for Evaluating Multimodal Large Language Models in Electronic Navigational Chart Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2423-2433} }
BD-Merging: Bias-Aware Dynamic Model Merging with Evidence-Guided Contrastive Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Yuhan and Lyu, Chen}, title = {BD-Merging: Bias-Aware Dynamic Model Merging with Evidence-Guided Contrastive Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12892-12901} }
One Model, Many Budgets: Elastic Latent Interfaces for Diffusion Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Haji-Ali_2026_CVPR, author = {Haji-Ali, Moayed and Menapace, Willi and Skorokhodov, Ivan and Park, Dogyun and Kag, Anil and Vasilkovsky, Michael and Tulyakov, Sergey and Ordonez, Vicente and Siarohin, Aliaksandr}, title = {One Model, Many Budgets: Elastic Latent Interfaces for Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4558-4568} }
OmniSonic: Towards Universal and Holistic Audio Generation from Video and Text-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pian_2026_CVPR, author = {Pian, Weiguo and Kushwaha, Saksham Singh and Chen, Zhimin and Deng, Shijian and Wang, Kai and Guo, Yunhui and Tian, Yapeng}, title = {OmniSonic: Towards Universal and Holistic Audio Generation from Video and Text}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {540-549} }
SAR2Net: Learning Spatially Anchored Representations for Retrieval-Guided Cross-Stain Alignment-
[pdf]
[bibtex]@InProceedings{Shen_2026_CVPR, author = {Shen, Tianle and Yan, Fang and Zhang, Xiaofan}, title = {SAR2Net: Learning Spatially Anchored Representations for Retrieval-Guided Cross-Stain Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12544-12553} }
ActiveVLA: Injecting Active Perception into Vision-Language-Action Models for Precise 3D Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Zhenyang and Gu, Yongchong and Wang, Yikai and Xue, Xiangyang and Fu, Yanwei}, title = {ActiveVLA: Injecting Active Perception into Vision-Language-Action Models for Precise 3D Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8141-8151} }
MaskAdapt: Learning Flexible Motion Adaptation via Mask-Invariant Prior for Physics-Based Characters-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Soomin and Lee, Eunseong and Bin Lee, Kwang and Lee, Sung-Hee}, title = {MaskAdapt: Learning Flexible Motion Adaptation via Mask-Invariant Prior for Physics-Based Characters}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2285-2294} }
SeeU: Seeing the Unseen World via 4D Dynamics-aware Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2026_CVPR, author = {Yuan, Yu and Wickremasinghe, Tharindu and Nadir, Zeeshan and Wang, Xijun and Chi, Yiheng and Chan, Stanley H.}, title = {SeeU: Seeing the Unseen World via 4D Dynamics-aware Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11150-11162} }
REVISOR: Beyond Textual Reflection, Towards Multimodal Introspective Reasoning in Long-Form Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jiaze and Yin, Hao and Tan, Wenhui and Chen, Jingyang and Xu, Boshen and Qu, Yuxun and Chen, Yijing and Ju, Jianzhong and Luo, Zhenbo and Luan, Jian}, title = {{REVISOR}: Beyond Textual Reflection, Towards Multimodal Introspective Reasoning in Long-Form Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5059--5069} }
Hierarchical Concept Embedding & Pursuit for Interpretable Image Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Nghia and Ding, Tianjiao and Vidal, Ren{\'e}}, title = {Hierarchical Concept Embedding \& Pursuit for Interpretable Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2907--2917} }
Bias In, Bias Out? Finding Unbiased Subnetworks in Vanilla Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{De_Moura_Matos_2026_CVPR, author = {De Moura Matos, Ivan Luiz and Saoud, Abdel Djalil Sad and Iakovleva, Ekaterina and Pastore, Vito Paolo and Tartaglione, Enzo}, title = {Bias In, Bias Out? Finding Unbiased Subnetworks in Vanilla Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3294--3305} }
ConceptPrism: Concept Disentanglement in Personalized Diffusion Models via Residual Token Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Minseo and Kwon, Minchan and Lee, Dongyeun and Jeon, Yunho and Kim, Junmo}, title = {{ConceptPrism}: Concept Disentanglement in Personalized Diffusion Models via Residual Token Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2381--2390} }
Video2Robo: 3DGS-based Synthetic Data from One Video Enables Scalable Robot Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Yinan and Hu, Kejia and Chen, Ye and Dou, Jianyu and Wang, Jiahui and Zhao, Jingyu and Ao, Haojia and Yang, Yi and Yue, Yufeng}, title = {{Video2Robo}: {3DGS}-based Synthetic Data from One Video Enables Scalable Robot Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6695--6705} }
Attack for Defense: Adversarial Agents for Point Prompt Optimization Empowering Segment Anything Model-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xueyu and Zhang, Xiaoyi and Liu, Meilin and Shi, Guangze and Shen, Jia and Wang, Yujie and Zhao, Cai and He, Ziyuan and Wu, Yongfei and Wei, Mingqiang and Chen, Yongle}, title = {Attack for Defense: Adversarial Agents for Point Prompt Optimization Empowering {Segment Anything Model}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6591--6600} }
Visual Prototype Conditioned Focal Region Generation for UAV-Based Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Wenhao and Wu, Zimeng and Wu, Yu and Fu, Zehua and Chen, Jiaxin}, title = {Visual Prototype Conditioned Focal Region Generation for {UAV}-Based Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3772--3782} }
Multi-Scale Local Speculative Decoding for Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Peruzzo_2026_CVPR, author = {Peruzzo, Elia and Sauti{\`e}re, Guillaume and Habibian, Amirhossein}, title = {Multi-Scale Local Speculative Decoding for Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5253--5262} }
Probabilistic Discrepancy Learning for Roadside LiDAR Scene Completion-
[pdf]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Xiaogang and Hu, Jinchao and Wang, Zixian and Liu, Dun and Cheng, BoXiang and Wu, Yiqiang}, title = {Probabilistic Discrepancy Learning for Roadside {LiDAR} Scene Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {9955--9964} }
Hilbert Curve-Based Attention Enabling Topology-Preserving Image Tensor Representation for Semantic Segmentation Network-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Linkang and Li, Gang and Song, Yue and Ji, Xiangxin}, title = {Hilbert Curve-Based Attention Enabling Topology-Preserving Image Tensor Representation for Semantic Segmentation Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {13113--13122} }
Language-driven Fine-grained Retrieval-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Shijie and Yu, Xin and Luo, Yadan and Wang, Zijian and Zhang, Pengfei and Huang, Zi}, title = {Language-driven Fine-grained Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2682--2692} }
X-AVDT: Audio-Visual Cross-Attention for Robust Deepfake Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Youngseo and Yun, Kwan and Hong, Seokhyeon and Cha, Sihun and Koo, Colette Suhjung and Noh, Junyong}, title = {{X-AVDT}: Audio-Visual Cross-Attention for Robust Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4403--4414} }
SURF: Signature-Retained Fast Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2026_CVPR, author = {Ding, Kaixin and Chen, Xi and Ji, Sihui and Gao, Yuan and Hou, Liang and Tao, Xin and Zhao, Hengshuang}, title = {{SURF}: Signature-Retained Fast Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {9171--9181} }
Meta-Learning In-Context Enables Training-Free Cross Subject Brain Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nan_2026_CVPR, author = {Nan, Mu and Yu, Muquan and Mai, Weijian and Prince, Jacob S. and Adeli, Hossein and Zhang, Rui and Cao, Jiahang and Becker, Benjamin and Pyles, John A. and Henderson, Margaret M. and Song, Chunfeng and Kriegeskorte, Nikolaus and Tarr, Michael J. and Hu, Xiaoqing and Luo, Andrew F.}, title = {Meta-Learning In-Context Enables Training-Free Cross Subject Brain Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3604--3616} }
fMRI-LM: Towards a Universal Foundation Model for Language-Aligned fMRI Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Yuxiang and Zhang, Yanteng and Xiao, Xi and Qian, Chengxuan and Wang, Tianyang and Calhoun, Vince D.}, title = {{fMRI-LM}: Towards a Universal Foundation Model for Language-Aligned {fMRI} Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6931--6940} }
CLEX: Complementary Label Exchange Learning for Noisy Facial Expression Recognition-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Lin and Liu, Fang and Xing, Xiaofen and Guo, Kailing and Xu, Xiangmin}, title = {{CLEX}: Complementary Label Exchange Learning for Noisy Facial Expression Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {10577--10586} }
OneHOI: Unifying Human-Object Interaction Generation and Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hoe_2026_CVPR, author = {Hoe, Jiun Tian and Hu, Weipeng and Jiang, Xudong and Tan, Yap-Peng and Chan, Chee Seng}, title = {{OneHOI}: Unifying Human-Object Interaction Generation and Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7664--7673} }
Rel-Zero: Harnessing Patch-Pair Invariance for Robust Zero-Watermarking Against AI Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Pengzhen and Liu, Yanwei and Gu, Xiaoyan and Chen, Xiaojun and Liu, Wu and Wang, Weiping}, title = {{Rel-Zero}: Harnessing Patch-Pair Invariance for Robust Zero-Watermarking Against {AI} Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3337--3346} }
Joint Spectral Image Reconstruction and Semantic Segmentation with Cooperative Unfolding-
[pdf]
[supp]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Zijun and Wang, Ping and Wang, Xiaodong and Chen, Chang and Yuan, Xin}, title = {Joint Spectral Image Reconstruction and Semantic Segmentation with Cooperative Unfolding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6910--6919} }
NOWA: Null-space Optical Watermark for Invisible Capture Fingerprinting and Tamper Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vargas_2026_CVPR, author = {Vargas, Edwin and Lopez, Jhon and Arguello, Henry and Veeraraghavan, Ashok}, title = {{NOWA}: Null-space Optical Watermark for Invisible Capture Fingerprinting and Tamper Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {102--112} }
Spectral Scalpel: Amplifying Adjacent Action Discrepancy via Frequency-Selective Filtering for Skeleton-Based Action Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2026_CVPR, author = {Ji, Haoyu and Chen, Bowen and Yang, Zhihao and Huang, Wenze and Gao, Yu and Liu, Xueting and Ren, Weihong and Wang, Zhiyong and Liu, Honghai}, title = {{Spectral Scalpel}: Amplifying Adjacent Action Discrepancy via Frequency-Selective Filtering for Skeleton-Based Action Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12849--12859} }
SemiGDA: Generative Dual-distribution Alignment for Semi-Supervised Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Kaiwen and Zhou, Yi and Zhang, Yizhe and Li, Jingxiong and Zhou, Tao}, title = {{SemiGDA}: Generative Dual-distribution Alignment for Semi-Supervised Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1450--1460} }
DuetSVG: Unified Multimodal SVG Generation with Internal Visual Guidance-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Peiying and Zhao, Nanxuan and Fisher, Matthew and Xu, Yiran and Liao, Jing and Liu, Difan}, title = {{DuetSVG}: Unified Multimodal {SVG} Generation with Internal Visual Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {10219--10229} }
FACE: A Face-based Autoregressive Representation for High-Fidelity and Efficient Mesh Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Hanxiao and Guo, Yuan-Chen and Liu, Ying-Tian and Zou, Zi-Xin and Zhang, Biao and Quan, Weize and Liang, Ding and Cao, Yan-Pei and Yan, Dong-Ming}, title = {{FACE}: A Face-based Autoregressive Representation for High-Fidelity and Efficient Mesh Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12719--12729} }
MatE: Material Extraction from Single-Image via Geometric Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zeyu and Zhai, Wei and Yang, Jian and Cao, Yang}, title = {{MatE}: Material Extraction from Single-Image via Geometric Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12480--12490} }
GenMatter: Perceiving Physical Objects with Generative Matter Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Eric and Dasgupta, Arijit and Friedman, Yoni and Huot, Mathieu and Mansinghka, Vikash and O'Connell, Thomas and Freeman, William T. and Tenenbaum, Joshua B.}, title = {{GenMatter}: Perceiving Physical Objects with Generative Matter Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3165--3175} }
SPDMark: Selective Parameter Displacement for Robust Video Watermarking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fares_2026_CVPR, author = {Fares, Samar and Tastan, Nurbek and Nandakumar, Karthik}, title = {{SPDMark}: Selective Parameter Displacement for Robust Video Watermarking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {10303--10312} }
Seeing What Matters: A Training-Free Self-Guided Framework for Multimodal Detail Perception and Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Mingjie and Ma, Yichao and Yang, Zhong and Li, Guohui}, title = {Seeing What Matters: A Training-Free Self-Guided Framework for Multimodal Detail Perception and Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8727--8736} }
MedMO: Grounding and Understanding Multimodal Large Language Model for Medical Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deria_2026_CVPR, author = {Deria, Ankan and Kumar, Komal and Dukre, Adinath Madhavrao and Segal, Eran and Khan, Salman and Razzak, Imran}, title = {{MedMO}: Grounding and Understanding Multimodal Large Language Model for Medical Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5092--5103} }
P2GS: Physical Prior-guided Gaussian Splatting for Photometrically Consistent Urban Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shimomura_2026_CVPR, author = {Shimomura, Kota and Arai, Hidehisa and Takahashi, Tsubasa and Yamashita, Takayoshi and Fujiyoshi, Hironobu}, title = {{P2GS}: Physical Prior-guided {Gaussian} Splatting for Photometrically Consistent Urban Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {11736--11745} }
UNI-OOD: Unified Object- and Image-level Out-of-Distribution Detection via Cross-Context Attentive Vision-Language Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yuchuan and Motamedi, Azadeh and Kwon, Hyock Ju and Park, Chul B. and Kim, Il-Min}, title = {{UNI-OOD}: Unified Object- and Image-level Out-of-Distribution Detection via Cross-Context Attentive Vision-Language Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6282--6292} }
Air-Know: Arbiter-Calibrated Knowledge-Internalizing Robust Network for Composed Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2026_CVPR, author = {Fu, Zhiheng and Hu, Yupeng and Yang, Qianyun and Zhang, Shiqi and Chen, Zhiwei and Li, Zixu}, title = {{Air-Know}: Arbiter-Calibrated Knowledge-Internalizing Robust Network for Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2658--2670} }
MeshSplatting: Differentiable Rendering with Opaque Meshes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Held_2026_CVPR, author = {Held, Jan and Son, Sanghyun and Vandeghen, Renaud and Rebain, Daniel and Gadelha, Matheus and Zhou, Yi and Cioppa, Anthony and Lin, Ming C. and Van Droogenbroeck, Marc and Tagliasacchi, Andrea}, title = {{MeshSplatting}: Differentiable Rendering with Opaque Meshes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7320--7329} }
CUPID: Generative 3D Reconstruction via Joint Object and Pose Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Binbin and Duan, Haobin and Zhao, Yiqun and Zhao, Zibo and Ma, Yi and Gao, Shenghua}, title = {{CUPID}: Generative {3D} Reconstruction via Joint Object and Pose Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12741--12752} }
Semantic-Guided Global-Local Collaborative Prompt Learning for Few-Shot Class Incremental Learning-
[pdf]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Yongxin and Chen, Weisen and Chen, Xingye and Shao, Yuanjie and Zuo, Zhengrong and Tan, Wenming and Ren, Wenqi and Gao, Changxin and Sang, Nong}, title = {Semantic-Guided Global-Local Collaborative Prompt Learning for Few-Shot Class Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5511--5520} }
FM-Steer: Enhance Generalist Policies with Value-Guided Cascaded Denoising-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Haoming and Qu, Delin and Yao, Yuanqi and Chen, Qizhi and Li, Jiarui and Lv, Qi and Tang, Yiwen and Kang, Li and Zhou, Heng and Gao, Xianqiang and Tang, Yuhang and Li, Xiaofan and Shi, Modi and Ren, Guanghui and Yao, Maoqing and Zhao, Bin and Wang, Dong and Li, Xuelong}, title = {{FM-Steer}: Enhance Generalist Policies with Value-Guided Cascaded Denoising}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {13407--13418} }
LLM-Guided Probabilistic Fusion for Label-Efficient Document Layout Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shihab_2026_CVPR, author = {Shihab, Ibne Farabi and Akter, Sanjeda and Sharma, Anuj}, title = {{LLM}-Guided Probabilistic Fusion for Label-Efficient Document Layout Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3574--3583} }
COPO: Causal-Oriented Policy Optimization for Hallucinations of MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Peizheng and Wang, Jingyao and Qiang, Wenwen and Zhou, Jiahuan and Zheng, Changwen and Hua, Gang}, title = {{COPO}: Causal-Oriented Policy Optimization for Hallucinations of {MLLMs}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {11051--11063} }
AutoTraces: Autoregressive Trajectory Forecasting via Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Teng and Lu, Yanting and Wang, Ruize}, title = {{AutoTraces}: Autoregressive Trajectory Forecasting via Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4054--4064} }
Bidirectional Cross-Modal Prompting for Event-Frame Asymmetric Stereo-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Ninghui and Tosi, Fabio and Wang, Lihui and Han, Jiawei and Bartolomei, Luca and Yao, Zhiting and Poggi, Matteo and Mattoccia, Stefano}, title = {Bidirectional Cross-Modal Prompting for Event-Frame Asymmetric Stereo}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {914--925} }
Disco-GS: Gaussian Splatting in Dynamic Color Lighting-
[pdf]
[supp]
[bibtex]@InProceedings{Kumar_2026_CVPR, author = {Kumar, Ashish and Rajagopalan, A. N.}, title = {{Disco-GS}: Gaussian Splatting in Dynamic Color Lighting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {11891--11900} }
PureCC: Pure Learning for Text-to-Image Concept Customization-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liao_2026_CVPR, author = {Liao, Zhichao and Xian, Xiaole and Li, Qingyu and Qin, Wenyu and Wang, Meng and Xie, Weicheng and Song, Siyang and Feng, Pingfa and Zeng, Long and Pan, Liang}, title = {{PureCC}: Pure Learning for Text-to-Image Concept Customization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7730--7740} }
Scaling Dense Event-Stream Pretraining from Visual Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zhiwen and Hou, Junhui and Zhu, Zhiyu and Wu, Jinjian and Shi, Guangming}, title = {Scaling Dense Event-Stream Pretraining from Visual Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8011--8022} }
CapNav: Benchmarking Vision Language Models on Capability-conditioned Indoor Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2026_CVPR, author = {Su, Xia and Chen, Ruiqi and Liu, Benlin and Ma, Jingwei and Di, Zonglin and Krishna, Ranjay and Froehlich, Jon}, title = {{CapNav}: Benchmarking Vision Language Models on Capability-conditioned Indoor Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4043--4053} }
Back to Point: Exploring Point-Language Models for Zero-Shot 3D Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Kaiqiang and Li, Gang and Zhou, Mingle and Li, Min and Han, Delong and Wan, Jin}, title = {Back to Point: Exploring Point-Language Models for Zero-Shot {3D} Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {14167--14177} }
Transition Matching Distillation for Fast Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nie_2026_CVPR, author = {Nie, Weili and Berner, Julius and Ma, Nanye and Liu, Chao and Xie, Saining and Vahdat, Arash}, title = {Transition Matching Distillation for Fast Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4645--4655} }
MoCapAnything: Unified 3D Motion Capture for Arbitrary Skeletons from Monocular Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gong_2026_CVPR, author = {Gong, Kehong and Wen, Zhengyu and He, Weixia and Xu, Mingxi and Wang, Qi and Zhang, Ning and Li, Zhengyu and Lian, Dongze and Zhao, Wei and He, Xiaoyu and Zhang, Mingyuan}, title = {{MoCapAnything}: Unified {3D} Motion Capture for Arbitrary Skeletons from Monocular Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7089--7099} }
Bootstrapping Multi-view Learning for Test-time Noisy Correspondence-
[pdf]
[supp]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Changhao and Xue, Di and Li, Shuxian and Hao, Yanji and Peng, Xi and Hu, Peng}, title = {Bootstrapping Multi-view Learning for Test-time Noisy Correspondence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1627--1638} }
Octopus: History-Free Gradient Orthogonalization for Continual Learning in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yuehao and Guan, Shanyan and Zhang, Weijia and Shang, Xuanming and Ge, Yanhao and Li, Wei and Ma, Chao}, title = {Octopus: History-Free Gradient Orthogonalization for Continual Learning in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3898--3907} }
CLiViS: Unleashing Cognitive Map through Linguistic-Visual Synergy for Embodied Visual Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Kailing and Xu, Qi'ao and Qian, Tianwen and Fu, Yuqian and Jiao, Yang and Wang, Xiaoling}, title = {{CLiViS}: Unleashing Cognitive Map through Linguistic-Visual Synergy for Embodied Visual Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5134--5143} }
TEAR: Temporal-aware Automated Red-teaming for Text-to-Video Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Jiaming and Hou, Guanyu and Li, Hongwei and Huang, Zhicong and Chen, Kangjie and Yu, Yi and Jiang, Wenbo and Xu, Guowen and Zhang, Tianwei}, title = {{TEAR}: Temporal-aware Automated Red-teaming for Text-to-Video Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {41--50} }
GroundingME: Exposing the Visual Grounding Gap in MLLMs through Multi-Dimensional Evaluation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Rang and Li, Lei and Ren, Shuhuai and Tian, Hao and Gu, Shuhao and Li, Shicheng and Yue, Zihao and Wang, Yudong and Ma, Wenhan and Yang, Zhe and Ma, Jingyuan and Sui, Zhifang and Luo, Fuli}, title = {{GroundingME}: Exposing the Visual Grounding Gap in {MLLMs} through Multi-Dimensional Evaluation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2412--2422} }
Aesthetic Camera Viewpoint Suggestion with 3D Aesthetic Field-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Sheyang and Sarvestani, Armin Shafiee and Xu, Jialu and Xu, Xiaoyu and Wang, Zhou}, title = {Aesthetic Camera Viewpoint Suggestion with {3D} Aesthetic Field}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8278--8287} }
FloodDiffusion: Tailored Diffusion Forcing for Streaming Motion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Yiyi and Wu, Yuhan and Li, Kunhang and Zhou, You and Zheng, Bo and Liu, Haiyang}, title = {{FloodDiffusion}: Tailored Diffusion Forcing for Streaming Motion Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2295--2304} }
MeshRipple: Structured Autoregressive Generation of Artist-Meshes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Junkai and Long, Hang and Guo, Huipeng and Zhang, Jielei and Yang, Jiayi and Guo, Tianle and Yang, Yang and Li, Jianwen and Zhang, Wenxiao and Nie{\ss}ner, Matthias and Yang, Wei}, title = {{MeshRipple}: Structured Autoregressive Generation of Artist-Meshes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12706--12718} }
BiGMINT: Biologically-guided Hierarchical Multimodal Integration for Modeling Multiple Compound Activities in Drug Discovery-
[pdf]
[supp]
[bibtex]@InProceedings{Pati_2026_CVPR, author = {Pati, Pushpak and Li, Bo and Khan, Abbas Rayabat and Albuquerque, Tom{\'e} and Jaensch, Steffen and Mollaysa, Amina and Abdelmoula, Walid M. and Allen, Samantha J. and Reumers, Joke and Mohammad, Helai P. and Oloff, Scott and Mansi, Tommaso and Liao, Rui and Lituiev, Dmytro S. and Xu, Zhoubing}, title = {{BiGMINT}: Biologically-guided Hierarchical Multimodal Integration for Modeling Multiple Compound Activities in Drug Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6982--6993} }
Beyond the Static-World: Lifelong Learning for All-in-One Medical Image Restoration-
[pdf]
[bibtex]@InProceedings{Shan_2026_CVPR, author = {Shan, Shihao and Liu, Hongying and Shang, Fanhua and Wan, Liang and Deng, Jingjing}, title = {Beyond the Static-World: Lifelong Learning for All-in-One Medical Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {13702--13711} }
Text-Image Conditioned 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cen_2026_CVPR, author = {Cen, Jiazhong and Fang, Jiemin and Li, Sikuang and Wu, Guanjun and Yang, Chen and Yi, Taoran and Zhou, Zanwei and Bao, Zhikuan and Xie, Lingxi and Shen, Wei and Tian, Qi}, title = {Text-Image Conditioned {3D} Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {604--614} }
SV-GS: Sparse View 4D Reconstruction with Skeleton-Driven Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chao_2026_CVPR, author = {Chao, Jun-Jee and Isler, Volkan}, title = {{SV-GS}: Sparse View {4D} Reconstruction with Skeleton-Driven {Gaussian} Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5027--5037} }
Think 360°: Beyond Depth: Evaluating the Width-centric Reasoning Capability of MLLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Mingrui and Yang, Hexiong and Liu, Haogeng and Huang, Huaibo and He, Ran}, title = {Think {$360^{\circ}$}: Beyond Depth: Evaluating the Width-centric Reasoning Capability of {MLLMs}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5211--5220} }
ORBIT: Benchmarking SfM in the Wild with 360° Video-
[pdf]
[supp]
[bibtex]@InProceedings{Sabour_2026_CVPR, author = {Sabour, Sara and Tucker, Richard and Brubaker, Marcus and Saxena, Saurabh and Hur, Junhwa and Tagliasacchi, Andrea and Sun, Deqing and Fleet, David J. and Szeliski, Richard and Snavely, Noah}, title = {{ORBIT}: Benchmarking {SfM} in the Wild with {$360^{\circ}$} Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6791--6801} }
VideoNet: A Large-Scale Dataset for Domain-Specific Action Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yadav_2026_CVPR, author = {Yadav, Tanush and Salehi, Mohammadreza and Park, Jae Sung and Ramanujan, Vivek and Hajishirzi, Hannaneh and Choi, Yejin and Farhadi, Ali and Tripathi, Rohun and Krishna, Ranjay}, title = {{VideoNet}: A Large-Scale Dataset for Domain-Specific Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12881--12891} }
Deeper Thought, Weaker Aim: Understanding and Mitigating Perceptual Impairment during Reasoning in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Ruiying and Wu, Xueyu and Lei, Jing and Hou, Lu and Ma, Yuanzheng and Li, Xiao-Hui}, title = {Deeper Thought, Weaker Aim: Understanding and Mitigating Perceptual Impairment during Reasoning in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12064-12073} }
Synthetic Object Compositions for Scalable and Accurate Learning in Detection, Segmentation, and Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Weikai and Zhang, Jieyu and Jia, Taoyang and Zheng, Chenhao and Gao, Ziqi and Park, Jae Sung and Krishna, Ranjay}, title = {Synthetic Object Compositions for Scalable and Accurate Learning in Detection, Segmentation, and Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6388-6398} }
Towards Generalized Multimodal Homography Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{You_2026_CVPR, author = {You, Jinkun and Cheng, Jiaxin and Zhang, Jie and Zhou, Yicong}, title = {Towards Generalized Multimodal Homography Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8408-8417} }
SMV-EAR: Bring Spatiotemporal Multi-View Representation Learning into Efficient Event-Based Action Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Rui and Hao, Weidong and Guan, Juntao and Rui, Lai and Wu, Tong and Zeng, Fanhong and Gu, Lin}, title = {SMV-EAR: Bring Spatiotemporal Multi-View Representation Learning into Efficient Event-Based Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6043-6053} }
Render-to-Adapt: Unsupervised Personal Adaptation for Gaze Estimation-
[pdf]
[bibtex]@InProceedings{Ge_2026_CVPR, author = {Ge, Yangshi and Liu, Zheng and Lu, Feng}, title = {Render-to-Adapt: Unsupervised Personal Adaptation for Gaze Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3101-3110} }
SharpTimeGS: Sharp and Stable Dynamic Gaussian Splatting via Lifespan Modulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2026_CVPR, author = {Liao, Zhanfeng and Zhang, Jiajun and Tu, Hanzhang and Wang, Zhixi and Gao, Yunqi and Zhang, Hongwen and Liu, Yebin}, title = {SharpTimeGS: Sharp and Stable Dynamic Gaussian Splatting via Lifespan Modulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11798-11807} }
A Combination of Noise and Bilateral Filters Achieve Supralinear and Scalable Adversarial Robustness in CNNs-
[pdf]
[supp]
[bibtex]@InProceedings{Stalder_2026_CVPR, author = {Stalder, Nicolas and Grewe, Benjamin F. and Saponati, Matteo and Aceituno, Pau Vilimelis}, title = {A Combination of Noise and Bilateral Filters Achieve Supralinear and Scalable Adversarial Robustness in CNNs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6559-6568} }
Gau-Occ: Geometry-Completed Gaussians for Multi-Modal 3D Occupancy Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lv_2026_CVPR, author = {Lv, Chengxin and Li, Yihui and Yang, Hongyu and Wang, YunHong}, title = {Gau-Occ: Geometry-Completed Gaussians for Multi-Modal 3D Occupancy Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14198-14207} }
Splatent: Splatting Diffusion Latents for Novel View Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Hirschorn_2026_CVPR, author = {Hirschorn, Or and Sela, Omer and Huberman-Spiegelglas, Inbar and Efrat, Netalee and Alshan, Eli and Ideses, Ianir and Devernay, Frederic and Zvik, Yochai and Fritz, Lior}, title = {Splatent: Splatting Diffusion Latents for Novel View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8319-8330} }
Franca: Nested Matryoshka Clustering for Scalable Visual Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Venkataramanan_2026_CVPR, author = {Venkataramanan, Shashanka and Pariza, Valentinos and Salehi, Mohammadreza and Knobel, Lukas and Ramzi, Elias and Gidaris, Spyros and Bursuc, Andrei and Asano, Yuki M}, title = {Franca: Nested Matryoshka Clustering for Scalable Visual Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10533-10544} }
Benchmarking Single-Factor Physical Video-to-Audio Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Tingle and Gururani, Siddharth and Shih, Kevin J. and Bhatt, Gantavya and Lee, Sang-gil and Kong, Zhifeng and Goel, Arushi and Anumanchipalli, Gopala and Liu, Ming-Yu}, title = {Benchmarking Single-Factor Physical Video-to-Audio Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1939-1949} }
UniSH: Unifying Scene and Human Reconstruction in a Feed-Forward Pass-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Mengfei and Li, Peng and Zhang, Zheng and Lu, Jiahao and Zhao, Chengfeng and Xue, Wei and Liu, Qifeng and Peng, Sida and Zhang, Wenxiao and Luo, Wenhan and Liu, Yuan and Guo, Yike}, title = {UniSH: Unifying Scene and Human Reconstruction in a Feed-Forward Pass}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14038-14049} }
SRPO: Self-Referential Policy Optimization for Vision-Language-Action Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fei_2026_CVPR, author = {Fei, Senyu and Wang, Siyin and Ji, Li and Li, Ao and Zhang, Shiduo and Liu, Liming and Hou, Jinlong and Gong, Jingjing and Zhao, Xianzhong and Qiu, Xipeng}, title = {SRPO: Self-Referential Policy Optimization for Vision-Language-Action Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6718-6728} }
Differences That Matter: Auditing Models for Capability Gap Discovery and Rectification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Qihao and Mao, Chengzhi and Liu, Yaojie and Yuille, Alan and Chu, Wen-Sheng}, title = {Differences That Matter: Auditing Models for Capability Gap Discovery and Rectification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1639-1650} }
FVBench: Benchmarking Deepfake Video Detection Capability of Large Multimodal Models-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Jiarui and Duan, Huiyu and Wang, Juntong and Min, Xiongkuo}, title = {FVBench: Benchmarking Deepfake Video Detection Capability of Large Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4425-4437} }
MTA: Multimodal Task Alignment for BEV Perception and Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Yunsheng and Yaman, Burhaneddin and Ye, Xin and Luo, Jingru and Tao, Feng and Mallik, Abhirup and Wang, Ziran and Ren, Liu}, title = {MTA: Multimodal Task Alignment for BEV Perception and Captioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {670-679} }
GeoSAM2: Unleashing the Power of SAM2 for 3D Part Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Ken and Yang, Yunhan and Sun, Jingxiang and Liu, Xihui and Liu, Yebin and Liang, Ding and Cao, Yan-Pei}, title = {GeoSAM2: Unleashing the Power of SAM2 for 3D Part Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6367-6376} }
Time Without Time: Pseudo-Temporal Representation for Space-Time Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, Hee Min and Kang, Hyoa and Kim, Suji and Oh, Dokwan and Cho, Nam Ik}, title = {Time Without Time: Pseudo-Temporal Representation for Space-Time Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6812-6822} }
Unsafe2Safe: Controllable Image Anonymization for Downstream Utility-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dinh_2026_CVPR, author = {Dinh, Minh and Jin, SouYoung}, title = {Unsafe2Safe: Controllable Image Anonymization for Downstream Utility}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3326-3336} }
Exploring Adaptive Masked Reconstruction for Self-Supervised Skeleton-Based Action Recognition-
[pdf]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Shengkai and Cheng, Zhiyong and Zhang, Zefan and Dong, Jianfeng and Li, Zhihui and Wang, Meng}, title = {Exploring Adaptive Masked Reconstruction for Self-Supervised Skeleton-Based Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13974-13983} }
FedARA: Resource-adaptive Low-rank Personalized Federated Learning via Anchor-driven Representation Alignment on Heterogeneous Edge Devices-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Ruonan and Wang, Zheng and Liu, Debin and Lv, Shijie and Yang, Laurence Tianruo}, title = {FedARA: Resource-adaptive Low-rank Personalized Federated Learning via Anchor-driven Representation Alignment on Heterogeneous Edge Devices}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10357-10366} }
MV2UV: Generating High-quality UV Texture Maps with Multiview Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zheng and Zhang, Qinchuan and Ye, Yuteng and Chen, Zhi and Ji, Penglei and Li, Mengfei and Zhang, Wenxiao and Liu, Yuan}, title = {MV2UV: Generating High-quality UV Texture Maps with Multiview Prompts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12684-12694} }
One-to-All Animation: Alignment-Free Character Animation and Image Pose Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Shijun and Xu, Jing and Li, Zhihang and Peng, Chunli and Yang, Xiaoda and Lu, Lijing and Hu, Kai and Zhang, Jiangning}, title = {One-to-All Animation: Alignment-Free Character Animation and Image Pose Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4011-4021} }
Rethinking UMM Visual Generation: Masked Modeling for Efficient Image-Only Pre-training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Peng and Xie, Jun and Lin, Tao}, title = {Rethinking UMM Visual Generation: Masked Modeling for Efficient Image-Only Pre-training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2047-2057} }
E$^2$-SCI: Elastic Edge-Cloud Speculative Decoding via Credit Inertia-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Senyao and Wang, Haozhao and Jiang, Zhaobai and Jin, Zhanbo and Fan, Hao and Li, Ruixuan}, title = {{E$^2$-SCI}: Elastic Edge-Cloud Speculative Decoding via Credit Inertia}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12944-12954} }
MMVIP: A Visible-infrared Paired Dataset for Multi-weather Marine Vision-
[pdf]
[supp]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Yunpeng and Wang, Lihan and He, Zhaoshen and He, Xinqiang and Liao, Xingming and Wang, Zhuowei and Cheng, Lianglun}, title = {MMVIP: A Visible-infrared Paired Dataset for Multi-weather Marine Vision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6432-6442} }
Language Does Matter for Cross-Domain Few-Shot Visual Feature Enhancement-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Fei and Zhang, Xiwen and Qiu, Qingqing and Zhang, Lei and Wei, Wei and Ding, Chen and Zhang, Yi and Li, Liang and Yue, Xiangyu and Zhang, Yanning}, title = {Language Does Matter for Cross-Domain Few-Shot Visual Feature Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7946-7956} }
Pip-Stereo: Progressive Iterations Pruner for Iterative Optimization based Stereo Matching-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Jintu and Liu, Qizhe and Xu, Huangxin and Chen, Zhuojie}, title = {Pip-Stereo: Progressive Iterations Pruner for Iterative Optimization based Stereo Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7503-7512} }
Blink: Dynamic Visual Token Resolution for Enhanced Multimodal Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Yuchen and Zhang, Zhenyu and Gu, Naibin and Chen, Yilong and Fu, Peng and Lin, Zheng and Wang, Shuohuan and Sun, Yu and Wu, Hua and Wang, Weiping and Wang, Haifeng}, title = {Blink: Dynamic Visual Token Resolution for Enhanced Multimodal Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3511-3521} }
PromptMoE: A Segmentation Refinement Framework Leveraging Mixture of Experts for Improved Prompting-
[pdf]
[supp]
[bibtex]@InProceedings{Price_2026_CVPR, author = {Price, Stephen and Cote, Danielle L. and Rundensteiner, Elke A.}, title = {PromptMoE: A Segmentation Refinement Framework Leveraging Mixture of Experts for Improved Prompting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6325-6335} }
Towards Reasoning-Preserving Unlearning in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Hongji and Yu, Manjiang and Yao, Junchi and Singh, Priyanka and Li, Xue and Wang, Di and Hu, Lijie}, title = {Towards Reasoning-Preserving Unlearning in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10251-10261} }
SpaceTimePilot: Generative Rendering of Dynamic Scenes Across Space and Time-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Zhening and Jeong, Hyeonho and Chen, Xuelin and Gryaditskaya, Yulia and Wang, Tuanfeng Y. and Lasenby, Joan and Huang, Chun-Hao}, title = {SpaceTimePilot: Generative Rendering of Dynamic Scenes Across Space and Time}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11218-11228} }
Towards Cross-Modal Preservation, Consistency and Alignment for Privacy-Preserving Visible-Infrared Person Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Yudi and Zhou, Zhongao and Yang, Bin and Chen, Zhenghan and Ye, Mang}, title = {Towards Cross-Modal Preservation, Consistency and Alignment for Privacy-Preserving Visible-Infrared Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11273-11282} }
Learning Hierarchical Hyperbolic Mixture Model for Part-aware 3D Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Qitong and Feng, Mingtao and Wu, Zijie and Zhu, Huixin and Dong, Weisheng and Wang, Yaonan and Mian, Ajmal}, title = {Learning Hierarchical Hyperbolic Mixture Model for Part-aware 3D Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12695-12705} }
From Measurement to Mitigation: Quantifying and Reducing Identity Leakage in Image Representation Encoders with Linear Subspace Removal-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{George_2026_CVPR, author = {George, Daniel and Yeh, Charles and Lee, Daniel and Zhang, Yifei}, title = {From Measurement to Mitigation: Quantifying and Reducing Identity Leakage in Image Representation Encoders with Linear Subspace Removal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3284-3293} }
SegEarth-R2: Towards Comprehensive Language-guided Segmentation for Remote Sensing Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xin_2026_CVPR, author = {Xin, Zepeng and Li, Kaiyu and Chen, Luodi and Li, Wanchen and Yuchen, Xiao and Qiao, Hui and Zhang, Weizhan and Meng, Deyu and Cao, Xiangyong}, title = {SegEarth-R2: Towards Comprehensive Language-guided Segmentation for Remote Sensing Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13199-13210} }
AvatarPointillist: AutoRegressive 4D Gaussian Avatarization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Hongyu and Wang, Xuan and Wu, Zijian and Wang, Yating and Wan, Ziyu and Ma, Yue and Liu, Runtao and Zhou, Boyao and Shen, Yujun and Chen, Qifeng}, title = {AvatarPointillist: AutoRegressive 4D Gaussian Avatarization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11039-11050} }
Out of Sight, Out of Track: Adversarial Attacks on Propagation-based Multi-Object Trackers via Query State Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bouzidi_2026_CVPR, author = {Bouzidi, Halima and Liu, Haoyu and Achamyeleh, Yonatan and Iddamsetty, Praneetsai and Al Faruque, Mohammad}, title = {Out of Sight, Out of Track: Adversarial Attacks on Propagation-based Multi-Object Trackers via Query State Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13326-13335} }
VLIC: Vision-Language Models As Perceptual Judges for Human-Aligned Image Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sargent_2026_CVPR, author = {Sargent, Kyle and Gao, Ruiqi and Henzler, Philipp and Herrmann, Charles and Holynski, Aleksander and Fei-Fei, Li and Wu, Jiajun and Zhang, Jason Y.}, title = {VLIC: Vision-Language Models As Perceptual Judges for Human-Aligned Image Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10462-10471} }
HiconAgent: History Context-aware Policy Optimization for GUI Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Xurui and Chen, Gongwei and Xie, Yuquan and Li, Zaijing and Zhou, Kaiwen and Wang, Shuai and Yang, Shuo and Tian, Zhuotao and Shao, Rui}, title = {HiconAgent: History Context-aware Policy Optimization for GUI Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13028-13038} }
CLaD: Planning with Grounded Foresight via Cross-Modal Latent Dynamics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2026_CVPR, author = {Jeong, Andrew and Kim, Jaemin and Lee, Sebin and Yoon, Sung-Eui}, title = {CLaD: Planning with Grounded Foresight via Cross-Modal Latent Dynamics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {966-975} }
Unblur-SLAM: Dense Neural SLAM for Blurry Inputs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Qi and Rozumny, Denis and Girlanda, Francesco and Karaoglu, Sezer and Pollefeys, Marc and Gevers, Theo and Oswald, Martin R.}, title = {Unblur-SLAM: Dense Neural SLAM for Blurry Inputs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {352-362} }
GDPO-SR: Group Direct Preference Optimization for One-Step Generative Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yi_2026_CVPR, author = {Yi, Qiaosi and Li, Shuai and Wu, Rongyuan and Sun, Lingchen and Zhang, Zhengqiang and Zhang, Lei}, title = {GDPO-SR: Group Direct Preference Optimization for One-Step Generative Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2177-2187} }
SceneScribe-1M: A Large-Scale Video Dataset with Comprehensive Geometric and Semantic Annotations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yunnan and Zheng, Kecheng and Wang, Jianyuan and Chen, Minghao and Novotny, David and Rupprecht, Christian and Xu, Yinghao and Zhu, Xing and Zeng, Wenjun and Jin, Xin and Shen, Yujun}, title = {SceneScribe-1M: A Large-Scale Video Dataset with Comprehensive Geometric and Semantic Annotations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12628-12639} }
Direction-aware 3D Large Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Quan and Xuan, Weihao and Wang, Junjue and Yokoya, Naoto and Shao, Ling and Lu, Shijian}, title = {Direction-aware 3D Large Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9668-9678} }
Thinking in Uncertainty: Mitigating Hallucinations in MLRMs with Latent Entropy-Aware Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Zhongxing and Wang, Zhonghua and Qian, Zhe and Shi, Dachuan and Tang, Feilong and Hu, Ming and Su, Shiyan and Zou, Xiaocheng and Feng, Wei and Mahapatra, Dwarikanath and Peng, Yifan and Lin, Minquan and Ge, Zongyuan}, title = {Thinking in Uncertainty: Mitigating Hallucinations in MLRMs with Latent Entropy-Aware Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11064-11075} }
Hierarchical Action Learning for Weakly-Supervised Action Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Junxian and Cai, Ruichu and Fang, Juntao and Zhu, Hao and Xu, Boyan and Chen, Weilin and Li, Zijian and Gao, Shenghua}, title = {Hierarchical Action Learning for Weakly-Supervised Action Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6054-6064} }
MuCo: Multi-turn Contrastive Learning for Multimodal Embedding Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2026_CVPR, author = {Gu, Geonmo and Heo, Byeongho and Yu, Jaemyung and Hwang, Jaehui and Kim, Taekyung and Lee, Sangmin and Jun, HeeJae and Kang, Yoohoon and Yun, Sangdoo and Han, Dongyoon}, title = {MuCo: Multi-turn Contrastive Learning for Multimodal Embedding Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1749-1758} }
Multi-view Crowd Tracking Transformer with View-Ground Interactions Under Large Real-World Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Qi and Chen, Jixuan and Zhang, Kaiyi and Yu, Xinquan and Chan, Antoni B. and Huang, Hui}, title = {Multi-view Crowd Tracking Transformer with View-Ground Interactions Under Large Real-World Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13626-13635} }
Instruction-Guided Lesion Segmentation for Chest X-rays with Automatically Generated Large-Scale Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, Geon and Yoon, Hangyul and Shin, Hyunju and Park, Hyunki and Seo, Sang Hoon and Yang, Eunho and Choi, Edward}, title = {Instruction-Guided Lesion Segmentation for Chest X-rays with Automatically Generated Large-Scale Dataset}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1482-1492} }
Lifting Unlabeled Internet-level Data for 3D Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yixin and Zhang, Yaowei and Yu, Huangyue and He, Junchao and Wang, Yan and Huang, Jiangyong and Shen, Hongyu and Ni, Junfeng and Wang, Shaofei and Jia, Baoxiong and Zhu, Song-Chun and Huang, Siyuan}, title = {Lifting Unlabeled Internet-level Data for 3D Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5814-5827} }
AudioAvatar: Personalized Audio-driven Whole-body Talking Avatars-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Seungeun and Moon, SeungJun and Lew, Hah Min and Kang, Ji-Su and Park, Gyeong-Moon}, title = {AudioAvatar: Personalized Audio-driven Whole-body Talking Avatars}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3998-4010} }
GUIDE: A Benchmark for Understanding and Assisting Users in Open-Ended GUI Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Saelyne and Yu, Jaesang and Peng, Yi-Hao and Lin, Kevin Qinghong and Cho, Jae Won and Song, Yale and Kim, Juho}, title = {GUIDE: A Benchmark for Understanding and Assisting Users in Open-Ended GUI Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13017-13027} }
Yume1.5: A Text-Controlled Interactive World Generation Model-
[pdf]
[supp]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Xiaofeng and Li, Zhen and Li, Chuanhao and Xu, Xiaojie and Ying, Kaining and Zhang, Kaipeng}, title = {Yume1.5: A Text-Controlled Interactive World Generation Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7752-7761} }
UST-Hand: An Uncertainty-aware Spatiotemporal Point Cloud Interaction Network for 3D Self-supervised Hand Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Tianhao and Zhang, Haoyang and Xie, Liang and Chang, Haochen and Gao, Kun and Cheng, Yuan and Ren, Pengfei and Yin, Erwei}, title = {UST-Hand: An Uncertainty-aware Spatiotemporal Point Cloud Interaction Network for 3D Self-supervised Hand Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8857-8867} }
Monet: Reasoning in Latent Visual Space Beyond Image and Language-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Qixun and Shi, Yang and Wang, Yifei and Zhang, Yuanxing and Wan, Pengfei and Gai, Kun and Ying, Xianghua and Wang, Yisen}, title = {Monet: Reasoning in Latent Visual Space Beyond Image and Language}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12030-12040} }
GLINT: Modeling Scene-Scale Transparency via Gaussian Radiance Transport-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Na_2026_CVPR, author = {Na, Youngju and Yun, Jaeseong and Ryu, Soohyun and Kim, Hyunsu and Yoon, Sung-Eui and Yeon, Suyong}, title = {GLINT: Modeling Scene-Scale Transparency via Gaussian Radiance Transport}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7256-7265} }
CoCoVideo: The High-Quality Commercial-Model-Based Contrastive Benchmark for AI-Generated Video Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Huidong and Chen, Wentao and Chen, Jie and Cai, Xinqi and Ma, Ruolong and Zheng, Yinglin and Lin, Yuxin and Zeng, Ming}, title = {CoCoVideo: The High-Quality Commercial-Model-Based Contrastive Benchmark for AI-Generated Video Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11346-11356} }
When to Think and When to Look: Uncertainty-Guided Lookback-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bi_2026_CVPR, author = {Bi, Jing and Bellos, Filippos and Guo, Junjia and Li, Yayuan and Huang, Chao and Tang, Yunlong and Song, Luchuan and Liang, Susan and Zhang, Zhongfei and Corso, Jason J. and Xu, Chenliang}, title = {When to Think and When to Look: Uncertainty-Guided Lookback}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5104-5113} }
MangoBench: A Benchmark for Multi-Agent Goal-Conditioned Offline Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yi and Zhong, Ningze and Fu, Zhiheng and Wang, Longguang and Zhang, Ye and Guo, Yulan}, title = {MangoBench: A Benchmark for Multi-Agent Goal-Conditioned Offline Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6219-6228} }
ViLoMem: Agentic Learner with Grow-and-Refine Multimodal Semantic Memory-
[pdf]
[supp]
[bibtex]@InProceedings{Bo_2026_CVPR, author = {Bo, Weihao and Zhang, Shan and Sun, Yanpeng and Wu, Jingjing and Xie, Qunyi and Tan, Xiao and Chen, Kunbin and He, Wei and Li, Xiaofan and Zhao, Na and Wang, Jingdong and Li, Zechao}, title = {ViLoMem: Agentic Learner with Grow-and-Refine Multimodal Semantic Memory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5476-5486} }
Learnability-Driven Submodular Optimization for Active Roadside 3D Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Ruiyu and Zhang, Baoming and Ruozzi, Nicholas and Guo, Yunhui}, title = {Learnability-Driven Submodular Optimization for Active Roadside 3D Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11579-11588} }
Tea-Adapter: Teacher Adapter for Efficient Conditional Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yinhan and Ma, Yue and Yi, Fangqiu and Qi, Chenyang and Zhang, Chi and Feng, Kunyu and Wang, Zeyu}, title = {Tea-Adapter: Teacher Adapter for Efficient Conditional Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4805-4815} }
Enhancing Visual Representation with Textual Semantics: Textual Semantics-Powered Prototypes for Heterogeneous Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Xinghao and Niu, Jianwei and Liu, Xuefeng and Zhu, Guogang and Zhang, Jiayuan and Tang, Shaojie and Chen, Wei}, title = {Enhancing Visual Representation with Textual Semantics: Textual Semantics-Powered Prototypes for Heterogeneous Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10313-10323} }
Memory Matters: Boosting Training-Free Zero-Shot Temporal Action Localization with a Learnable Lookup Table-
[pdf]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Han and Tang, Haoyu and Mu, Xiaoxuan and Li, Chen and Zhu, Jihua}, title = {Memory Matters: Boosting Training-Free Zero-Shot Temporal Action Localization with a Learnable Lookup Table}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9762-9772} }
TRCoRSurg: Temporal-Relational Co-Reasoning for Surgical Video Triplet Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Fang and Zou, Shihao and Si, Weixin and Gao, Yang and Li, Shuai and Hao, Aimin}, title = {TRCoRSurg: Temporal-Relational Co-Reasoning for Surgical Video Triplet Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2811-2820} }
Don't Show Pixels, Show Cues: Unlocking Visual Tool Reasoning in Language Models via Perception Programs-
[pdf]
[supp]
[bibtex]@InProceedings{Janjua_2026_CVPR, author = {Janjua, Muhammad Kamran and Silva, Hugo and Niu, Di and Rashidi, Bahador}, title = {Don't Show Pixels, Show Cues: Unlocking Visual Tool Reasoning in Language Models via Perception Programs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5165-5174} }
Prime Once, then Reprogram Locally: An Efficient Alternative to Black-Box Service Model Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yunbei and Cai, Chengyi and Liu, Feng and Hamm, Jihun}, title = {Prime Once, then Reprogram Locally: An Efficient Alternative to Black-Box Service Model Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6176-6187} }
Clothe and Pose-
[pdf]
[supp]
[bibtex]@InProceedings{Sharma_2026_CVPR, author = {Sharma, Nakul and Bansal, Aayush and Vo, Minh}, title = {Clothe and Pose}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2015-2024} }
FAVE: A Structured Benchmark for Fine-Grained Audio-Visual Temporal Evaluation in Multimodal LLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Weiheng and Yu, An and Li, Jian and Zhang, Zhenfei and Ye, Felix X.-F. and Chang, Ming-Ching}, title = {FAVE: A Structured Benchmark for Fine-Grained Audio-Visual Temporal Evaluation in Multimodal LLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1651-1660} }
Bootstrap Dynamic-Aware 3D Visual Representation for Scalable Robot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Qiwei and Cai, Boyang and Lai, Minghao and Zhuang, Sitong and Lin, Tao and Qin, Yan and Ye, Yixuan and Liang, Jiaming and Xu, Renjing}, title = {Bootstrap Dynamic-Aware 3D Visual Representation for Scalable Robot Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13419-13429} }
Proof-of-Perception: Certified Tool-Using Multimodal Reasoning with Compositional Conformal Guarantees-
[pdf]
[arXiv]
[bibtex]@InProceedings{Fayyazi_2026_CVPR, author = {Fayyazi, Arya and Akrami, Haleh}, title = {Proof-of-Perception: Certified Tool-Using Multimodal Reasoning with Compositional Conformal Guarantees}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5144-5153} }
Generating Humanless Environment Walkthroughs from Egocentric Walking Tour Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ham_2026_CVPR, author = {Ham, Yujin and Kim, Junho and Boominathan, Vivek and Balakrishnan, Guha}, title = {Generating Humanless Environment Walkthroughs from Egocentric Walking Tour Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4322-4331} }
RHCNet: Residual-Guided Hierarchical Calibration Network for Robust Underwater Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yueying and Guo, Yiteng and Zhang, Weidong and Wen, Jie and Shen, Liquan and Yan, Huaicheng and Xu, Xin}, title = {RHCNet: Residual-Guided Hierarchical Calibration Network for Robust Underwater Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4393-4402} }
Diffusion-Based Native Adversarial Synthesis for Enhanced Medical Segmentation Generalization-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Hongyu and Chen, Haipeng and Xu, Zhimin and Yang, Chengxin and Lyu, Yingda}, title = {Diffusion-Based Native Adversarial Synthesis for Enhanced Medical Segmentation Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1461-1471} }
CLIPoint3D: Language-Grounded Few-Shot Unsupervised 3D Point Cloud Domain Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Singha_2026_CVPR, author = {Singha, Mainak and Mehrotra, Sarthak and Casari, Paolo and Chaudhuri, Subhasis and Ricci, Elisa and Banerjee, Biplab}, title = {CLIPoint3D: Language-Grounded Few-Shot Unsupervised 3D Point Cloud Domain Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9582-9592} }
TimeLens: Rethinking Video Temporal Grounding with Multimodal LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jun and Wang, Teng and Ge, Yuying and Ge, Yixiao and Li, Xinhao and Wang, Limin}, title = {TimeLens: Rethinking Video Temporal Grounding with Multimodal LLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10419-10429} }
HypeVPR: Exploring Hyperbolic Space for Perspective to Equirectangular Visual Place Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Woo_2026_CVPR, author = {Woo, Suhan and Lee, Seongwon and Jang, Jinwoo and Kim, Euntai}, title = {HypeVPR: Exploring Hyperbolic Space for Perspective to Equirectangular Visual Place Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12183-12192} }
VGA: Empowering Aerial-Ground Localization by Visual Geometry Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Tao Jun and Shi, Yujiao and Li, Hongdong}, title = {VGA: Empowering Aerial-Ground Localization by Visual Geometry Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5409-5420} }
AwareVLN: Reasoning with Self-awareness for Vision-Language Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Wenxuan and Xu, Xiuwei and Liu, Yichen and Li, Xiangyu and Yin, Hang and Chen, Huangxing and Zheng, Wenzhao and Feng, Jianjiang and Zhou, Jie and Lu, Jiwen}, title = {AwareVLN: Reasoning with Self-awareness for Vision-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4065-4075} }
Adaptive Spatial-Temporal Window: Unlocking the Potential of Event Cameras in Heterogeneous Velocity Scenarios-
[pdf]
[supp]
[bibtex]@InProceedings{Sui_2026_CVPR, author = {Sui, Zhipeng and Hao, Haiqing and He, Weihua and Lee, Seng-Hong and Wang, Wenhui}, title = {Adaptive Spatial-Temporal Window: Unlocking the Potential of Event Cameras in Heterogeneous Velocity Scenarios}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {946-955} }
RS-SSM: Refining Forgotten Specifics in State Space Model for Video Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Kai and Cui, Zhenyu and Zang, Zehua and Zhou, Jiahuan}, title = {RS-SSM: Refining Forgotten Specifics in State Space Model for Video Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10741-10752} }
Re-Align: Structured Reasoning-guided Alignment for In-Context Image Generation and Editing-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Runze and Cheng, Yiji and Hang, Tiankai and Li, Zhimin and Xu, Yu and Yin, Zijin and Zhang, Shiyi and Dai, Wenxun and Du, Penghui and Ma, Ao and Wang, Chunyu and Lu, Qinglin and Han, Jizhong and Dai, Jiao}, title = {Re-Align: Structured Reasoning-guided Alignment for In-Context Image Generation and Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9051-9062} }
PositionIC: Unified Position and Identity Consistency for Image Customization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Junjie and Han, Tianyang and Ma, Kai and Gao, Jialin and Song, Yang and He, Xianhua and Luo, Junfeng and Wei, Xiaoming and Zhang, Wenqiang}, title = {PositionIC: Unified Position and Identity Consistency for Image Customization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9139-9148} }
CLIP Is Shortsighted: Paying Attention Beyond the First Sentence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lavoie_2026_CVPR, author = {Lavoie, Marc-Antoine and Mahmoud, Anas and Zaimi, Aldo and Tchango, Arsene Fansi and Waslander, Steven L.}, title = {CLIP Is Shortsighted: Paying Attention Beyond the First Sentence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9524-9534} }
Do Vision-Language Models Leak What They Learn? Adaptive Token-Weighted Model Inversion Attacks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Ngoc-Bao and Ho, Sy-Tuyen and Hao, Koh Jun and Cheung, Ngai-Man}, title = {Do Vision-Language Models Leak What They Learn? Adaptive Token-Weighted Model Inversion Attacks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10283-10292} }
Revisiting Geometric Obfuscation with Dual Convergent Lines for Privacy-Preserving Image Queries in Visual Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Jeonggon and Moon, Heejoon and Hong, Je Hyeong}, title = {Revisiting Geometric Obfuscation with Dual Convergent Lines for Privacy-Preserving Image Queries in Visual Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {113-123} }
Sparsity-Aware Voxel Attention and Foreground Modulation for 3D Semantic Scene Completion-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xue_2026_CVPR, author = {Xue, Yu and Gao, Longjun and Su, Yuanqi and Lu, HaoAng and Zhang, Xiaoning}, title = {Sparsity-Aware Voxel Attention and Foreground Modulation for 3D Semantic Scene Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5751-5761} }
TV2TV: A Unified Framework for Interleaved Language and Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Xiaochuang and Emad, Youssef and Hall, Melissa and Nguyen, John and Padthe, Karthik and Robbins, Liam and Bar, Amir and Chen, Delong and Drozdzal, Michal and Elbayad, Maha and Hu, Yushi and Li, Shang-Wen and Verbeek, Jakob and Wang, XuDong and Ghazvininejad, Marjan and Zettlemoyer, Luke and Dinan, Emily}, title = {TV2TV: A Unified Framework for Interleaved Language and Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7695-7706} }
Pose-Free Omnidirectional Gaussian Splatting for 360-Degree Videos with Consistent Depth Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhuang_2026_CVPR, author = {Zhuang, Chuanqing and Lu, Xin and Deng, Zehui and Lu, Zhengda and Wang, Yiqun and Diao, Junqi and Xiao, Jun}, title = {Pose-Free Omnidirectional Gaussian Splatting for 360-Degree Videos with Consistent Depth Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4881-4890} }
ObjectMorpher: 3D-Aware Image Editing via Deformable 3DGS-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Yuhuan and Pan, Aoxuan and Huang, Yi-Hua and Chang, Chirui and Dai, Peng and Yu, Xin and Qi, Xiaojuan}, title = {ObjectMorpher: 3D-Aware Image Editing via Deformable 3DGS}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5828-5838} }
SafeRoPE: Risk-specific Head-wise Embedding Rotation for Safe Generation in Rectified Flow Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Xiang and Li, Feifei and Zhang, Mi and Hong, Geng and You, Xiaoyu and Yang, Min}, title = {SafeRoPE: Risk-specific Head-wise Embedding Rotation for Safe Generation in Rectified Flow Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {690-700} }
Self-Paced and Self-Corrective Masked Prediction for Movie Trailer Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Sidan and Xu, Hongteng and Luo, Dixin}, title = {Self-Paced and Self-Corrective Masked Prediction for Movie Trailer Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7684-7694} }
GenErase: Generalizable and Semantically-Aware Concept Erasure in Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Vardhana_2026_CVPR, author = {Vardhana, Korada Sri and Biswas, Soma}, title = {GenErase: Generalizable and Semantically-Aware Concept Erasure in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2327-2335} }
RawMetaDiff: Unlocking Extreme Darkness from Dual-Exposure RAW with Meta-Guided Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Panjun and Xia, Jiyuan and Guan, Yuanshen and Li, Yong and Lang, Zhiqiang and Xu, Ruikang and Chen, Chang and Song, Dehua and Song, Fenglong and Xiong, Zhiwei}, title = {RawMetaDiff: Unlocking Extreme Darkness from Dual-Exposure RAW with Meta-Guided Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5617-5626} }
NimbusGS: Unified 3D Scene Reconstruction under Hybrid Weather-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yanying and Li, Jinyang and He, Shengfeng and Xu, Yangyang and Dong, Junyu and Du, Yong}, title = {NimbusGS: Unified 3D Scene Reconstruction under Hybrid Weather}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5038-5048} }
FinPercep-RM: A Fine-grained Reward Model and Co-evolutionary Curriculum for RL-based Real-world Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yidi and Fan, Zihao and Huang, Jie and Xiao, Jie and Li, Dong and Zhang, Wenlong and Bai, Lei and Fu, Xueyang and Zha, Zheng-jun}, title = {FinPercep-RM: A Fine-grained Reward Model and Co-evolutionary Curriculum for RL-based Real-world Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4839-4849} }
PrivateEyes: Gaze-Preserving Anonymization for Data Sharing-
[pdf]
[supp]
[bibtex]@InProceedings{Gupta_2026_CVPR, author = {Gupta, Surabhi and Muthumariappan, Dinesh Prabhu and Das, Biplab and Rajagopal, Anoop Kolar and Iyer, Kiran Nanjunda and Seo, Donghwan}, title = {PrivateEyes: Gaze-Preserving Anonymization for Data Sharing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3274-3283} }
All Vehicles Can Lie: Efficient Adversarial Defense in Fully Untrusted-Vehicle Collaborative Perception via Pseudo-Random Bayesian Inference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Yi and Wu, Libing and Zhang, Zhuangzhuang and Qiu, Jing and Huo, Lijuan and Feng, Jiaqi}, title = {All Vehicles Can Lie: Efficient Adversarial Defense in Fully Untrusted-Vehicle Collaborative Perception via Pseudo-Random Bayesian Inference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6549-6558} }
C-LaV: Conditional Latent Velocity Field Denoising for Weather-Robust LiDAR Place Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Xuewei and Yang, Jiayue and Zeng, Zhiwen and Zhang, Yanyong and Xia, Yan}, title = {C-LaV: Conditional Latent Velocity Field Denoising for Weather-Robust LiDAR Place Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2928-2937} }
Uncertainty-Aware Knowledge Distillation for Multimodal Large Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Jingchen and Han, Shaobo and Patel, Deep and Kohno, Wataru and Jin, Can and Chen, Changyou}, title = {Uncertainty-Aware Knowledge Distillation for Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5585-5595} }
GUI-SAGE: Enhancing GUI Automation with Self-Explanatory Learning-
[pdf]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Fei and Gu, Zhangxuan and Lu, Zhengxi and Zhang, Shangzhan and Zeng, Zhengwen and Shen, Shuheng and Meng, Changhua and Yan, Yuchen and Zhang, Wenqi and Shen, Yongliang and Lu, Weiming and Zhuang, Yueting}, title = {GUI-SAGE: Enhancing GUI Automation with Self-Explanatory Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13007-13016} }
A3: Towards Advertising Aesthetic Assessment-
[pdf]
[supp]
[bibtex]@InProceedings{Ji_2026_CVPR, author = {Ji, Kaiyuan and Gao, Yixuan and Sun, Lu and Zheng, Yushuo and Chen, Zijian and Zhang, Jianbo and Zhu, Xiangyang and Tian, Yuan and Zhang, Zicheng and Zhai, Guangtao}, title = {A3: Towards Advertising Aesthetic Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9478-9490} }
SigLino: Efficient Multi-Teacher Distillation for Agglomerative Vision Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chaybouti_2026_CVPR, author = {Chaybouti, Sofian and Narayan, Sanath and Dahou, Yasser and Khắc, Phúc H. Lê and Singh, Ankit and Huynh, Ngoc and Para, Wamiq Reyaz and Kuehne, Hilde and Hacid, Hakim}, title = {SigLino: Efficient Multi-Teacher Distillation for Agglomerative Vision Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10071-10081} }
FlexTraj: Image-to-Video Generation with Flexible Point Trajectory Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zhiyuan and Wang, Can and Chen, Dongdong and Liao, Jing}, title = {FlexTraj: Image-to-Video Generation with Flexible Point Trajectory Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4221-4231} }
Enhancing Accuracy of Uncertainty Estimation in Appearance-based Gaze Tracking with Probabilistic Evaluation and Calibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Qiaojie and Zhang, Jiucai and Zhang, Amy and Zhang, Xiaoli}, title = {Enhancing Accuracy of Uncertainty Estimation in Appearance-based Gaze Tracking with Probabilistic Evaluation and Calibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13793-13801} }
Hier-COS: Making Deep Features Hierarchy-aware via Composition of Orthogonal Subspaces-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sani_2026_CVPR, author = {Sani, Depanshu and Anand, Saket}, title = {Hier-COS: Making Deep Features Hierarchy-aware via Composition of Orthogonal Subspaces}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11263-11272} }
CoLC: Communication-Efficient Collaborative Perception with LiDAR Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Yushan and Zhang, Hui and Xia, Qiming and Jin, Yi and Li, Yidong}, title = {CoLC: Communication-Efficient Collaborative Perception with LiDAR Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2983-2992} }
Consistent Instance Field for Dynamic Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Junyi and Nguyen, Van Nguyen and Planche, Benjamin and Tao, Jiachen and Sun, Changchang and Gao, Zhongpai and Zhao, Zhenghao and Choudhuri, Anwesa and Zhang, Gengyu and Zheng, Meng and Wang, Feiran and Chen, Terrence and Yan, Yan and Wu, Ziyan}, title = {Consistent Instance Field for Dynamic Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3783-3793} }
FoleyDesigner: Immersive Stereo Foley Generation with Precise Spatio-Temporal Alignment for Film Clips-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Mengtian and Dai, Kunyan and Ding, Yi and Ni, Ruobing and Zhang, Ying and Wang, Wenwu and Xie, Zhifeng}, title = {FoleyDesigner: Immersive Stereo Foley Generation with Precise Spatio-Temporal Alignment for Film Clips}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4291-4300} }
Intra-class Distribution-guided Generative Hashing with Neighbor Refinement for Cross-modal Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Hao and Huo, Yadong and Qin, Qibing and Zhang, Wenfeng and Huang, Lei}, title = {Intra-class Distribution-guided Generative Hashing with Neighbor Refinement for Cross-modal Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2671-2681} }
UDAPose: Unsupervised Domain Adaptation for Low-Light Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Haopeng and Ai, Yihao and Kim, Kabeen and Tan, Robby T. and Chen, Yixin and Wang, Bo}, title = {UDAPose: Unsupervised Domain Adaptation for Low-Light Human Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13781-13792} }
GVIS: Generative Vector Image Steganography-
[pdf]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Zihao and Xu, Dawei and Li, Zihan and Zheng, Xixi and Zhang, Chuan}, title = {GVIS: Generative Vector Image Steganography}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9384-9393} }
Adapting Point Cloud Analysis via Multimodal Bayesian Distribution Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Xingyu and Yi, Liang and Wang, Shuo and Zhu, Wenbo and Wu, Yongliang and Zhu, Beier and Zhang, Hanwang}, title = {Adapting Point Cloud Analysis via Multimodal Bayesian Distribution Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9976-9985} }
MOSAIC-GS: Monocular Scene Reconstruction via Advanced Initialization for Complex Dynamic Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Morkva_2026_CVPR, author = {Morkva, Svitlana and Patil, Vaishakh and Tonioni, Alessio and Oechsle, Michael and Wilder-Smith, Maximum and Hutter, Marco}, title = {MOSAIC-GS: Monocular Scene Reconstruction via Advanced Initialization for Complex Dynamic Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1167-1176} }
Physically Inspired Gaussian Splatting for HDR Novel View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Huimin and Bai, Yue and Wang, Hailing and Fu, Yun}, title = {Physically Inspired Gaussian Splatting for HDR Novel View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11808-11817} }
Back to Source: Open-Set Continual Test-Time Adaptation via Domain Compensation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yingkai and Chen, Chaoqi and Huang, Hui}, title = {Back to Source: Open-Set Continual Test-Time Adaptation via Domain Compensation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7957-7966} }
ManifoldNeuS: Manifold-aware View Optimizability for Pose-Free Neural Surface Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xinxin and Wang, Xue and Zhou, Guoqing and Wang, Qing}, title = {ManifoldNeuS: Manifold-aware View Optimizability for Pose-Free Neural Surface Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {261-271} }
Unlearning without Forgetting: Securely Removing Targeted Concepts from Large-Scale Vision-Language Open-Vocabulary Detectors-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Zhongze and Su, Xiu and Yang, Feng and Niu, Dan and You, Shan and Luo, Yueyi and Long, Jun}, title = {Unlearning without Forgetting: Securely Removing Targeted Concepts from Large-Scale Vision-Language Open-Vocabulary Detectors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6271-6281} }
rPPG-VQA: A Video Quality Assessment Framework for Unsupervised rPPG Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dai_2026_CVPR, author = {Dai, Tianyang and Chang, Ming and Chen, Yan and Hu, Yang}, title = {rPPG-VQA: A Video Quality Assessment Framework for Unsupervised rPPG Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1365-1375} }
MatMart: Material Reconstruction of 3D Objects via Diffusion-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Xiuchao and Zhu, Pengfei and Lyu, Jiangjing and Liu, Xinguo and Guo, Jie and Guo, Yanwen and Xu, Weiwei and Lyu, Chengfei}, title = {MatMart: Material Reconstruction of 3D Objects via Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2336-2345} }
No Labels, No Look-Ahead: Unsupervised Online Video Stabilization with Classical Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Tao and Ren, Kan and Wan, Gang and Wen, Shibo}, title = {No Labels, No Look-Ahead: Unsupervised Online Video Stabilization with Classical Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6868-6877} }
LLaMo: Scaling Pretrained Language Models for Unified Motion Understanding and Generation with Continuous Autoregressive Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zekun and An, Sizhe and Tang, Chengcheng and Guo, Chuan and Shugurov, Ivan and Zhang, Linguang and Zhao, Amy and Sridhar, Srinath and Tao, Lingling and Mittal, Abhay}, title = {LLaMo: Scaling Pretrained Language Models for Unified Motion Understanding and Generation with Continuous Autoregressive Tokens}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2209-2220} }
Conditional Factuality Controlled LLMs with Generalization Certificates via Conformal Sampling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Kai and Pan, Qingtao and Li, Shuo}, title = {Conditional Factuality Controlled LLMs with Generalization Certificates via Conformal Sampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3627-3635} }
Stable and Efficient Single-Rollout RL for Multimodal Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Rui and Yu, Dian and Ke, Lei and Liu, Haolin and Zhou, Yujun and Liang, Zhenwen and Mi, Haitao and Tokekar, Pratap and Yu, Dong}, title = {Stable and Efficient Single-Rollout RL for Multimodal Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12009-12018} }
From Where Things Are to What They Are For: Benchmarking Spatial-Functional Intelligence in Multimodal LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Le and Yang, Jihan and Krishnan, Soundarya and Majmudar, Jimit and Ge, Xiou and Puri, Prasoon and Saraf, Prathamesh and Bhargava, Shruti and Piraviperumal, Dhivya and Ling, Yinan and Pan, Cindy and Yu, Hong and Agrawal, Aishwarya and Tseng, Bo-Hsiang},
  title     = {From Where Things Are to What They Are For: Benchmarking Spatial-Functional Intelligence in Multimodal {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12052--12063},
}
Phrase-Grounding-Aware Supervised Fine-Tuning for Chart Recognition via Side-Masked Attention-
[pdf]
[supp]
[bibtex]@InProceedings{Ito_2026_CVPR,
  author    = {Ito, Koichiro},
  title     = {Phrase-Grounding-Aware Supervised Fine-Tuning for Chart Recognition via Side-Masked Attention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9501--9511},
}
SparseSplat: Towards Applicable Feed-Forward 3D Gaussian Splatting with Pixel-Unaligned Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zicheng and Meng, Xiangting and Wu, Ke and Ding, Wenchao},
  title     = {{SparseSplat}: Towards Applicable Feed-Forward {3D} {Gaussian} Splatting with Pixel-Unaligned Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5049--5058},
}
E2EGS: Event-to-Edge Gaussian Splatting for Pose-Free 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Yunsoo and Sung, Changki and Hong, Dasol and Myung, Hyun},
  title     = {{E2EGS}: Event-to-Edge {Gaussian} Splatting for Pose-Free {3D} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4922--4931},
}
Revisiting Optimal Coding for I-ToF under Practical Sensor Constraints-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Wenbin and Iwaguchi, Takafumi and Sagawa, Ryusuke and Kawasaki, Hiroshi},
  title     = {Revisiting Optimal Coding for {I-ToF} under Practical Sensor Constraints},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12501--12510},
}
TrackMAE: Video Representation Learning via Track Mask and Predict-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vandeghen_2026_CVPR,
  author    = {Vandeghen, Renaud and Thoker, Fida Mohammad and Van Droogenbroeck, Marc and Ghanem, Bernard},
  title     = {{TrackMAE}: Video Representation Learning via Track Mask and Predict},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13604--13614},
}
Nonparametric Deep Fine-grained Clustering with Low-Rank Guided Vision-Language Model-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Xulun and Wu, Benyu and Hong, Jie and Zhou, Kun},
  title     = {Nonparametric Deep Fine-grained Clustering with Low-Rank Guided Vision-Language Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2434--2444},
}
FreeScale: Scaling 3D Scenes via Certainty-Aware Free-View Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Chenhan and Chen, Yu and Zhang, Qingwen and Song, Jifei and Xu, Songcen and Yeung, Dit-Yan and Deng, Jiankang},
  title     = {{FreeScale}: Scaling {3D} Scenes via Certainty-Aware Free-View Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {330--340},
}
CARE-Edit: Condition-Aware Routing of Experts for Contextual Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yucheng and Wang, Zedong and Wu, Yuetong and Ma, Yue and Xu, Dan},
  title     = {{CARE-Edit}: Condition-Aware Routing of Experts for Contextual Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9019--9028},
}
PGA: Prior-free Generative Attack for Practical No-box Scenario-
[pdf]
[supp]
[bibtex]@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Hongyu and Yuan, Xiang and Cheng, Gong},
  title     = {{PGA}: Prior-free Generative Attack for Practical No-box Scenario},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13296--13305},
}
Translating Signals to Languages for sEMG-Based Activity Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ming and Qu, Haoxuan and Ke, Qiuhong and Zhou, Wei and Rahmani, Hossein and Liu, Jun},
  title     = {Translating Signals to Languages for {sEMG}-Based Activity Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9317--9329},
}
Beyond Binary Contrast: Modeling Continuous Skeleton Action Spaces with Transitional Anchors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Yingjie and Wang, Yi and Wang, Jiaze and Liu, Anfeng and Tian, Zhuotao},
  title     = {Beyond Binary Contrast: Modeling Continuous Skeleton Action Spaces with Transitional Anchors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6075--6084},
}
Enhancing Mixture-of-Experts Specialization via Cluster-Aware Upcycling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chu_2026_CVPR,
  author    = {Chu, Sanghyeok and Ahn, Pyunghwan and Song, Gwangmo and Kim, Seung Hwan and Lee, Honglak and Han, Bohyung},
  title     = {Enhancing Mixture-of-Experts Specialization via Cluster-Aware Upcycling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11283--11292},
}
Adapting a Pre-trained Single-Cell Foundation Model to Spatial Gene Expression Generation from Histology Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Donghai and Li, Yongheng and Wang, Zhen and Zeng, Yuansong and Min, Wenwen},
  title     = {Adapting a Pre-trained Single-Cell Foundation Model to Spatial Gene Expression Generation from Histology Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5720--5729},
}
OmniDocLayout: Towards Diverse Document Layout Generation via Coarse-to-Fine LLM Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Hengrui and Gu, Zhuangcheng and Zhao, Zhiyuan and Wen, Zichen and Wang, Bin and Li, Weijia and He, Conghui},
  title     = {{OmniDocLayout}: Towards Diverse Document Layout Generation via Coarse-to-Fine {LLM} Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3208--3218},
}
Video Generation with Stable Transparency via Shiftable RGB-A Distribution Learner-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Haotian and Wang, Wenjing and Li, Chen and Lyu, Jing and Lin, Di},
  title     = {Video Generation with Stable Transparency via Shiftable {RGB-A} Distribution Learner},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1885--1894},
}
Preference-Aligned LoRA Merging: Preserving Subspace Coverage and Addressing Directional Anisotropy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2026_CVPR,
  author    = {Jeong, Wooseong and Lee, Wonyoung and Yoon, Kuk-Jin},
  title     = {Preference-Aligned {LoRA} Merging: Preserving Subspace Coverage and Addressing Directional Anisotropy},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {823--835},
}
HanDyVQA: A Video QA Benchmark for Fine-Grained Hand-Object Interaction Dynamics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tateno_2026_CVPR,
  author    = {Tateno, Masatoshi and Kato, Gido and Kataoka, Hirokatsu and Sato, Yoichi and Yagi, Takuma},
  title     = {{HanDyVQA}: A Video {QA} Benchmark for Fine-Grained Hand-Object Interaction Dynamics},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3455--3465},
}
Composite-Attribute Person Re-Identification via Pose-Guided Disentanglement-
[pdf]
[supp]
[bibtex]@InProceedings{Patwari_2026_CVPR,
  author    = {Patwari, Kartik and Vesdapunt, Noranart and Wang, Chien-Yi and Li, Dawei and Huynh, Cong Phuoc and Zhou, Ning and Chuah, Chen-Nee and Fu, Kah Kuen},
  title     = {Composite-Attribute Person Re-Identification via Pose-Guided Disentanglement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13812--13823},
}
SpikeTrack: High-performance and Energy-efficient Event-Based Object Tracking with Spiking Neural Network-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yang and Zhang, Jiqing and Sun, Chuanyu and Liu, Qianhui and Ge, Huilin and Wei, Ziqi and Yang, Xin},
  title     = {{SpikeTrack}: High-performance and Energy-efficient Event-Based Object Tracking with Spiking Neural Network},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {926--935},
}
Stabilizing Streaming Video Geometry via Dynamic Feature Normalization-
[pdf]
[supp]
[bibtex]@InProceedings{Lyu_2026_CVPR,
  author    = {Lyu, Xiaoyang and Liu, Muxin and Wu, Xiaoshan and Wang, Ruicheng and Huang, Yi-Hua and Sun, Yang-Tian and Shi, Shaoshuai and Qi, Xiaojuan},
  title     = {Stabilizing Streaming Video Geometry via Dynamic Feature Normalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7577--7587},
}
PoseGaussian: 6D Pose Estimation for Unseen Objects via Sparse-View Object-Level 3D Gaussian Splatting-
[pdf]
[bibtex]@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Wubin and Gai, Shaoyan and Da, Feipeng},
  title     = {{PoseGaussian}: {6D} Pose Estimation for Unseen Objects via Sparse-View Object-Level {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4698--4707},
}
Simple Agents Outperform Experts in Biomedical Imaging Workflow Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xuefei and Horstmann, Kai and Lin, Ethan and Chen, Jonathan and Farhang, Alexander and Stiles, Sophia and Sehgal, Atharva and Light, Jonathan and Van Valen, David and Yue, Yisong and Sun, Jennifer J.},
  title     = {Simple Agents Outperform Experts in Biomedical Imaging Workflow Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13680--13690},
}
SmokeSVD: Smoke Reconstruction from A Single View via Progressive Novel View Synthesis and Refinement with Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Chen and Dong, Shanshan and Qiu, Sheng and Han, Jianmin and Zhao, Yibo and Gao, Zan and Komura, Taku and Huang, Kemeng},
  title     = {{SmokeSVD}: Smoke Reconstruction from A Single View via Progressive Novel View Synthesis and Refinement with Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7414--7424},
}
FedDAP: Domain-Aware Prototype Learning for Federated Learning under Domain Shift-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Le_2026_CVPR,
  author    = {Le, Huy Q. and Nguyen, Loc X. and Qiao, Yu and Kim, Seong Tae and Huh, Eui-Nam and Hong, Choong Seon},
  title     = {{FedDAP}: Domain-Aware Prototype Learning for Federated Learning under Domain Shift},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3390--3399},
}
MambaLiteUNet: Cross-Gated Adaptive Feature Fusion for Robust Skin Lesion Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rahman_2026_CVPR,
  author    = {Rahman, Md Maklachur and Jung, Soon Ki and Hammond, Tracy},
  title     = {{MambaLiteUNet}: Cross-Gated Adaptive Feature Fusion for Robust Skin Lesion Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8556--8565},
}
Global Underwater Geolocation from Time-Lapse Polarization Imagery-
[pdf]
[supp]
[bibtex]@InProceedings{Aghajanzadeh_2026_CVPR,
  author    = {Aghajanzadeh, Sara and Bai, Xiaoyang and Zhu, Zhongmin and Forsyth, David and Gruev, Viktor},
  title     = {Global Underwater Geolocation from Time-Lapse Polarization Imagery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6464--6473},
}
Assignment-Driven Hash Learning in a Hyper-Semantic Space for On-the-Fly Category Discovery-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Kaibing and Wang, Yucheng and Luo, Tingzhang},
  title     = {Assignment-Driven Hash Learning in a Hyper-Semantic Space for On-the-Fly Category Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11303--11312},
}
MASQuant: Modality-Aware Smoothing Quantization for Multimodal Large Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Lulu and Xiao, Wenhu and Chen, Xin and Xu, Xinhua and Xu, Bowen and Li, Kun and Tao, Yongliang},
  title     = {{MASQuant}: Modality-Aware Smoothing Quantization for Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8707--8716},
}
CRAFT-LoRA: Content-Style Personalization via Rank-Constrained Adaptation and Training-Free Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Yu and Cai, Yujun and Zhang, Chi},
  title     = {{CRAFT-LoRA}: Content-Style Personalization via Rank-Constrained Adaptation and Training-Free Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7654--7663},
}
ResiHMR: Residual-Limb Aware Single-Image 3D Human Mesh Recovery for Individuals with Limb Loss-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ying_2026_CVPR,
  author    = {Ying, Jiaying and Du, Heming and Zhang, Kaihao and Tweedy, Sean M. and Yu, Xin},
  title     = {{ResiHMR}: Residual-Limb Aware Single-Image {3D} Human Mesh Recovery for Individuals with Limb Loss},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13940--13950},
}
RoboWheel: A Data Engine from Real-World Human Demonstrations for Cross-Embodiment Robotic Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yuhong and Gao, Zihan and Li, Shengpeng and Chen, Ling-Hao and Liu, Kaisheng and Cheng, Runqing and Lin, Xiao and Liu, Junjia and Li, Zhuoheng and Feng, Jingyi and He, Ziyan and Lin, Jintian and Huang, Zheyan and Liu, Zhifang and Wang, Haoqian},
  title     = {{RoboWheel}: A Data Engine from Real-World Human Demonstrations for Cross-Embodiment Robotic Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6664--6674},
}
Neural Distribution Prior for LiDAR Out-of-Distribution Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Zizhao and Xiang, Zhengkang and Ao, Jiayang and Liu, Feng and West, Joseph and Khoshelham, Kourosh},
  title     = {Neural Distribution Prior for {LiDAR} Out-of-Distribution Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3035--3045},
}
Physical Adversarial Clothing Evades Visible-Thermal Detectors via Non-Overlapping RGB-T Pattern-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Xiaopei and Zeng, Guanning and Hu, Zhanhao and Zhu, Jun and Hu, Xiaolin},
  title     = {Physical Adversarial Clothing Evades Visible-Thermal Detectors via Non-Overlapping {RGB-T} Pattern},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13356--13365},
}
LazyVAR: Accelerating Visual Autoregressive Models via Scale-wise Token Pruning and Parallel Group Decoding-
[pdf]
[bibtex]@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Rongge and Dong, Chengqi and Zhou, S. Kevin},
  title     = {{LazyVAR}: Accelerating Visual Autoregressive Models via Scale-wise Token Pruning and Parallel Group Decoding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12129--12139},
}
HiSpatial: Taming Hierarchical 3D Spatial Understanding in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Huizhi and Shen, Yichao and Deng, Yu and Xu, Sicheng and Feng, ZhiYuan and Zhang, Tong and Liang, Yaobo and Yang, Jiaolong},
  title     = {{HiSpatial}: Taming Hierarchical {3D} Spatial Understanding in Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2502--2514},
}
EmoThinker: Advancing Visual-Acoustic Emotion Analysis via Structural Token Selection and Chain-of-Thought Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Qinfu and Pan, Liyuan and Wei, Yiwei and Yuan, Shaozu and Chen, Jiaqi and Liu, Tianyu},
  title     = {{EmoThinker}: Advancing Visual-Acoustic Emotion Analysis via Structural Token Selection and Chain-of-Thought Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1672--1682},
}
ARC Is a Vision Problem!-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Keya and Cy, Ali and Qiu, Linlu and Ding, Xiaoman Delores and Wang, Runqian and Zhu, Yeyin Eva and Andreas, Jacob and He, Kaiming},
  title     = {{ARC} Is a Vision Problem!},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2537--2546},
}
VIRST: Video-Instructed Reasoning Assistant for SpatioTemporal Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2026_CVPR,
  author    = {Hong, Jihwan and Do, Jaeyoung},
  title     = {{VIRST}: Video-Instructed Reasoning Assistant for {SpatioTemporal} Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3410--3420},
}
Real-Time Dynamic Scene Rendering with Controlled Compressibility and Contact Awareness-
[pdf]
[bibtex]@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Boya and Guan, Naiyang and Yi, Xiaodong},
  title     = {Real-Time Dynamic Scene Rendering with Controlled Compressibility and Contact Awareness},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8308--8318},
}
Complementary Prototype Mapping for Efficient Multimodal Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yuan and Zhang, Xiaoqin and Lu, Huchuan and Zhang, Lihe},
  title     = {Complementary Prototype Mapping for Efficient Multimodal Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14178--14187},
}
TempR1: Improving Temporal Understanding of MLLMs via Temporal-Aware Multi-Task Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Tao and Yang, Li and Zhan, Gen and Zhang, Yabin and Liao, Yiting and Li, Junlin and Fu, Deliang and Zhang, Li and Wang, Limin},
  title     = {{TempR1}: Improving Temporal Understanding of {MLLMs} via Temporal-Aware Multi-Task Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2756--2767},
}
DisCa: Accelerating Video Diffusion Transformers with Distillation-Compatible Learnable Feature Caching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zou_2026_CVPR,
  author    = {Zou, Chang and Li, Changlin and Liu, Songtao and Zhong, Zhao and Huang, Kailin and Zhang, Linfeng},
  title     = {{DisCa}: Accelerating Video Diffusion Transformers with Distillation-Compatible Learnable Feature Caching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4590--4601},
}
KnowVal: A Knowledge-Augmented and Value-Guided Autonomous Driving System-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Zhongyu and Chen, Wenhao and Wang, Yongtao and Yang, Ming-Hsuan},
  title     = {{KnowVal}: A Knowledge-Augmented and Value-Guided Autonomous Driving System},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3740--3749},
}
EfficientMonoHair: Fast Strand-Level Reconstruction from Monocular Video via Multi-View Direction Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Da and Engel, Dominik and Luo, Deng and Viola, Ivan},
  title     = {{EfficientMonoHair}: Fast Strand-Level Reconstruction from Monocular Video via Multi-View Direction Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7610--7619},
}
OSMO: Open-vocabulary Self-eMOtion Tracking-
[pdf]
[bibtex]@InProceedings{Abdelfattah_2026_CVPR,
  author    = {Abdelfattah, Mohamed and Tekin, Bugra and Sener, Fadime and Camgoz, Necati Cihan and Sauser, Eric and Ma, Shugao and Alahi, Alexandre and Remelli, Edoardo},
  title     = {{OSMO}: Open-vocabulary {Self-eMOtion} Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1737--1748},
}
DyaDiT: A Multi-Modal Diffusion Transformer for Socially Favorable Dyadic Gesture Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Yichen and Song, Jyun-Ting and Jung, Siyeol and Liu, Ruofan and Liu, Haiyang and Chu, Xuangeng and Liu, Ruicong and Wu, Erwin and Koike, Hideki and Kitani, Kris},
  title     = {{DyaDiT}: A Multi-Modal Diffusion Transformer for Socially Favorable Dyadic Gesture Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10932--10942},
  internal-note = {Removed the placeholder author "blank, Ulsan National Institute of Science \& Technology" (an affiliation entered as a person name, listed after Jung, Siyeol); verify the author list against the published paper.},
}
CMR-RD: Long-Tailed Adaptive VLM for Explainable CMR Diagnosis-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Yansong and Qiu, Zhongxi and Tian, Yun and Jinyu, Zheng and Li, Shuo},
  title     = {{CMR-RD}: Long-Tailed Adaptive {VLM} for Explainable {CMR} Diagnosis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7003--7013},
}
CausalLens: Sensitivity-Guided Multi-Head Causal Intervention for Hallucination Mitigation in Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ji_2026_CVPR,
  author    = {Ji, Junyang and Liu, Qifan and Yang, Wenming and He, Zhihai},
  title     = {{CausalLens}: Sensitivity-Guided Multi-Head Causal Intervention for Hallucination Mitigation in Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4199--4209},
}
Generalizable Radio-Frequency Radiance Fields for Spatial Spectrum Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Kang and Chen, Yuning and Du, Wan},
  title     = {Generalizable Radio-Frequency Radiance Fields for Spatial Spectrum Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12533--12543},
}
SymphoMotion: Joint Control of Camera Motion and Object Dynamics for Coherent Video Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Guiyu and Chen, Yabo and Xiang, Xunzhi and Huang, Junchao and Wang, Zhongyu and Jiang, Li},
  title     = {{SymphoMotion}: Joint Control of Camera Motion and Object Dynamics for Coherent Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11127--11137},
}
GraspALL: Adaptive Structural Compensation from Illumination Variation for Robotic Garment Grasping in Any Low-Light Conditions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2026_CVPR,
  author    = {Zhong, Haifeng and Han, Wenshuo and Wang, Zhouyu and Feng, Runyang and Tang, Fan and Lee, Tong-Yee and Fan, Zipei and Wu, Ruihai and Wang, Yuran and Dong, Hao and Chen, Hechang and Chang, Hyung Jin and Gao, Yixing},
  title     = {{GraspALL}: Adaptive Structural Compensation from Illumination Variation for Robotic Garment Grasping in Any Low-Light Conditions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6631--6641},
}
Measure The Feature Universe: Topology-based Pseudo Labeling and Gravity Consistency for Source-Free Domain Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Jae Yun and Nam, Hyeok and Cho, Sung In},
  title     = {Measure The Feature Universe: Topology-based Pseudo Labeling and Gravity Consistency for Source-Free Domain Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3617--3626},
}
Boosting Vision-Language Models Towards Cross-Domain Incremental Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xu and Lin, Zihan and Zhang, Yixin and Wang, Zilei},
  title     = {Boosting Vision-Language Models Towards Cross-Domain Incremental Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6249--6260},
}
Cross-Slice Knowledge Transfer via Masked Multi-Modal Heterogeneous Graph Contrastive Learning for Spatial Gene Expression Inference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Zhiceng and Wang, Changmiao and Wan, Jun and Min, Wenwen},
  title     = {Cross-Slice Knowledge Transfer via Masked Multi-Modal Heterogeneous Graph Contrastive Learning for Spatial Gene Expression Inference},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5710--5719},
}
SemVideo: Reconstructs What You Watch from Brain Activity via Hierarchical Semantic Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Minghan and Yang, Lan and Li, Ke and Zhang, Honggang and Pang, Kaiyue and Song, Yi-Zhe},
  title     = {{SemVideo}: Reconstructs What You Watch from Brain Activity via Hierarchical Semantic Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13658--13669},
}
3M-TI: High-Quality Mobile Thermal Imaging via Calibration-free Multi-Camera Cross-Modal Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Minchong and Yuan, Xiaoyun and Wan, Junzhe and Zhang, Jianing and Zhang, Jun},
  title     = {{3M-TI}: High-Quality Mobile Thermal Imaging via Calibration-free Multi-Camera Cross-Modal Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5659--5669},
}
Black-Box Domain Adaptation for Object Detection with Retention-Driven Knowledge Compression-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Yuwu and Liu, Chunzhi},
  title     = {Black-Box Domain Adaptation for Object Detection with Retention-Driven Knowledge Compression},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {801--811},
}
FoundIR-v2: Optimizing Pre-Training Data Mixtures for Image Restoration Foundation Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Xiang and Pan, Jinshan and Dong, Jiangxin and Yang, Jian and Tang, Jinhui},
  title     = {{FoundIR-v2}: Optimizing Pre-Training Data Mixtures for Image Restoration Foundation Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8471--8480},
}
Learning Coordinate-based Convolutional Kernels for Continuous SE(3) Equivariant and Efficient Point Cloud Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jaein and Yoo, Hee Bin and Han, Dong-Sig and Zhang, Byoung-Tak},
  title     = {Learning Coordinate-based Convolutional Kernels for Continuous {SE(3)} Equivariant and Efficient Point Cloud Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9986--9995},
}
PRISM: Learning a Shared Primitive Space for Transferable Skeleton Action Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Di and Wang, Yaohui and Shao, Shuai and Br{\'e}mond, Fran{\c{c}}ois and Wang, Jiangtao},
  title     = {{PRISM}: Learning a Shared Primitive Space for Transferable Skeleton Action Representation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6085--6094},
}
Stay in your Lane: Role Specific Queries with Overlap Suppression Loss for Dense Video Captioning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Baek_2026_CVPR,
  author    = {Baek, Seung Hyup and Lee, Jimin and Lee, Hyeongkeun and Cho, Jae Won},
  title     = {Stay in your Lane: Role Specific Queries with Overlap Suppression Loss for Dense Video Captioning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3432--3442},
}
HumanVBench: Probing Human-Centric Video Understanding in MLLMs with Automatically Synthesized Benchmarks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Ting and Chen, Daoyuan and Jiao, Qirui and Ding, Bolin and Li, Yaliang and Shen, Ying},
  title     = {{HumanVBench}: Probing Human-Centric Video Understanding in {MLLMs} with Automatically Synthesized Benchmarks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4494--4504},
}
SpikeTrack: A Spike-driven Framework for Efficient Visual Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Qiuyang and Cheng, Jiujun and Mao, Qichao and Liu, Cong and Fang, Yu and Li, Yuhong and Ge, Mengying and Gao, Shangce}, title = {SpikeTrack: A Spike-driven Framework for Efficient Visual Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6802-6811} }
Beyond Single-View Sufficiency: CVBench for Cross-View Human Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Tianchen and Liu, Chen and Yu, Xin}, title = {Beyond Single-View Sufficiency: CVBench for Cross-View Human Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7154-7164} }
Adaptive Data Augmentation with Multi-armed Bandit: Sample-Efficient Embedding Calibration for Implicit Pattern Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Minxue and Yu, Yangyang and Ding, Aolin and Pouyan, Maziyar Baran and Belkhouja, Taha and Bao, Yujia}, title = {Adaptive Data Augmentation with Multi-armed Bandit: Sample-Efficient Embedding Calibration for Implicit Pattern Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7978-7989} }
Energy-GS: Image Energy-guided Pose Alignment Gaussian Splatting with redesigned pose gradient flow-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Yu and Su, Lutong and Huang, Ruixiang and Jiang, Tianji and Tang, Jiadong and Yue, Yufeng and Yang, Yi}, title = {Energy-GS: Image Energy-guided Pose Alignment Gaussian Splatting with redesigned pose gradient flow}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7310-7319} }
Mostly Text, Smart Visuals: Asymmetric Text-Visual Pruning for Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Sijie and Qian, Biao and Han, Jungong}, title = {Mostly Text, Smart Visuals: Asymmetric Text-Visual Pruning for Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10472-10481} }
FedHarmony: Harmonizing Heterogeneous Label Correlations in Federated Multi-Label Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Kou_2026_CVPR, author = {Kou, Zhiqiang and Wu, Junxiang and Huang, Wenke and He, Wenwen and Xie, Ming-Kun and Wang, Changwei and Jia, Yuheng and Jiang, Di and Liu, Yang and Geng, Xin and Yang, Qiang}, title = {FedHarmony: Harmonizing Heterogeneous Label Correlations in Federated Multi-Label Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10324-10334} }
MuViT: Multi-Resolution Vision Transformers for Learning Across Scales in Microscopy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mantes_2026_CVPR, author = {Mantes, Albert Dominguez and La Manno, Gioele and Weigert, Martin}, title = {MuViT: Multi-Resolution Vision Transformers for Learning Across Scales in Microscopy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13648-13657} }
SGSoft: Learning Fused Semantic-Geometric Features for 3D Shape Correspondence via Template-Guided Soft Signals-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yoon_2026_CVPR, author = {Yoon, Soyeon and Seo, Chang Wook and Shim, Hyunjung}, title = {SGSoft: Learning Fused Semantic-Geometric Features for 3D Shape Correspondence via Template-Guided Soft Signals}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7142-7153} }
OMG-Bench: A New Challenging Benchmark for Skeleton-based Online Micro Hand Gesture Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chang_2026_CVPR, author = {Chang, Haochen and Ren, Pengfei and Zhang, Buyuan and Li, Da and Han, Tianhao and Zhang, Haoyang and Xie, Liang and Chen, Hongbo and Yin, Erwei}, title = {OMG-Bench: A New Challenging Benchmark for Skeleton-based Online Micro Hand Gesture Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7068-7078} }
Reinforcing Structured Chain-of-Thought for Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Peiyao and Xu, Haotian and Vesdapunt, Noranart and Hou, Rui and Zhang, Jingyi and Ling, Haibin and Obiednikov, Oleksandr and Zhou, Ning and Fu, Kah Kuen}, title = {Reinforcing Structured Chain-of-Thought for Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9794-9803} }
Dynamic-eDiTor: Training-Free Text-Driven 4D Scene Editing with Multimodal Diffusion Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Dong In and Doh, Hyungjun and Chi, Seunggeun and Duan, Runlin and Kim, Sangpil and Ramani, Karthik}, title = {Dynamic-eDiTor: Training-Free Text-Driven 4D Scene Editing with Multimodal Diffusion Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1187-1197} }
Guiding Diffusion-based Reconstruction with Contrastive Signals for Balanced Visual Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Boyu and Xu, Qianqian and Bao, Shilong and Yang, Zhiyong and Cui, Ruochen and Zhao, Xilin and Huang, Qingming}, title = {Guiding Diffusion-based Reconstruction with Contrastive Signals for Balanced Visual Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2369-2380} }
Learning to Refuse: Refusal-Aware Reinforcement Fine-Tuning for Hard-Irrelevant Queries in Video Temporal Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Jin-Seop and Lee, SungJoon and Jung, SeongJun and Li, Boyang and Lee, Jee-Hyong}, title = {Learning to Refuse: Refusal-Aware Reinforcement Fine-Tuning for Hard-Irrelevant Queries in Video Temporal Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10397-10407} }
Rethinking Concept Bottleneck Models: From Pitfalls to Solutions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tapli_2026_CVPR, author = {Tapli, Merve and Bouniot, Quentin and Stammer, Wolfgang and Akata, Zeynep and Akbas, Emre}, title = {Rethinking Concept Bottleneck Models: From Pitfalls to Solutions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9901-9910} }
WildRayZer: Self-supervised Large View Synthesis in Dynamic Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xuweiyi and Zhou, Wentao and Cheng, Zezhou}, title = {WildRayZer: Self-supervised Large View Synthesis in Dynamic Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1252-1264} }
Attribution as Retrieval: Model-Agnostic AI-Generated Image Attribution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Hongsong and Cheng, Renxi and Han, Chaolei and Gui, Jie}, title = {Attribution as Retrieval: Model-Agnostic AI-Generated Image Attribution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14062-14072} }
Spectrally Distilled Representations Aligned with Instruction-Augmented LLMs for Satellite Imagery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Do_2026_CVPR, author = {Do, Minh Kha and Xiang, Wei and Han, Kang and Wu, Di and Phan, Khoa and Chen, Yi-Ping Phoebe and Liu, Gaowen and Kompella, Ramana Rao}, title = {Spectrally Distilled Representations Aligned with Instruction-Augmented LLMs for Satellite Imagery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6453-6463} }
Towards Knowledge-augmented Bayesian Deep Learning For Computer Vision-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Wang and Wang, Hanjing and Zhang, Yufei and Udayanga, Darsha and Ji, Qiang}, title = {Towards Knowledge-augmented Bayesian Deep Learning For Computer Vision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6136-6146} }
Spectrum from Defocus: Fast Spectral Imaging with Chromatic Focal Stack-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Aydin_2026_CVPR, author = {Aydin, M. Kerem and Hung, Yi-Chun and Pytlarz, Jaclyn and Guo, Qi and Alexander, Emma}, title = {Spectrum from Defocus: Fast Spectral Imaging with Chromatic Focal Stack}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {220-230} }
IF-Prune: Information-Flow Guided Token Pruning for Efficient Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Guohao and Wang, Yufei and Ma, Sizhuo and Xie, Yuege and Cheng, Yuting and Tao, Zhiqiang and Wang, Jian}, title = {IF-Prune: Information-Flow Guided Token Pruning for Efficient Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3522-3531} }
Curvature-Aware Zeroth-Order Optimization for Memory-Efficient Test-Time Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Junming and Yin, Shuyu and Liu, Peilin and Ying, Rendong and Wen, Fei}, title = {Curvature-Aware Zeroth-Order Optimization for Memory-Efficient Test-Time Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {836-846} }
VL-RouterBench: A Benchmark for Vision-Language Model Routing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Zhehao and Lin, Baijiong and Zhang, Jingyuan and Wang, Jingying and Liu, Yuhang and Lu, Ning and Li, Tao and Huang, Xiaolin}, title = {VL-RouterBench: A Benchmark for Vision-Language Model Routing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9512-9523} }
RevINN: An End-to-End Invertible Neural Network for Reversible Adversarial Examples Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Jielun and Pun, Chi-Man and Huang, Guoheng}, title = {RevINN: An End-to-End Invertible Neural Network for Reversible Adversarial Examples Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6601-6610} }
Neural Gabor Splatting: Enhanced Gaussian Splatting with Neural Gabor for High-frequency Surface Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Watanabe_2026_CVPR, author = {Watanabe, Haato and Umetani, Nobuyuki}, title = {Neural Gabor Splatting: Enhanced Gaussian Splatting with Neural Gabor for High-frequency Surface Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4932-4941} }
Attention-aware Inference Optimizations for Large Vision-Language Models with Memory-efficient Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ilhan_2026_CVPR, author = {Ilhan, Fatih and Liu, Gaowen and Kompella, Ramana Rao and Tekin, Selim Furkan and Huang, Tiansheng and Yahn, Zachary and Xu, Yichang and Liu, Ling}, title = {Attention-aware Inference Optimizations for Large Vision-Language Models with Memory-efficient Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10482-10491} }
RoadGIE: Towards A Global-Scale Aerial Benchmark for Generalizable Interactive Road Extraction-
[pdf]
[supp]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Chenxu and Wang, Chenxu and Dai, Yimian and Liu, Yongxiang and Cheng, Ming-Ming and Li, Xiang}, title = {RoadGIE: Towards A Global-Scale Aerial Benchmark for Generalizable Interactive Road Extraction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13285-13295} }
UniGeoRS: A Unified Benchmark for Tri-view Geo-Localization-
[pdf]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Xiao and Tang, Huaizhi and Zhang, Feiyang and Yuan, Shiji and Hu, Chun and Zheng, Dezhi and Ma, Kang}, title = {UniGeoRS: A Unified Benchmark for Tri-view Geo-Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5399-5408} }
AHS: Adaptive Head Synthesis via Synthetic Data Augmentations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2026_CVPR, author = {Kang, Taewoong and Jang, Hyojin and Jeong, Sohyun and Moon, Seunggi and Kim, Gihwi and Jung, Hoon Jin and Choo, Jaegul}, title = {AHS: Adaptive Head Synthesis via Synthetic Data Augmentations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2125-2135} }
Your Dissimilarities Define You: Complementary Learning Exploiting Class Diversities-
[pdf]
[supp]
[bibtex]@InProceedings{Katsikas_2026_CVPR, author = {Katsikas, Dimitrios and Passalis, Nikolaos and Tefas, Anastasios}, title = {Your Dissimilarities Define You: Complementary Learning Exploiting Class Diversities}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10512-10521} }
Asynchronous Temporal Modeling with Two-Agent Framework for Streaming Dense Video Captioning-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Yolo Y. and Huang, Chao and Liang, Susan and Bi, Jing and Wang, Yicheng and Shimada, Daiki and Xu, Chenliang}, title = {Asynchronous Temporal Modeling with Two-Agent Framework for Streaming Dense Video Captioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2799-2810} }
Training One Model to Master Cross-Level Agentic Actions via Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Kaichen and Wang, Zihao and Li, Muyao and Liu, Anji and Liang, Yitao}, title = {Training One Model to Master Cross-Level Agentic Actions via Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {724-734} }
Mind the Hitch: Dynamic Calibration and Articulated Perception for Autonomous Trucks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Morui and Zhu, Yongqi and Fu, Song and Yang, Qing}, title = {Mind the Hitch: Dynamic Calibration and Articulated Perception for Autonomous Trucks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10668-10677} }
Beyond Text: Visual Description Assembly by Probabilistic Model for CLIP-based Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Qiu_2026_CVPR, author = {Qiu, Xianglin and Wang, Jian and Wang, Xiaolei and Zhang, Zhen and Xiao, Jimin}, title = {Beyond Text: Visual Description Assembly by Probabilistic Model for CLIP-based Weakly Supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6346-6356} }
R4-CGQA: Retrieval-based Vision Language Models for Computer Graphics Image Quality Assessment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zhuangzi and Jin, Jian and Cai, Shilv and Lin, Weisi}, title = {R4-CGQA: Retrieval-based Vision Language Models for Computer Graphics Image Quality Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9468-9477} }
ARGUS: Defending Against Multimodal Indirect Prompt Injection via Steering Instruction-Following Behavior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Weikai and Zeng, Ziqian and Zhang, Kehua and Li, Haoran and Zhuang, Huiping and Wang, Ruidong and Chen, Cen and Peng, Hao}, title = {ARGUS: Defending Against Multimodal Indirect Prompt Injection via Steering Instruction-Following Behavior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31-40} }
SLVMEval: Synthetic Meta Evaluation Benchmark for Text-to-Long Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Matsuda_2026_CVPR, author = {Matsuda, Ryosuke and Kudo, Keito and Yoshida, Haruto and Shimizu, Nobuyuki and Suzuki, Jun}, title = {SLVMEval: Synthetic Meta Evaluation Benchmark for Text-to-Long Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7784-7794} }
Discovering Adaptive Task Dependencies for Efficient Multi-Task Representation Compression-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Zhimeng and Yuan, Rongao and Gao, Junlong and Mao, Qi and Ma, Siwei and Gao, Wen and Jia, Chuanmin}, title = {Discovering Adaptive Task Dependencies for Efficient Multi-Task Representation Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5326-5336} }
Lynx: Towards High-Fidelity Personalized Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sang_2026_CVPR, author = {Sang, Shen and Zhi, Tiancheng and Gu, Tianpei and Liu, Jing and Luo, Linjie}, title = {Lynx: Towards High-Fidelity Personalized Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9192-9202} }
Scaling Multi-Identity Consistency for Image Customization via Multi-to-Multi Matching Paradigm-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Yufeng and Wu, Wenxu and Wu, Shaojin and Huang, Mengqi and Ding, Fei and He, Qian}, title = {Scaling Multi-Identity Consistency for Image Customization via Multi-to-Multi Matching Paradigm}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1906-1916} }
Towards Human-Imperceptible Backdoor Attacks on Text-to-Image Diffusion Models-
[pdf]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Yiming and Chen, Chenghao and Wu, Changkun and Fu, Chong and Zhu, Biru and Wen, Zhenyu and Hong, Zhen}, title = {Towards Human-Imperceptible Backdoor Attacks on Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1503-1512} }
Selection-as-Nonlinearity: Bridging Attention and Activation via a Joint Game-Decision Lens for Interpretable, Discriminative Visual Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Sudong and Yuan, Shuai and Chen, Bingzhi and Mao, Rui and Wang, Bing}, title = {Selection-as-Nonlinearity: Bridging Attention and Activation via a Joint Game-Decision Lens for Interpretable, Discriminative Visual Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11621-11631} }
Let Your Image Move with Your Motion! -- Implicit Multi-Object Multi-Motion Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yuze and Gong, Dong and Cao, Xiao and Yuan, Junchao and Li, Dongsheng and Zhou, Lei and Koh, Yun Sing and Yan, Cheng and Zhang, Xinyu}, title = {Let Your Image Move with Your Motion! -- Implicit Multi-Object Multi-Motion Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11207-11217} }
Sky2Ground: A Benchmark for Site Modeling under Varying Altitude-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zengyan and Mitra, Sirshapan and Modi, Rajat and Lim, Hui and Rawat, Yogesh}, title = {Sky2Ground: A Benchmark for Site Modeling under Varying Altitude}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12227-12236} }
FORCE: Transferable Visual Jailbreaking Attacks via Feature Over-Reliance CorrEction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Runqi and Paren, Alasdair and Yuan, Suqin and Li, Muyang and Torr, Philip and Bibi, Adel and Liu, Tongliang}, title = {FORCE: Transferable Visual Jailbreaking Attacks via Feature Over-Reliance CorrEction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8610-8620} }
E-3DPSM: A State Machine for Event-based Egocentric 3D Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deshmukh_2026_CVPR, author = {Deshmukh, Mayur and Akada, Hiroyasu and Rhodin, Helge and Theobalt, Christian and Golyanik, Vladislav}, title = {E-3DPSM: A State Machine for Event-based Egocentric 3D Human Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14017-14026} }
Semi-supervised Echocardiography Video Segmentation via Anchor Semantic Awareness and Continuous Pseudo-label Reforging-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Yunpeng and Sun, Yimu and Guo, Jingxing and Wu, Huisi and Qin, Jing}, title = {Semi-supervised Echocardiography Video Segmentation via Anchor Semantic Awareness and Continuous Pseudo-label Reforging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8535-8544} }
The Devil is in Attention Sharing: Improving Complex Non-rigid Image Editing Faithfulness via Attention Synergy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zhuo and Wei, Fanyue and Xu, Runze and Li, Jingjing and Duan, Lixin and Yao, Angela and Li, Wen}, title = {The Devil is in Attention Sharing: Improving Complex Non-rigid Image Editing Faithfulness via Attention Synergy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8237-8246} }
OmniZip: Learning a Unified and Lightweight Lossless Compressor for Multi-Modal Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yan and Cheng, Zhengxue and Zhang, Junxuan and Zhou, Dajiang and Gu, Qunshan and Wang, Qi and Song, Li}, title = {OmniZip: Learning a Unified and Lightweight Lossless Compressor for Multi-Modal Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5337-5347} }
ProjFlow: Projection Sampling with Flow Matching for Zero-Shot Exact Spatial Motion Control-
[pdf]
[supp]
[bibtex]@InProceedings{Watanabe_2026_CVPR, author = {Watanabe, Akihisa and Yu, Qing and Simo-Serra, Edgar and Fujiwara, Kent}, title = {ProjFlow: Projection Sampling with Flow Matching for Zero-Shot Exact Spatial Motion Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2305-2315} }
Diverse Video Generation with Determinantal Point Process-Guided Policy Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kazimi_2026_CVPR, author = {Kazimi, Tahira and Dunlop, Connor and Yanardag, Pinar}, title = {Diverse Video Generation with Determinantal Point Process-Guided Policy Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12839-12848} }
Proxy-GS: Unified Occlusion Priors for Training and Inference in Structured 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Yuanyuan and Gong, Yuning and Liu, Yifei and Li, Jingfeng and Xu, Dan and Zhang, Yanci and Zhang, Dingwen and Sun, Xiao and Zhong, Zhihang}, title = {Proxy-GS: Unified Occlusion Priors for Training and Inference in Structured 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7330-7339} }
AnyPcc: Compressing Any Point Cloud with a Single Universal Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Kangli and Yi, Qianxi and Ye, Yuqi and Li, Shihao and Gao, Wei}, title = {AnyPcc: Compressing Any Point Cloud with a Single Universal Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2972-2982} }
InternData-A1: Pioneering High-Fidelity Synthetic Data for Pre-training Generalist Policy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2026_CVPR, author = {Tian, Yang and Yang, Yuyin and Xie, Yiman and Cai, Zetao and Shi, Xu and Gao, Ning and Liu, Hangxu and Jiang, Xuekun and Qiu, Zherui and Yuan, Feng and Li, Yaping and Wang, Ping and Cai, Junhao and Zeng, Jia and Dong, Hao and Pang, Jiangmiao}, title = {InternData-A1: Pioneering High-Fidelity Synthetic Data for Pre-training Generalist Policy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {976-985} }
ART: Articulated Reconstruction Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zizhang and Zhang, Cheng and Li, Zhengqin and Howard-Jenkins, Henry and Lv, Zhaoyang and Geng, Chen and Wu, Jiajun and Newcombe, Richard and Engel, Jakob and Dong, Zhao}, title = {ART: Articulated Reconstruction Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7468-7479} }
SAIDO: Generalizable Detection of AI-Generated Images via Scene-Aware and Importance-Guided Dynamic Optimization in Continual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Yongkang and Cheng, Yu and Zhang, Yushuo and Xie, Yuan and Yin, Zhaoxia}, title = {SAIDO: Generalizable Detection of AI-Generated Images via Scene-Aware and Importance-Guided Dynamic Optimization in Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3876-3886} }
Occluded Human Body Capture with Frequency Domain Denoising Prior-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Buzhen and Xu, Chongyang and Tang, Wentao and Shu, Yuan and Ju, Jingyi and Zuo, Binghui and Wang, Yangang}, title = {Occluded Human Body Capture with Frequency Domain Denoising Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13930-13939} }
MedCLIPSeg: Probabilistic Vision-Language Adaptation for Data-Efficient and Generalizable Medical Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Koleilat_2026_CVPR, author = {Koleilat, Taha and Asgariandehkordi, Hojat and Nejatimanzari, Omid and Barile, Berardino and Xiao, Yiming and Rivaz, Hassan}, title = {MedCLIPSeg: Probabilistic Vision-Language Adaptation for Data-Efficient and Generalizable Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1406-1417} }
Decoupled and Reusable Adaptation for Efficient Cross-Modal Transfer-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yajing and Zhang, Yumeng and Si, Yue and Fan, Baojie and Tian, Jiandong}, title = {Decoupled and Reusable Adaptation for Efficient Cross-Modal Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {812-822} }
Olbedo: An Albedo and Shading Aerial Dataset for Large-Scale Outdoor Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Shuang and Huang, Debao and Deng, Deyan and Xiong, Haolin and Tang, Yang and Zhao, Yajie and Qin, Rongjun}, title = {Olbedo: An Albedo and Shading Aerial Dataset for Large-Scale Outdoor Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6474-6483} }
UnityVideo: Unified Multi-Modal Multi-Task Learning for Enhancing World-Aware Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Jiehui and Zhang, Yuechen and He, Xu and Gao, Yuan and Cen, Zhi and Xia, Bin and Zhou, Yan and Tao, Xin and Wan, Pengfei and Jia, Jiaya}, title = {UnityVideo: Unified Multi-Modal Multi-Task Learning for Enhancing World-Aware Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4471-4481} }
Drift-Resilient Temporal Priors for Visual Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Yuqing and Lin, Liting and Zhuang, Weijun and He, Zhenyu and Li, Xin}, title = {Drift-Resilient Temporal Priors for Visual Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6847-6856} }
MuKV: Multi-Grained KV Cache Compression for Long Streaming Video Question-Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Junbin and Chen, Jiajun and Sun, Tianxiang and Yang, Xun and Yao, Angela}, title = {MuKV: Multi-Grained KV Cache Compression for Long Streaming Video Question-Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11381-11391} }
Spatia: Video Generation with Updatable Spatial Memory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Jinjing and Wei, Fangyun and Liu, Zhening and Zhang, Hongyang and Xu, Chang and Lu, Yan}, title = {Spatia: Video Generation with Updatable Spatial Memory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4245-4257} }
ORV: 4D Occupancy-centric Robot Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Xiuyu and Li, Bohan and Xu, Shaocong and Wang, Nan and Ye, Chongjie and Chen, Zhaoxi and Qin, Minghan and Ding, Yikang and Zhu, Zheng and Jin, Xin and Zhao, Hang and Zhao, Hao},
  title     = {{ORV}: {4D} Occupancy-centric Robot Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1053--1066},
}
RiskProp: Collision-Anchored Self-Supervised Risk Propagation For Early Accident Anticipation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zou_2026_CVPR,
  author    = {Zou, Yiyang and Zhao, Tianhao and Xiao, Peilun and Jin, Hongyu and Qi, Longyu and Li, Yuxuan and Liang, Liyin and Qian, Yifeng and Lai, Chunbo and Lin, Yutian and Li, Zhihui and Wu, Yu},
  title     = {{RiskProp}: Collision-Anchored Self-Supervised Risk Propagation For Early Accident Anticipation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2768--2777},
}
OVSegDT: Segmenting Transformer for Open-Vocabulary Object Goal Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zemskova_2026_CVPR,
  author    = {Zemskova, Tatiana and Staroverov, Aleksei and Yudin, Dmitry and Panov, Aleksandr},
  title     = {{OVSegDT}: Segmenting Transformer for Open-Vocabulary Object Goal Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8120--8129},
}
Learning to Reason in 4D: Dynamic Spatial Understanding for Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Shengchao and Chen, Yuxin and Ge, Yuying and Huang, Wei and Lin, Jiehong and Shan, Ying and Qi, Xiaojuan},
  title     = {Learning to Reason in {4D}: Dynamic Spatial Understanding for Vision Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9637--9646},
}
THE MORE, THE MERRIER: CONTRASTIVE FUSION FOR HIGHER-ORDER MULTIMODAL ALIGNMENT-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Koutoupis_2026_CVPR,
  author    = {Koutoupis, Stefanos and Zervou, Michaela Areti and Kontras, Konstantinos and De Vos, Maarten and Tsakalides, Panagiotis and Tsagkatakis, Grigorios},
  title     = {The More, the Merrier: Contrastive Fusion for Higher-Order Multimodal Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8825--8835},
}
Beyond Global Similarity: Multi-Conditional Retrieval for Fine-Grained Cross-Modal Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Xuan and Li, Kangle and Huang, Haohang and Meng, Rui and Zeng, Wenjun and Shen, Xiaoyu},
  title     = {Beyond Global Similarity: Multi-Conditional Retrieval for Fine-Grained Cross-Modal Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9699--9709},
}
SCE-SLAM: Scale-Consistent Monocular SLAM via Scene Coordinate Embeddings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yuchen and Li, Jiahe and Yu, Xiaohan and Yu, Lina and Zheng, Jin and Bai, Xiao},
  title     = {{SCE-SLAM}: Scale-Consistent Monocular {SLAM} via Scene Coordinate Embeddings},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7480--7490},
}
Test-Time Perturbation Tuning with Delayed Feedback for Vision-Language-Action Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zang_2026_CVPR,
  author    = {Zang, Zehua and Wang, Xi and Sun, Fuchun and Xu, Xiao and Liu, Lixiang and Zhou, Jiahuan and Li, Jiangmeng},
  title     = {Test-Time Perturbation Tuning with Delayed Feedback for Vision-Language-Action Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8110--8119},
}
Towards High-resolution and Disentangled Reference-based Sketch Colorization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Dingkun and Wang, Xinrui and Wang, Ru and Li, Zhuoru and Yu, Jinze and Iwasawa, Yusuke and Matsuo, Yutaka and Guo, Jiaxian},
  title     = {Towards High-resolution and Disentangled Reference-based Sketch Colorization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11472--11481},
}
UniDex: A Robot Foundation Suite for Universal Dexterous Hand Control from Egocentric Human Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Gu and Xu, Qicheng and Zhang, Haozhe and Ma, Jianhan and He, Long and Bao, Yiming and Ping, Zeyu and Yuan, Zhecheng and Lu, Chenhao and Yuan, Chengbo and Liang, Tianhai and Tian, Xiaoyu and Shao, Maanping and Zhang, Feihong and Ding, Mingyu and Gao, Yang and Zhao, Hao and Zhao, Hang and Xu, Huazhe},
  title     = {{UniDex}: A Robot Foundation Suite for Universal Dexterous Hand Control from Egocentric Human Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1841--1852},
}
LaMoGen: Language to Motion Generation Through LLM-Guided Symbolic Inference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Junkun and Au, Ho Yin and Xiang, Jingyu and Chen, Jie},
  title     = {{LaMoGen}: Language to Motion Generation Through {LLM-Guided} Symbolic Inference},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9364--9373},
}
PRUE: A Practical Recipe for Field Boundary Segmentation at Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Muhawenayo_2026_CVPR,
  author    = {Muhawenayo, Gedeon and Robinson, Caleb and Khanal, Subash and Fang, Zhanpei and Corley, Isaac and Wollam, Alexander and Gao, Tianyi and Strnad, Leonard and Avery, Ryan and Estes, Lyndon and T{\'a}rano, Ana and Jacobs, Nathan and Kerner, Hannah},
  title     = {{PRUE}: A Practical Recipe for Field Boundary Segmentation at Scale},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6484--6495},
}
Recovering Physically Plausible Human-Object Interactions from Monocular Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Dingbang and Vouga, Etienne and Huang, Qixing and Pavlakos, Georgios},
  title     = {Recovering Physically Plausible Human-Object Interactions from Monocular Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7079--7088},
}
SPARK: Sim-ready Part-level Articulated Reconstruction with VLM Knowledge-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR,
  author    = {He, Yumeng and Jiang, Ying and Lu, Jiayin and Yang, Yin and Jiang, Chenfanfu},
  title     = {{SPARK}: Sim-ready Part-level Articulated Reconstruction with {VLM} Knowledge},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7233--7243},
}
LaSM: Layer-wise Scaling Mechanism for Defending Pop-up Attack on GUI Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Zihe and Zhang, Zhuosheng and Gui, Jiaping and Liu, Gongshen},
  title     = {{LaSM}: Layer-wise Scaling Mechanism for Defending Pop-up Attack on {GUI} Agents},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6528--6537},
}
AGiLe: Learning Robust Long-Horizon Manipulation via Affordance-Grounded Bidirectional Latent Planning-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zixuan and Feng, Xiangrong and Shi, Jieqi and Shao, Lin and Huo, Jing and Gao, Yang},
  title     = {{AGiLe}: Learning Robust Long-Horizon Manipulation via Affordance-Grounded Bidirectional Latent Planning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6760--6769},
}
MaxMark: High-Capacity Diffusion-Native Watermarking via Robust and Invertible Latent Embedding-
[pdf]
[supp]
[bibtex]@InProceedings{Chang_2026_CVPR,
  author    = {Chang, Xuanhang and Yang, Zhonghao and Zhuo, Cheng and Li, Yu},
  title     = {{MaxMark}: High-Capacity Diffusion-Native Watermarking via Robust and Invertible Latent Embedding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9394--9403},
}
DrivePTS: A Progressive Learning Framework with Textual and Structural Enhancement for Driving Scene Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zhechao and Zeng, Yiming and Ma, Lufan and Fu, Zeqing and Bai, Chen and Yin, Dongshuo and Lin, Ziyao and Lu, Cheng},
  title     = {{DrivePTS}: A Progressive Learning Framework with Textual and Structural Enhancement for Driving Scene Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3699--3708},
}
Local Motion Matters: A Deconstruct-Recompose Paradigm for Reinforcement Learning Pre-training from Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jinwen and Lin, Youfang and Hu, Xiaobo and Wang, Shuo and Lv, Kai},
  title     = {Local Motion Matters: A Deconstruct-Recompose Paradigm for Reinforcement Learning Pre-training from Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9859--9868},
}
GeoTikzBridge: Advancing Multimodal Code Generation for Geometric Perception and Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Jiayin and Sun, Caixia and Yang, Boyu and Li, Hailin and Chen, Xiao and Zhang, Yi and Ding, Errui and Li, Liang and Deng, Chao and Feng, Junlan},
  title     = {{GeoTikzBridge}: Advancing Multimodal Code Generation for Geometric Perception and Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9593--9603},
}
Omni2Sound: Towards Unified Video-Text-to-Audio Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Yusheng and Chen, Zehua and Jiang, Yuxuan and Ke, Qiuhong and Cai, Jianfei and Zhu, Jun},
  title     = {{Omni2Sound}: Towards Unified Video-Text-to-Audio Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1661--1671},
}
LogCD: Local-to-global Consistency Distillation for Few-step Image Generation-
[pdf]
[bibtex]@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Qingsong and Liao, Zhenyi and Chen, Chen and Deng, Zhijie and Lu, Haonan},
  title     = {{LogCD}: Local-to-global Consistency Distillation for Few-step Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8954--8964},
}
EW-DETR: Evolving World Object Detection via Incremental Low-Rank DEtection TRansformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Monga_2026_CVPR,
  author    = {Monga, Munish and Chudasama, Vishal and Wasnik, Pankaj and Jawahar, C. V.},
  title     = {{EW-DETR}: Evolving World Object Detection via Incremental Low-Rank {DEtection} {TRansformer}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11324--11333},
}
QuietPrune: Query-Guided Early Token Pruning for Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Tianxiao and Zhao, Shanwei and Fang, Shuo and Zhu, Shiai and Ma, Chenguang},
  title     = {{QuietPrune}: Query-Guided Early Token Pruning for Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3553--3562},
}
b-CLIP: Text-Conditioned Contrastive Learning for Multi-Granular Vision-Language Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Zohra_2026_CVPR,
  author    = {Zohra, Fatimah and Zhao, Chen and Itani, Hani and Ghanem, Bernard},
  title     = {{b-CLIP}: Text-Conditioned Contrastive Learning for Multi-Granular Vision-Language Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {680--689},
}
Cross-modal Identity Mapping: Minimizing Information Loss in Modality Conversion via Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Haonan and Dong, Shichao and Dong, Xin and Sun, Zenghui and Wang, Jin and Lan, Jinsong and Zhu, Xiaoyong and Zheng, Bo and Zhang, Kaifu},
  title     = {Cross-modal Identity Mapping: Minimizing Information Loss in Modality Conversion via Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {766--777},
}
Towards Highly Transferable Vision-Language Attack via Semantic-Augmented Dynamic Contrastive Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Yuanbo and Xu, Tianyang and Hu, Cong and Zhou, Tao and Wu, Xiaojun and Kittler, Josef},
  title     = {Towards Highly Transferable Vision-Language Attack via Semantic-Augmented Dynamic Contrastive Interaction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1493--1502},
}
CC-VQA: Conflict- and Correlation-Aware Method for Mitigating Knowledge Conflict in Knowledge-Based Visual Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2026_CVPR,
  author    = {Hong, Yuyang and Gu, Jiaqi and Lou, Yujing and Fan, Lubin and Yang, Qi and Wang, Ying and Ding, Kun and Wu, Yue and Xiang, Shiming and Ye, Jieping},
  title     = {{CC-VQA}: Conflict- and Correlation-Aware Method for Mitigating Knowledge Conflict in Knowledge-Based Visual Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5232--5241},
}
OneOcc: Semantic Occupancy Prediction for Legged Robots with a Single Panoramic Camera-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Hao and Wang, Ze and Guo, Shangwei and Duan, Mengfei and Wang, Song and Chen, Teng and Yang, Kailun and Wang, Lin and Wang, Kaiwei},
  title     = {{OneOcc}: Semantic Occupancy Prediction for Legged Robots with a Single Panoramic Camera},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14229--14240},
}
ModularAgent: A Task-Aware Modular Framework for Joint Optimization of Multimodal Large Language Models and World Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhan_2026_CVPR,
  author    = {Zhan, Yu-Wei and Wang, Xin and Mao, Pengzhe and Feng, Tongtong and Wang, Ren and Zhu, Wenwu},
  title     = {{ModularAgent}: A Task-Aware Modular Framework for Joint Optimization of Multimodal Large Language Models and World Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8087--8096},
}
Bridging Domains through Subspace-Aware Model Merging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chaves_2026_CVPR,
  author    = {Chaves, Levy and Zhou, Chao and Burkholz, Rebekka and Valle, Eduardo and Avila, Sandra},
  title     = {Bridging Domains through Subspace-Aware Model Merging},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7990--7999},
}
Dynamic Black-hole Emission Tomography with Physics-informed Neural Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Berthy T. and Chael, Andrew A. and Bromley, David and Levis, Aviad and Freeman, William T. and Bouman, Katherine L.},
  title     = {Dynamic Black-hole Emission Tomography with Physics-informed Neural Fields},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12511--12521},
}
Spatial-Aware VLA Pretraining through Visual-Physical Alignment from Human Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Yicheng and Zhang, Wanpeng and Wang, Ye and Luo, Hao and Yuan, Haoqi and Zheng, Sipeng and Lu, Zongqing},
  title     = {Spatial-Aware {VLA} Pretraining through Visual-Physical Alignment from Human Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {712--723},
}
Back to the Feature: Explaining Video Classifiers with Video Counterfactual Explanations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Chao and Che, Chengan and Chen, Xinyue and Tsoka, Sophia and Garcia-Peraza-Herrera, Luis C.},
  title     = {Back to the Feature: Explaining Video Classifiers with Video Counterfactual Explanations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9921--9931},
}
Planning in 8 Tokens: A Compact Discrete Tokenizer for Latent World Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Dongwon and Seo, Gawon and Lee, Jinsung and Cho, Minsu and Kwak, Suha},
  title     = {Planning in 8 Tokens: A Compact Discrete Tokenizer for Latent World Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8183--8193},
}
Vision-Speech Models: Teaching Speech Models to Converse about Images-
[pdf]
[supp]
[bibtex]@InProceedings{Royer_2026_CVPR,
  author    = {Royer, Am{\'e}lie and B{\"o}hle, Moritz and Mazar{\'e}, Laurent and Zeghidour, Neil and D{\'e}fossez, Alexandre and P{\'e}rez, Patrick},
  title     = {Vision-Speech Models: Teaching Speech Models to Converse about Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1706--1715},
}
Evidential Transformation Network: Turning Pretrained Models into Evidential Models for Post-hoc Uncertainty Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chun_2026_CVPR,
  author    = {Chun, Yongchan and Park, Chanhee and Yoon, Jeongho and Seo, Jaehyung and Lim, Heuiseok},
  title     = {Evidential Transformation Network: Turning Pretrained Models into Evidential Models for Post-hoc Uncertainty Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6157--6166},
}
SplitFlux: Learning to Decouple Content and Style from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yitong and Wang, Yinglin and Wang, Changshuo and Zhang, Yongjun and Chen, Ziyang and He, Shuting},
  title     = {{SplitFlux}: Learning to Decouple Content and Style from a Single Image},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {572--582},
}
CFG-Ctrl: Control-Based Classifier-Free Diffusion Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Hanyang and Liu, Yiyang and Chi, Jiawei and Liu, Fangfu and Xue, Ran and Duan, Yueqi},
  title     = {{CFG-Ctrl}: Control-Based Classifier-Free Diffusion Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11437--11447},
}
FlowComposer: Composable Flows for Compositional Zero-Shot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR,
  author    = {He, Zhenqi and Li, Lin and Chen, Long},
  title     = {{FlowComposer}: Composable Flows for Compositional Zero-Shot Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12396--12405},
}
XPaintNet: An eXtreme Lightweight Framework for Stereoscopic Conversion without Inpainting Network-
[pdf]
[supp]
[bibtex]@InProceedings{Yoon_2026_CVPR,
  author    = {Yoon, Kihwan and Shin, Juyeon and Kang, Jungheum and Kim, Sijung and Jeon, Minyong},
  title     = {{XPaintNet}: An {eXtreme} Lightweight Framework for Stereoscopic Conversion without Inpainting Network},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5762--5771},
}
Why Does RL Generalize Better Than SFT? A Data-Centric Perspective on VLM Post-Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Aojun and Feng, Tao and Yuan, Hangjie and Li, Wei and Sun, Yanan},
  title     = {Why Does {RL} Generalize Better Than {SFT}? {A} Data-Centric Perspective on {VLM} Post-Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4761--4771},
}
Cross-Hand Latent Representation for Vision-Language-Action Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Guangqi and Liang, Yutong and Ye, Jianglong and Huang, Jia-Yang and Jing, Changwei and Duan, Rocky and Abbeel, Pieter and Wang, Xiaolong and Zou, Xueyan},
  title     = {Cross-Hand Latent Representation for Vision-Language-Action Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13496--13507},
}
NoOVD: Novel Category Discovery and Embedding for Open-Vocabulary Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yupeng and Han, Ruize and Chen, Zhiwei and Feng, Wei and Wan, Liang},
  title     = {{NoOVD}: Novel Category Discovery and Embedding for Open-Vocabulary Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6304--6313},
}
POGA: Paraphrased and Oppositional Graph Alignment for Fine-Grained Cross-Modal Retrieval-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Junfeng and Xue, Zhe and Qi, Yuankai and Du, Junping and Kong, Xiangyang and Yan, Yishuo and Beheshti, Amin and Yang, Jian and van den Hengel, Anton and Yang, Ming-Hsuan},
  title     = {{POGA}: Paraphrased and Oppositional Graph Alignment for Fine-Grained Cross-Modal Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2735--2745},
}
PatchAlign3D: Local Feature Alignment for Dense 3D Shape Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hadgi_2026_CVPR,
  author    = {Hadgi, Souhail and Gong, Bingchen and Sundararaman, Ramana and Pierson, Emery and Li, Lei and Wonka, Peter and Ovsjanikov, Maks},
  title     = {{PatchAlign3D}: Local Feature Alignment for Dense {3D} Shape Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3014--3023},
}
TACO: Task-Aware Contrastive Learning for Joint LiDAR Localization and 3D Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Xing_2026_CVPR,
  author    = {Xing, Leyuan and Zhang, Huanjia and Pan, Dongyu and Wu, Hai and Xia, Qiming and Xiong, Kezheng and Li, Wen and Wen, Chenglu and Wang, Cheng},
  title     = {{TACO}: Task-Aware Contrastive Learning for Joint {LiDAR} Localization and {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9965--9975},
}
SAM2Text: Towards Prompt-Free and Multi-Resolution Video Scene Text Segmentation-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jing-Yao and Zhang, Heng and Zhang, Mingsen and Yang, Binbin and Yin, Fei},
  title     = {{SAM2Text}: Towards Prompt-Free and Multi-Resolution Video Scene Text Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3825--3834},
}
UniAVGen: Unified Audio and Video Generation with Asymmetric Cross-Modal Interactions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Guozhen and Zhou, Zixiang and Hu, Teng and Peng, Ziqiao and Zhang, Youliang and Chen, Yi and Zhou, Yuan and Lu, Qinglin and Wang, Limin},
  title     = {{UniAVGen}: Unified Audio and Video Generation with Asymmetric Cross-Modal Interactions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1950--1960},
}
Egocentric Visibility-Aware Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Peng and Zhang, Yu and Yiqiang, Feng and Fan, Zhen and Zhang, Yang},
  title     = {Egocentric Visibility-Aware Human Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7047--7057},
}
VesMamba: 3D Pulmonary Vessel Segmentation from CT images via Mamba with Structural Perception and Scale-aware Filtering-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Zhipeng and Chen, Guilian and Jiang, Zheng and Wu, Huisi and Qin, Jing},
  title     = {{VesMamba}: {3D} Pulmonary Vessel Segmentation from {CT} images via {Mamba} with Structural Perception and Scale-aware Filtering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1439--1449},
}
Visual Grounding for Object Questions-
[pdf]
[supp]
[bibtex]@InProceedings{Everaert_2026_CVPR,
  author    = {Everaert, Martin Nicolas and Liu, Xiruo and Takeda, Hiroyuki and Bala, Raja and Yadav, Vivek and Narayanan, Vidya},
  title     = {Visual Grounding for Object Questions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11966--11975},
}
HySeg: Learning Generative Priors for Structure-Aware Remote Sensing Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Jie and Li, Xin and Yang, Fan and Wang, Yan and Yu, Dong and Wang, Changying and Dai, Linwei and Chen, Yongxiang and Chen, Youqin and Chen, Jianzhang},
  title     = {{HySeg}: Learning Generative Priors for Structure-Aware Remote Sensing Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6410--6420},
}
Authorize-on-Demand: Dynamic Authorization with Legality-Aware Intellectual Property Protection for VLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Lianyu and Wang, Meng and Fu, Huazhu and Zhang, Daoqiang},
  title     = {Authorize-on-Demand: Dynamic Authorization with Legality-Aware Intellectual Property Protection for {VLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6621--6630},
}
Agent4FaceForgery: Multi-Agent LLM Framework for Realistic Face Forgery Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Yingxin and Yu, Zitong and Wang, Jun and Shen, Linlin and Xu, Yong and Cao, Xiaochun},
  title     = {{Agent4FaceForgery}: Multi-Agent {LLM} Framework for Realistic Face Forgery Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14073--14083},
}
D2FANet: Enhancing Video Object Detection with Dual-Domain Feature Aggregation Network-
[pdf]
[supp]
[bibtex]@InProceedings{Qi_2026_CVPR,
  author    = {Qi, Qiang and Shang, Wenqi and Wang, Meifang and Wang, Xiao},
  title     = {{D2FANet}: Enhancing Video Object Detection with Dual-Domain Feature Aggregation Network},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11229--11239},
}
A Geometric Algebra-Informed 3DGS Framework for Wireless Channel Prediction-
[pdf]
[bibtex]@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Jingzhou and Zhao, Tianya and Wang, Xuyu},
  title     = {A Geometric Algebra-Informed {3DGS} Framework for Wireless Channel Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4973--4982},
}
LoPrune: Efficient Data Pruning for LoRA-Based Fine-Tuning of Vision Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{He_2026_CVPR,
  author    = {He, Qiang and Yang, Yaozong and Wang, Kaibin and Wei, Ziteng and Chen, Feifei and Chua, Caslon and Yang, Yun},
  title     = {{LoPrune}: Efficient Data Pruning for {LoRA-Based} Fine-Tuning of Vision Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5242--5252},
}
FlashLips: 100-FPS Mask-Free Latent Lip-Sync using Reconstruction Instead of Diffusion or GANs-
[pdf]
[supp]
[bibtex]@InProceedings{Zinonos_2026_CVPR,
  author    = {Zinonos, Andreas and Stypu{\l}kowski, Micha{\l} and Bigata, Antoni and Petridis, Stavros and Pantic, Maja and Drobyshev, Nikita},
  title     = {{FlashLips}: {100-FPS} Mask-Free Latent Lip-Sync using Reconstruction Instead of Diffusion or {GANs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10898--10908},
}
Bridging Domain Expertise and Generalization for Performance Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Shuxuan and Zhao, Zhilin and Kong, Quyu and Zheng, Wei-Shi},
  title     = {Bridging Domain Expertise and Generalization for Performance Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7967--7977},
}
VVS: Accelerating Speculative Decoding for Visual Autoregressive Generation via Partial Verification Skipping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Haotian and Li, Ye and Lu, Rongwei and Tang, Chen and Xia, Shu-Tao and Wang, Zhi},
  title     = {{VVS}: Accelerating Speculative Decoding for Visual Autoregressive Generation via Partial Verification Skipping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12173--12182},
}
Beyond Top Activations: Efficient and Reliable Crowdsourced Evaluation of Automated Interpretability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Oikarinen_2026_CVPR,
  author    = {Oikarinen, Tuomas and Yan, Ge and Kulkarni, Akshay and Weng, Tsui-Wei},
  title     = {Beyond Top Activations: Efficient and Reliable Crowdsourced Evaluation of Automated Interpretability},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2885--2894},
}
NOVA: Sparse Control, Dense Synthesis for Pair-Free Video Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Tianlin and Dai, Jiayi and Yuan, Chenpu and Lv, Zhengyao and Yang, Binxin and Yin, Hubery and Li, Chen and Lyu, Jing and Shan, Caifeng and Si, Chenyang},
  title     = {{NOVA}: Sparse Control, Dense Synthesis for Pair-Free Video Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1917--1927},
}
ChimeraLoRA: Multi-Head LoRA-Guided Synthetic Datasets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Hoyoung and Jang, Minwoo and Koo, Jabin and Yun, Sangdoo and Ok, Jungseul}, title = {ChimeraLoRA: Multi-Head LoRA-Guided Synthetic Datasets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9063-9073} }
CREval: An Automated Interpretable Evaluation for Creative Image Manipulation under Complex Instructions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Chonghuinan and Chen, Zihan and Wei, Yuxiang and Jiang, Tianyi and Wu, Xiaohe and Li, Fan and Zuo, Wangmeng and Yao, Hongxun}, title = {CREval: An Automated Interpretable Evaluation for Creative Image Manipulation under Complex Instructions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9029-9039} }
MedGRPO: Multi-Task Reinforcement Learning for Heterogeneous Medical Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2026_CVPR, author = {Su, Yuhao and Choudhuri, Anwesa and Gao, Zhongpai and Planche, Benjamin and Nguyen, Van Nguyen and Zheng, Meng and Shen, Yuhan and Innanje, Arun and Chen, Terrence and Elhamifar, Ehsan and Wu, Ziyan}, title = {MedGRPO: Multi-Task Reinforcement Learning for Heterogeneous Medical Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2788-2798} }
Modeling Spatiotemporal Neural Frames for High Resolution Brain Dynamic-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2026_CVPR, author = {Qu, Wanying and Gao, Jianxiong and Wang, Wei and Fu, Yanwei}, title = {Modeling Spatiotemporal Neural Frames for High Resolution Brain Dynamic}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6994-7002} }
Score2Instruct: Scaling Up Video Quality-Centric Instructions via Automated Dimension Scoring-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Qizhi and Yuan, Kun and Qu, Yunpeng and Gong, Jiachao and Wu, Mingda and Sun, Ming and Zhou, Chao and Zhu, Jihong}, title = {Score2Instruct: Scaling Up Video Quality-Centric Instructions via Automated Dimension Scoring}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11426-11436} }
Pano360: Perspective to Panoramic Vision with Geometric Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Zhengdong and Xue, Weiyi and Yang, Zuyuan and Zhou, Wenlve and Zhou, Zhiheng}, title = {Pano360: Perspective to Panoramic Vision with Geometric Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7600-7609} }
Towards an Incremental Unified Multimodal Anomaly Detection: Augmenting Multimodal Denoising From an Information Bottleneck Perspective-
[pdf]
[arXiv]
[bibtex]@InProceedings{Long_2026_CVPR, author = {Long, Kaifang and Ma, Lianbo and Liu, Jiaqi and Liu, Liming and Xie, Guoyang}, title = {Towards an Incremental Unified Multimodal Anomaly Detection: Augmenting Multimodal Denoising From an Information Bottleneck Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14116-14125} }
NeuroFlow: Toward Unified Visual Encoding and Decoding from Neural Activity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mai_2026_CVPR, author = {Mai, Weijian and Nan, Mu and Zhu, Yu and Cao, Jiahang and Zhang, Rui and Dai, Yuqin and Song, Chunfeng and Luo, Andrew and Wu, Jiamin}, title = {NeuroFlow: Toward Unified Visual Encoding and Decoding from Neural Activity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12966-12976} }
PhaseWin Search Framework Enable Efficient Object-Level Interpretation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2026_CVPR, author = {Gu, Zihan and Chen, Ruoyu and Zhang, Junchi and Hu, Yue and Zhang, Hua and Cao, Xiaochun}, title = {PhaseWin Search Framework Enable Efficient Object-Level Interpretation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2875-2884} }
FedAFD: Multimodal Federated Learning via Adversarial Fusion and Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Min and Ma, Junchao and Feng, Yinfu and Ding, Jiajun and Pan, Wenwen and Han, Tingting and Zheng, Qian and Kuang, Zhenzhong and Yu, Zhou}, title = {FedAFD: Multimodal Federated Learning via Adversarial Fusion and Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3400-3409} }
ConsisVLA-4D: Advancing Spatiotemporal Consistency in Efficient 3D-Perception and 4D-Reasoning for Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Wei and Liu, Jizhihui and Li, Yixing and Tong, Junwen and Shao, Rui and Nie, Liqiang}, title = {ConsisVLA-4D: Advancing Spatiotemporal Consistency in Efficient 3D-Perception and 4D-Reasoning for Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6706-6717} }
ActiveAD: Planning-Oriented Active Learning for End-to-End Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Han and Jia, Xiaosong and Xie, Yichen and Sun, Siyu and Liao, Wenlong and Yang, Xiaokang and Yan, Junchi}, title = {ActiveAD: Planning-Oriented Active Learning for End-to-End Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3656-3666} }
Plug-and-Play Incomplete Multi-View Clustering via Janus-Faced Affinity Learning with Topology Harmonization-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Shengju and Liu, Suyuan and Shao, Wenhao and Wang, Siwei and Liang, Ke and Yang, Xihong and Li, Tiejun and Liu, Xinwang}, title = {Plug-and-Play Incomplete Multi-View Clustering via Janus-Faced Affinity Learning with Topology Harmonization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3594-3603} }
SHOW3D: Capturing Scenes of 3D Hands and Objects in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rim_2026_CVPR, author = {Rim, Patrick and Harris, Kevin and Copple, Braden and Han, Shangchen and Xie, Xu and Shugurov, Ivan and An, Sizhe and Wen, He and Wong, Alex and Hodan, Tomas and He, Kun}, title = {SHOW3D: Capturing Scenes of 3D Hands and Objects in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7111-7120} }
Cross-Instance Gaussian Splatting Registration via Geometry-Aware Feature-Guided Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Amoyal_2026_CVPR, author = {Amoyal, Roy and Freifeld, Oren and Baskin, Chaim}, title = {Cross-Instance Gaussian Splatting Registration via Geometry-Aware Feature-Guided Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4993-5002} }
MoReGen: Multi-Agent Motion-Reasoning Engine for Code-based Text-to-Video Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2026_CVPR, author = {Bai, Xiangyu and Liang, He and Galoaa, Bishoy and Nandi, Utsav and Moezzi, Shayda and He, Yuhang and Ostadabbas, Sarah}, title = {MoReGen: Multi-Agent Motion-Reasoning Engine for Code-based Text-to-Video Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7632-7642} }
DDiT: Dynamic Patch Scheduling for Efficient Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Dahye and Ghadiyaram, Deepti and Gadde, Raghudeep}, title = {DDiT: Dynamic Patch Scheduling for Efficient Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11459-11471} }
Bridging the Modality Gap in Compositional Zero-Shot Learning via Sparse Alignment and Unimodal Memory Bank-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yang and Chi, Zhixiang and Yan, Xudong and Wang, Yang and Feng, Songhe}, title = {Bridging the Modality Gap in Compositional Zero-Shot Learning via Sparse Alignment and Unimodal Memory Bank}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5553-5563} }
SSM-Aware Token-Efficient VMamba via Adaptive Patch Pruning and Merging for Person Re-Identification-
[pdf]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Huiyuan and Yoon, Sang Min}, title = {SSM-Aware Token-Efficient VMamba via Adaptive Patch Pruning and Merging for Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4363-4372} }
CARI4D: Category Agnostic 4D Reconstruction of Human-Object Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Xianghui and Wen, Bowen and Chang, Yan and Rabeti, Hesam and Li, Jiefeng and Yuan, Ye and Pons-Moll, Gerard and Birchfield, Stan}, title = {CARI4D: Category Agnostic 4D Reconstruction of Human-Object Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14006-14016} }
SGI: Structured 2D Gaussians for Efficient and Compact Large Image Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Zixuan and Tang, Kaiyuan and Xia, Jun and Qin, Yifan and Gu, Lin and Wang, Chaoli and Chen, Jianxu and Shi, Yiyu}, title = {SGI: Structured 2D Gaussians for Efficient and Compact Large Image Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12162-12172} }
Scalable Feature Matching via State Space Modeling and Sparse Correlation-
[pdf]
[bibtex]@InProceedings{Choo_2026_CVPR, author = {Choo, Sin Wai and Li, Bo}, title = {Scalable Feature Matching via State Space Modeling and Sparse Correlation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6685-6694} }
Real-World Point Tracking with Verifier-Guided Pseudo-Labeling-
[pdf]
[supp]
[bibtex]@InProceedings{Aydemir_2026_CVPR, author = {Aydemir, G{\"o}rkay and G{\"u}ney, Fatma and Xie, Weidi}, title = {Real-World Point Tracking with Verifier-Guided Pseudo-Labeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13553-13562} }
ShotDirector: Directorially Controllable Multi-Shot Video Generation with Cinematographic Transitions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Xiaoxue and Chen, Xinyuan and Wang, Yaohui and Qiao, Yu}, title = {ShotDirector: Directorially Controllable Multi-Shot Video Generation with Cinematographic Transitions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2079-2089} }
Dual-Prototype-Guided Multi-task Learning for Unsupervised Anomaly Detection and Classification-
[pdf]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Qianhao and Mi, Jiajia and Yan, Mingtao and Liu, JingSheng and Pang, ShuYang and Li, Weiling}, title = {Dual-Prototype-Guided Multi-task Learning for Unsupervised Anomaly Detection and Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14137-14146} }
Towards Open-Vocabulary Industrial Defect Understanding with a Large-Scale Multimodal Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ni_2026_CVPR, author = {Ni, Tsai-Ching and Chen, Cheng-Chi and Yang, Yuan-Fu}, title = {Towards Open-Vocabulary Industrial Defect Understanding with a Large-Scale Multimodal Dataset}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13059-13068} }
Style-GRPO: Semantic-Aware Preference Optimization for Image Style Transfer Guided by Reward Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Jianbin and Feng, Chaoran and Yu, Miao and Li, Yingtao and Tang, Zhenyu and Yu, Wangbo and Zhao, Yian and Li, Xiaomin and Yuan, Li and Tian, Yonghong}, title = {Style-GRPO: Semantic-Aware Preference Optimization for Image Style Transfer Guided by Reward Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12818-12828} }
AXG-Reasoner: Error Detection and Explanation in Long Task Videos with Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Shih-Po and Elhamifar, Ehsan}, title = {AXG-Reasoner: Error Detection and Explanation in Long Task Videos with Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3421-3431} }
Learning to See Through a Baby's Eyes: Early Visual Diets Enable Robust Visual Intelligence in Humans and Machines-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Yusen and Lin, Qing and Nunna, Bhargava Satya and Zhang, Mengmi}, title = {Learning to See Through a Baby's Eyes: Early Visual Diets Enable Robust Visual Intelligence in Humans and Machines}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13766-13780} }
MV-RoMa: From Pairwise Matching into Multi-View Track Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Jongmin and Kang, Seungyeop and Yoo, Sungjoo}, title = {MV-RoMa: From Pairwise Matching into Multi-View Track Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7446-7456} }
AnyDoc: Enhancing Document Generation via Large-Scale HTML/CSS Data Synthesis and Height-Aware Reinforcement Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Jiawei and Zhu, Wanrong and Morariu, Vlad I. and Tensmeyer, Christopher}, title = {AnyDoc: Enhancing Document Generation via Large-Scale HTML/CSS Data Synthesis and Height-Aware Reinforcement Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {626-635} }
Breaking the 3D Dataset Bottleneck: Fast Scalable Generation of Aligned 3D Assets from Scratch for Category 6D Pose Estimation and Robotic Grasping-
[pdf]
[supp]
[bibtex]@InProceedings{Guillaume_2026_CVPR, author = {Duret, Guillaume and Mazurak, Danylo and Zara, Florence and Peters, Jan and Chen, Liming}, title = {Breaking the 3D Dataset Bottleneck: Fast Scalable Generation of Aligned 3D Assets from Scratch for Category 6D Pose Estimation and Robotic Grasping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1809-1818} }
Cross from Left to Right Brain: Adaptive Text Dreamer for Vision-and-Language Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Pingrui and Su, Yifei and Wu, Pengyuan and An, Dong and Zhang, Li and Wang, Zhigang and Wang, Dong and Zhao, Bin}, title = {Cross from Left to Right Brain: Adaptive Text Dreamer for Vision-and-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1006-1019} }
The Road Less Seen: Segment Exploration for Weakly Supervised Video Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Acharya_2026_CVPR, author = {Acharya, Anusha and Sapkota, Hitesh and Yu, Qi and Liu, Xumin}, title = {The Road Less Seen: Segment Exploration for Weakly Supervised Video Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14147-14156} }
Easy3E: Feed-Forward 3D Asset Editing via Rectified Voxel Flow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Shimin and Wei, Yuanyi and Zha, Fei and Guo, Yudong and Zhang, Juyong}, title = {Easy3E: Feed-Forward 3D Asset Editing via Rectified Voxel Flow}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12730-12740} }
Query2Uncertainty: Robust Uncertainty Quantification and Calibration for 3D Object Detection under Distribution Shift-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Beemelmanns_2026_CVPR, author = {Beemelmanns, Till and Nekrasov, Alexey and Vilceanu, Stefan and Steinhaus, Jonas and Woopen, Timo and Leibe, Bastian and Eckstein, Lutz}, title = {Query2Uncertainty: Robust Uncertainty Quantification and Calibration for 3D Object Detection under Distribution Shift}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4677-4686} }
PTC-Depth: Pose-Refined Monocular Depth Estimation with Temporal Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Leezy and Kim, Seunggyu and Shim, Dongseok and Lee, Hyeonbeom}, title = {PTC-Depth: Pose-Refined Monocular Depth Estimation with Temporal Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12617-12627} }
DiffusionFF: A Diffusion-based Framework for Joint Face Forgery Detection and Fine-Grained Artifact Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Siran and Zhang, Haoyuan and Gao, Li and Zhang, Tianshuo and Zhu, Xiangyu and Li, Bao and Zhao, Weisong and Lei, Zhen}, title = {DiffusionFF: A Diffusion-based Framework for Joint Face Forgery Detection and Fine-Grained Artifact Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14095-14105} }
CycleBEV: Regularizing View Transformation Networks via View Cycle Consistency for Bird's-Eye-View Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Hong_2026_CVPR, author = {Hong, Jeongbin and Choi, Dooseop and An, Taeg-Hyun and An, Kyounghwan and Min, Kyoung-Wook}, title = {CycleBEV: Regularizing View Transformation Networks via View Cycle Consistency for Bird's-Eye-View Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10986-10995} }
Unlocking 3D Affordance Segmentation with 2D Semantic Knowledge-
[pdf]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Yu and Peng, Zelin and Wen, Changsong and Yang, Xiaokang and Shen, Wei}, title = {Unlocking 3D Affordance Segmentation with 2D Semantic Knowledge}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6399-6409} }
Omni-AD: A Large-scale and Versatile Benchmark for Industrial Anomaly Detection-
[pdf]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Dahu and He, Chengshen and Zhang, Shaochen and Qian, Bo and Quan, Xiaochen and Zhang, Wencong and Wei, Xing}, title = {Omni-AD: A Large-scale and Versatile Benchmark for Industrial Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14157-14166} }
GaussianDWM: 3D Gaussian Driving World Model for Unified Scene Understanding and Multi-Modal Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Tianchen and Chen, Xuefeng and Chen, Yi and Chen, Qu and Xu, Yuyao and Yang, Lijin and Xu, Le and Zhang, Yu and Zhang, Bo and Huang, Wuxiong and Wang, Hesheng}, title = {GaussianDWM: 3D Gaussian Driving World Model for Unified Scene Understanding and Multi-Modal Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10656-10667} }
Watch and Learn: Learning to Use Computers from Online Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Chan Hee and Song, Yiwen and Goyal, Palash and Su, Yu and Riva, Oriana and Palangi, Hamid and Pfister, Tomas}, title = {Watch and Learn: Learning to Use Computers from Online Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5421-5431} }
DeepProtect: Proactive Face-Swapping Defense using Identity Blending and Attribute Distortion-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Eungi and Back, Seung-hyeok and Kim, Hyung-Il and Yoo, Seok Bong}, title = {DeepProtect: Proactive Face-Swapping Defense using Identity Blending and Attribute Distortion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6569-6579} }
R3-PCQA: Ray-Reprojection-Reinforcement for No-Reference 3D Point Cloud Quality Assessment-
[pdf]
[supp]
[bibtex]@InProceedings{Seo_2026_CVPR, author = {Seo, Junhyuk and Seo, Sanghyuk and Kim, Dawoon and Oh, Heeseok}, title = {R3-PCQA: Ray-Reprojection-Reinforcement for No-Reference 3D Point Cloud Quality Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9996-10005} }
Learning Anchor in Dual Orthogonal Space for Fast Multi-view Clustering-
[pdf]
[bibtex]@InProceedings{Qin_2026_CVPR, author = {Qin, Yalan and Wu, Hanzhou}, title = {Learning Anchor in Dual Orthogonal Space for Fast Multi-view Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1616-1626} }
DriveMoE: Mixture-of-Experts for Vision-Language-Action Model in End-to-End Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zhenjie and Chai, Yilin and Jia, Xiaosong and Li, Qifeng and Shao, Yuqian and Zhu, Xuekai and Su, Haisheng and Yan, Junchi}, title = {DriveMoE: Mixture-of-Experts for Vision-Language-Action Model in End-to-End Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10678-10688} }
Exposing Functional Fusion: A New Class of Strategic Backdoor in Dynamic Prompt Architectures-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Zeyao and Zhao, Zhendong and Chen, Xiaojun and Zhao, Xin and Xuan, Yuexin and Ji, Xiaoshuang}, title = {Exposing Functional Fusion: A New Class of Strategic Backdoor in Dynamic Prompt Architectures}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13376-13385} }
Are We Ready for RL in Text-to-3D Generation? A Progressive Investigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Yiwen and Guo, Zoey and Zhu, Kaixin and Zhang, Ray and Chen, Qizhi and Jiang, Dongzhi and Liu, Junli and Zeng, Bohan and Song, Haoming and Qu, Delin and Bai, Tianyi and Xu, Dan and Zhang, Wentao and Zhao, Bin}, title = {Are We Ready for RL in Text-to-3D Generation? A Progressive Investigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3197-3207} }
Modeling the Brain's Grammar: ROI-Guided fMRI Pretraining for Transferable and Interpretable Vision Decoding-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yulong and Xu, Hua and Cai, Yiyang and Jiang, Chunyang and Han, Sirui and Guo, Yike}, title = {Modeling the Brain's Grammar: ROI-Guided fMRI Pretraining for Transferable and Interpretable Vision Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6900-6909} }
OSPO: Object-Centric Self-Improving Preference Optimization for Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Oh_2026_CVPR, author = {Oh, Yoonjin and Kim, Yongjin and Kim, Hyomin and Chi, Donghwan and Kim, Sungwoong}, title = {OSPO: Object-Centric Self-Improving Preference Optimization for Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7620-7631} }
Divide, then Ground: Adapting Frame Selection to Query Types for Long-Form Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jialuo and Li, Bin and Li, Jiahao and Lu, Yan}, title = {Divide, then Ground: Adapting Frame Selection to Query Types for Long-Form Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11369-11380} }
Diffusion Guided Chain-of-Vision for Large Autoregressive Vision Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xinyang and Zheng, Kecheng and Zhu, Minfeng and Wu, Wei and Lu, Fan and Zhai, Wei and Chen, Wei}, title = {Diffusion Guided Chain-of-Vision for Large Autoregressive Vision Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2357-2368} }
Protego: User-Centric Pose-Invariant Privacy Protection Against Face Recognition-Induced Digital Footprint Exposure-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Ziling and Yang, Shuya and Lu, Jialin and Chow, Ka-Ho}, title = {Protego: User-Centric Pose-Invariant Privacy Protection Against Face Recognition-Induced Digital Footprint Exposure}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10293-10302} }
RMIR: A Benchmark Dataset for Reasoning-Intensive Multimodal Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yijiang and Kotian, Kunal and Marjaninejad, Ali and Friedenberg, Meir and Pavani, Kaushik and Dasgupta, Sunny}, title = {RMIR: A Benchmark Dataset for Reasoning-Intensive Multimodal Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2725-2734} }
MeshWeaver: Sparse-Voxel-Guided Surface Weaving for Autoregressive Mesh Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Jiale and Zhao, Wang and Shan, Ying}, title = {MeshWeaver: Sparse-Voxel-Guided Surface Weaving for Autoregressive Mesh Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5912-5922} }
ReMoT: Reinforcement Learning with Motion Contrast Triplets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wan_2026_CVPR, author = {Wan, Cong and Guo, Zeyu and Li, Jiangyang and Dong, Songlin and Bai, Yifan and Peng, Lin and Ma, Zhiheng and Gong, Yihong}, title = {ReMoT: Reinforcement Learning with Motion Contrast Triplets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5487-5498} }
LLMind: Bio-inspired Training-free Adaptive Visual Representations for Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Debnath_2026_CVPR, author = {Debnath, Soumyaratna and Manh, Bui Duc and Liu, Zinan and Wang, Lin}, title = {LLMind: Bio-inspired Training-free Adaptive Visual Representations for Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3133-3142} }
TextOVSR: Text-Guided Real-World Opera Video Super-Resolution-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chang_2026_CVPR, author = {Chang, Hua and Xu, Xin and Liu, Wei and Wu, Jiayi and Jiang, Kui and Ma, Fei and Tian, Qi}, title = {TextOVSR: Text-Guided Real-World Opera Video Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2156-2165} }
Garments2Look: A Multi-Reference Dataset for High-Fidelity Outfit-Level Virtual Try-On with Clothing and Accessories-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Junyao and Cheng, Zhongwei and Wong, Waikeung and Zou, Xingxing}, title = {Garments2Look: A Multi-Reference Dataset for High-Fidelity Outfit-Level Virtual Try-On with Clothing and Accessories}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1123-1133} }
Adaptive Capacity Autoregressive Visual Tracking-
[pdf]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Tong and Bai, Yifan and Liang, Shiyi and Niu, Ruigang and Wei, Xing}, title = {Adaptive Capacity Autoregressive Visual Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13574-13583} }
Stable Mean Flow: Lyapunov-Inspired One-Step Flow Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Guangxun and Haberle, Mason and Geiger, Davi}, title = {Stable Mean Flow: Lyapunov-Inspired One-Step Flow Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9223-9232} }
Incentivizing Generative Zero-Shot Learning via Outcome-Reward Reinforcement Learning with Visual Cues-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hou_2026_CVPR, author = {Hou, Wenjin and Sun, Xiaoxiao and Fan, Hehe}, title = {Incentivizing Generative Zero-Shot Learning via Outcome-Reward Reinforcement Learning with Visual Cues}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5499-5510} }
3D-Fixer: Coarse-to-Fine In-place Completion for 3D Scenes from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Ze-Xin and Liu, Liu and Wang, Xinjie and Sui, Wei and Su, Zhizhong and Yang, Jian and Xie, Jin}, title = {3D-Fixer: Coarse-to-Fine In-place Completion for 3D Scenes from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12753-12763} }
Breaking the Illusion: When Positive Meets Negative in Multimodal Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Yubo and An, Yitong and Yang, Xin and Wuerkaixi, Abudukelimu and Cheng, Xuxin and Xie, Fengying and Jiang, Zhiguo and Liu, Cao and Zeng, Ke and Zhang, Haopeng}, title = {Breaking the Illusion: When Positive Meets Negative in Multimodal Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4210-4220} }
CrowdGaussian: Reconstructing High-Fidelity 3D Gaussians for Human Crowd from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Yizheng and Zhuang, Yiyu and Xu, Qipeng and Wang, Haixiang and Zhu, Jiahe and Tian, Jing and Zhu, Siyu and Zhu, Hao}, title = {CrowdGaussian: Reconstructing High-Fidelity 3D Gaussians for Human Crowd from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11006-11016} }
EffectErase: Joint Video Object Removal and Insertion for High-Quality Effect Erasing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2026_CVPR, author = {Fu, Yang and Zheng, Yike and Dai, Ziyun and Ding, Henghui}, title = {EffectErase: Joint Video Object Removal and Insertion for High-Quality Effect Erasing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2005-2014} }
SOUPLE: Enhancing Audio-Visual Localization and Segmentation with Learnable Prompt Contexts-
[pdf]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Khanh Binh and Park, Chae Jung}, title = {SOUPLE: Enhancing Audio-Visual Localization and Segmentation with Learnable Prompt Contexts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8674-8683} }
ReDirector: Creating Any-Length Video Retakes with Rotary Camera Encoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Byeongjun and Kim, Byung-Hoon and Chung, Hyungjin and Ye, Jong Chul}, title = {ReDirector: Creating Any-Length Video Retakes with Rotary Camera Encoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11163-11173} }
Heuristic-inspired Reasoning Priors Facilitate Data-Efficient Referring Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xu and Chen, Zhe and Zhang, Jing and Tao, Dacheng}, title = {Heuristic-inspired Reasoning Priors Facilitate Data-Efficient Referring Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10082-10092} }
SoC: Semantic Orthogonal Calibration for Test-Time Prompt Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fillioux_2026_CVPR, author = {Fillioux, Leo and Chakraborty, Omprakash and Ben Ayed, Ismail and Courn\`ede, Paul-Henry and Christodoulidis, Stergios and Vakalopoulou, Maria and Dolz, Jose}, title = {SoC: Semantic Orthogonal Calibration for Test-Time Prompt Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4772-4782} }
Learning a Unified Latent Action Space from Videos with Action-centric Cycle Consistency-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Guangyan and Shao, Qi and Cui, Te and Zhou, Zichen and Mao, Weixin and Yang, Luojie and Wang, Meiling and Yang, Yi and Chen, Hua and Yue, Yufeng}, title = {Learning a Unified Latent Action Space from Videos with Action-centric Cycle Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12871-12880} }
Language-Free Generative Editing from One Visual Example-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Elezabi_2026_CVPR, author = {Elezabi, Omar and Zamfir, Eduard and Wu, Zongwei and Timofte, Radu}, title = {Language-Free Generative Editing from One Visual Example}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1078-1088} }
EmoTaG: Emotion-Aware Talking Head Synthesis on Gaussian Splatting with Few-Shot Personalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Haolan and Cheng, Keli and Wang, Lei and Bi, Ning and Liu, Xiaoming}, title = {EmoTaG: Emotion-Aware Talking Head Synthesis on Gaussian Splatting with Few-Shot Personalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10921-10931} }
Plant Taxonomy Meets Plant Counting: A Fine-Grained, Taxonomic Dataset for Counting Hundreds of Plant Species-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Jinyu and Hu, Tianqi and Hu, Xiaonan and Zhou, Letian and Cao, Songliang and Zhang, Meng and Lu, Hao}, title = {Plant Taxonomy Meets Plant Counting: A Fine-Grained, Taxonomic Dataset for Counting Hundreds of Plant Species}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {167-177} }
Heterogeneous Decentralized Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Zhiying and Seraj, Raihan and Villagra, Marcos and Roy, Bidhan}, title = {Heterogeneous Decentralized Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2391-2400} }
Goal-Driven Reward by Video Diffusion Models for Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Qi and Wu, Mian and Zhang, Yuyang and Yuan, Mingqi and Zhang, Wenyao and You, Haoxiang and Wang, Yunbo and Jin, Xin and Yang, Xiaokang and Zeng, Wenjun}, title = {Goal-Driven Reward by Video Diffusion Models for Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8077-8086} }
Enhancing Hands in 3D Whole-Body Pose Estimation with Conditional Hands Modulator-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Moon_2026_CVPR, author = {Moon, Gyeongsik}, title = {Enhancing Hands in 3D Whole-Body Pose Estimation with Conditional Hands Modulator}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8891-8900} }
GeniNav: Generative Model Driven Image-Goal Navigation via Imagination-Guided Consistency Flow Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yuqi and Gao, Junjie and Pan, Yongzhou and Song, Siyuan and Zhang, Zixuan and Xiao, Jiaping and Feroskhan, Mir}, title = {GeniNav: Generative Model Driven Image-Goal Navigation via Imagination-Guided Consistency Flow Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {996-1005} }
CineSRD: Leveraging Visual, Acoustic, and Linguistic Cues for Open-World Visual Media Speaker Diarization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Liangbin and Liao, Xiaohua and Cui, Chaoqun and Wang, Shijing and Huang, Zhaolong and Du, Yanlong and Mao, Wenji}, title = {CineSRD: Leveraging Visual, Acoustic, and Linguistic Cues for Open-World Visual Media Speaker Diarization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8836-8845} }
Learning Personalized Photographic Style from Pairwise User Preferences-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Jinwoo and Yoo, Jihye and Kim, Seon Joo}, title = {Learning Personalized Photographic Style from Pairwise User Preferences}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1134-1144} }
DA-Mamba: Learning Domain-Aware State Space Model for Global-Local Alignment in Domain Adaptive Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Haochen and Zhang, Rui and Yao, Hantao and Zhang, Xin and Hao, Yifan and Peng, Shaohui and Zhao, Yongwei and Li, Ling}, title = {DA-Mamba: Learning Domain-Aware State Space Model for Global-Local Alignment in Domain Adaptive Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8000-8010} }
Material Magic Wand: Material-Aware Grouping of 3D Parts in Untextured Meshes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jain_2026_CVPR, author = {Jain, Umangi and Kim, Vladimir and Gadelha, Matheus and Gilitschenski, Igor and Chen, Zhiqin}, title = {Material Magic Wand: Material-Aware Grouping of 3D Parts in Untextured Meshes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6377-6387} }
LEADER: Learning Reliable Local-to-Global Correspondences for LiDAR Relocalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Jianshi and Zhu, Minghang and Liu, Dunqiang and Li, Wen and Ao, Sheng and Shen, Siqi and Wen, Chenglu and Wang, Cheng}, title = {LEADER: Learning Reliable Local-to-Global Correspondences for LiDAR Relocalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9932-9942} }
SwiftVLA: Unlocking Spatiotemporal Dynamics for Lightweight VLA Models at Minimal Overhead-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ni_2026_CVPR, author = {Ni, Chaojun and Chen, Cheng and Wang, Xiaofeng and Zhu, Zheng and Zheng, Wenzhao and Wang, Boyuan and Chen, Tianrun and Zhao, Guosheng and Li, Haoyun and Dong, Zhehao and Zhang, Qiang and Ye, Yun and Wang, Yang and Huang, Guan and Mei, Wenjun}, title = {SwiftVLA: Unlocking Spatiotemporal Dynamics for Lightweight VLA Models at Minimal Overhead}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13474-13485} }
SpatiaLQA: A Benchmark for Evaluating Spatial Logical Reasoning in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Yuechen and Zhang, Xiaoyan and Shan, Yicheng and Hao, Zhu and Tang, Rui and Wei, Rong and Song, Mingli and Wan, Yuanyu and Song, Jie}, title = {SpatiaLQA: A Benchmark for Evaluating Spatial Logical Reasoning in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2645-2657} }
Omni-MMSI: Toward Identity-attributed Social Interaction Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xinpeng and Lai, Bolin and Chen, Hardy and Deng, Shijian and Xie, Cihang and Zhou, Yuyin and Rehg, James M. and Tian, Yapeng}, title = {Omni-MMSI: Toward Identity-attributed Social Interaction Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8684-8696} }
Unsupervised 3d Motion Estimation Using Event Camera-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Han and Zhai, Wei and Zhao, Tiesong and Li, Bin and Cao, Yang and Zha, Zheng-jun}, title = {Unsupervised 3d Motion Estimation Using Event Camera}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8067-8076} }
Beyond Duality: A Hybrid Framework of Leveraging Shared and Private Features for RGB-Event Object Detection-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Keyao and Liu, Shuai and Shi, Hengda and Shi, Lukui and Chen, Haiyong}, title = {Beyond Duality: A Hybrid Framework of Leveraging Shared and Private Features for RGB-Event Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4415-4424} }
HierUQ: Hierarchical Uncertainty Quantification with Adaptive Granularity Reconciliation for Degraded Image Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Chu_2026_CVPR, author = {Chu, Yang and Yang, Xiaomeng and Deng, Keli and Qian, Yuntao}, title = {HierUQ: Hierarchical Uncertainty Quantification with Adaptive Granularity Reconciliation for Degraded Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11240-11249} }
Stable Spike: Dual Consistency Optimization via Bitwise AND Operations for Spiking Neural Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2026_CVPR, author = {Ding, Yongqi and Yang, Kunshan and Li, Linze and Zhang, Yiyang and Jing, Mengmeng and Zuo, Lin}, title = {Stable Spike: Dual Consistency Optimization via Bitwise AND Operations for Spiking Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {892-902} }
Selfi: Self-improving Reconstruction Engine via 3D Geometric Feature Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Youming and Peng, Songyou and Zhang, Junyi and Heal, Kathryn and Sun, Tiancheng and Flynn, John and Marschner, Steve and Chai, Lucy}, title = {Selfi: Self-improving Reconstruction Engine via 3D Geometric Feature Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7351-7361} }
Token Reduction via Local and Global Contexts Optimization for Efficient Video Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jinlong and Jiang, Liyuan and Zhang, Haonan and Sebe, Nicu}, title = {Token Reduction via Local and Global Contexts Optimization for Efficient Video Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10451-10461} }
Decoupling Defense Strategies for Robust Image Watermarking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Jiahui and Deng, Zehang and Zhang, Zeyu and Li, Chaoyang and Jia, Lianchen and Sun, Lifeng}, title = {Decoupling Defense Strategies for Robust Image Watermarking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3316-3325} }
What Your Features Reveal: Data-Efficient Black-Box Feature Inversion Attack for Split DNNs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2026_CVPR, author = {Ren, Zhihan and He, Lijun and Liang, Jiaxi and Fu, Xinzhu and Bi, Haixia and Li, Fan}, title = {What Your Features Reveal: Data-Efficient Black-Box Feature Inversion Attack for Split DNNs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13366-13375} }
Rethinking Cross-Modal Anchor Alignment for Mitigating Error Accumulation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Bin and Sun, Wei and Wang, Qianqian and Feng, Wei and Chen, Yijie and Zhang, Haixi}, title = {Rethinking Cross-Modal Anchor Alignment for Mitigating Error Accumulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8664-8673} }
WorldReel: 4D Video Generation with Consistent Geometry and Motion Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Shaoheng and Jiang, Hanwen and Bai, Yunpeng and Mitra, Niloy J. and Huang, Qixing}, title = {WorldReel: 4D Video Generation with Consistent Geometry and Motion Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11195-11206} }
Off The Grid: Detection of Primitives for Feed-Forward 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Moreau_2026_CVPR, author = {Moreau, Arthur and Shaw, Richard and Nazarczuk, Michal and Shin, Jisu and Tanay, Thomas and Zhang, Zhensong and Xu, Songcen and P\'erez-Pellitero, Eduardo}, title = {Off The Grid: Detection of Primitives for Feed-Forward 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11756-11766} }
DMGD: Train-Free Dataset Distillation with Semantic-Distribution Matching in Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Qichao and Lu, Yunhong and Cao, Hengyuan and Zhang, Junyi and Zhang, Min}, title = {DMGD: Train-Free Dataset Distillation with Semantic-Distribution Matching in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12417-12427} }
CamPI: Physical Adversarial Examples through Camera Power Signal Injection-
[pdf]
[supp]
[bibtex]@InProceedings{Ren_2026_CVPR, author = {Ren, Yanze and Lv, Mingyuan and Jiang, Qinhong and Jiang, Yan and Yan, Chen and Ji, Xiaoyu and Xu, Wenyuan}, title = {CamPI: Physical Adversarial Examples through Camera Power Signal Injection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6611-6620} }
Learning Compact 3D Representations from Feed-Forward Novel View Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{An_2026_CVPR, author = {An, Honggyu and Jung, Jaewoo and Kim, Mungyeom and Kim, Chaehyun and Jeon, Minkyeong and Han, Jisang and Fukuda, Kazumi and Narihira, Takuya and Ko, Hyunah and Kim, Junsu and Hong, Sunghwan and Mitsufuji, Yuki and Kim, Seungryong}, title = {Learning Compact 3D Representations from Feed-Forward Novel View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {363-373} }
ShowUI-p: Flow-based Generative Models as GUI Dexterous Hands-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Siyuan and Lin, Kevin Qinghong and Shou, Mike Zheng}, title = {ShowUI-p: Flow-based Generative Models as GUI Dexterous Hands}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8130-8140} }
FontCrafter: High-Fidelity Element-Driven Artistic Font Creation with Visual In-Context Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Wuyang and Tan, Chengkai and Ge, Chang and Hong, Binye and Yang, Su and Ma, Yongjiu}, title = {FontCrafter: High-Fidelity Element-Driven Artistic Font Creation with Visual In-Context Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {583-593} }
SceneTok: A Compressed, Diffusable Token Space for 3D Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Asim_2026_CVPR, author = {Asim, Mohammad and Wewer, Christopher and Lenssen, Jan Eric}, title = {SceneTok: A Compressed, Diffusable Token Space for 3D Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5870-5880} }
DRM: Diffusion-based Reward Model With Step-wise Guidance-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jaxon and Yang, Binxin and Yin, Hubery and Li, Chen and LYU, Jing}, title = {DRM: Diffusion-based Reward Model With Step-wise Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12764-12774} }
TruckDrive: Long-Range Autonomous Highway Driving Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ghilotti_2026_CVPR, author = {Ghilotti, Filippo and Palladin, Edoardo and Brucker, Samuel and Sigal, Adam and Bijelic, Mario and Heide, Felix}, title = {TruckDrive: Long-Range Autonomous Highway Driving Dataset}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10587-10598} }
Multimodal Causality-Driven Representation Learning for Generalizable Medical Image Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Xusheng and Zhou, Lihua and Li, Nianxin and Xu, Miao and Song, Ziyang and Yi, Dong and Wu, Jinlin and Ma, Jiawei and Liu, Hongbin and Lei, Zhen and Luo, Jiebo}, title = {Multimodal Causality-Driven Representation Learning for Generalizable Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13670-13679} }
GeoFlow: Real-Time Fine-Grained Cross-View Geolocalization via Iterative Flow Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Abu_Lehyeh_2026_CVPR, author = {Abu Lehyeh, Ayesh and Zhang, Xiaohan and Arrabi, Ahmad and Sultani, Waqas and Chen, Chen and Wshah, Safwan}, title = {GeoFlow: Real-Time Fine-Grained Cross-View Geolocalization via Iterative Flow Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5369-5378} }
RealBirdID: Benchmarking Bird Species Identification in the Era of MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lawrence_2026_CVPR, author = {Lawrence, Logan and Saha, Oindrila and Daroya, Rangel and Chasmai, Mustafa and Liu, Wuao and Hamilton, Max and Sun, Aaron and Jeong, Seoyun and Delattre, Fabien and Maji, Subhransu and Van Horn, Grant}, title = {RealBirdID: Benchmarking Bird Species Identification in the Era of MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2445-2456} }
Diff4Splat: Repurposing Video Diffusion Models for Dynamic Scene Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Panwang and Lin, Chenguo and Li, Chenxin and Zhao, Jingjing and Lin, Yuchen and Li, Haopeng and Lin, Yunlong and Wen, Kairun and Yuan, Yixuan and MU, Yadong}, title = {Diff4Splat: Repurposing Video Diffusion Models for Dynamic Scene Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4232-4244} }
X-WIN: Building Chest Radiograph World Model via Predictive Sensing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zefan and Wang, Ge and Hendler, James and Kalra, Mannudeep K. and Yan, Pingkun}, title = {X-WIN: Building Chest Radiograph World Model via Predictive Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6920-6930} }
OPRO: Orthogonal Panel-Relative Operators for Panel-Aware In-Context Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Sanghyeon and Lee, Minwoo and Shin, Euijin and Kim, Kangyeol and Choi, Seunghwan and Choo, Jaegul}, title = {OPRO: Orthogonal Panel-Relative Operators for Panel-Aware In-Context Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9233-9242} }
Submodel Extraction for Efficient and Personalized Federated Learning via Optimal Transport-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Zheng and He, Nan and Chen, Yiming and Sun, Lifeng}, title = {Submodel Extraction for Efficient and Personalized Federated Learning via Optimal Transport}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3369-3378} }
Improving Controllable Generation: Faster Training and Better Performance via x0-Supervision-
[pdf]
[supp]
[bibtex]@InProceedings{Sangare_2026_CVPR, author = {Sangare, Amadou S. and Maglo, Adrien and Chaouch, Mohamed and Luvison, Bertrand}, title = {Improving Controllable Generation: Faster Training and Better Performance via x0-Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9106-9115} }
AdaIAT: Adaptively Increasing Attention to Generated Text to Alleviate Hallucinations in LVLM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2026_CVPR, author = {Zhong, Li'an and He, Ziqiang and Zheng, Jibin and Li, Jin and Wang, Z. Jane and Kang, Xiangui}, title = {AdaIAT: Adaptively Increasing Attention to Generated Text to Alleviate Hallucinations in LVLM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11076-11085} }
Visual-RRT: Finding Paths toward Visual-Goals via Differentiable Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Sebin and Lee, Jumin and Kim, Taeyeon and Na, Youngju and Im, Woobin and Yoon, Sung-Eui}, title = {Visual-RRT: Finding Paths toward Visual-Goals via Differentiable Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13486-13495} }
STiTch: Semantic Transition and Transportation in Collaboration for Training-Free Zero-Shot Composed Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Miaoge and Wang, Dongsheng and Sun, Zening and Zhang, Jinsen and Luo, Wenhan and Guo, Jingcai}, title = {STiTch: Semantic Transition and Transportation in Collaboration for Training-Free Zero-Shot Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12374-12384} }
Role-SynthCLIP: A Role-Play Driven Diverse Synthetic Data Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huangfu_2026_CVPR, author = {Huangfu, Yuanxiang and Wang, Chaochao and Wang, Weilei}, title = {Role-SynthCLIP: A Role-Play Driven Diverse Synthetic Data Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10142-10151} }
MAPo: Motion-Aware Partitioning of Deformable 3D Gaussian Splatting for High-Fidelity Dynamic Scene Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiao_2026_CVPR, author = {Jiao, Han and Sun, Jiakai and Xu, Yexing and Zhao, Lei and Xing, Wei and Lin, Huaizhong}, title = {MAPo: Motion-Aware Partitioning of Deformable 3D Gaussian Splatting for High-Fidelity Dynamic Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11767-11776} }
D-Prism: Differentiable Primitives for Structured Dynamic Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Xingyuan and Li, Yijin and Zeng, Chong and Ming, Yuhang and Bao, Hujun and Zhang, Guofeng}, title = {D-Prism: Differentiable Primitives for Structured Dynamic Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7556-7566} }
LiDAS: Lighting-driven Dynamic Active Sensing for Nighttime Perception-
[pdf]
[supp]
[bibtex]@InProceedings{de_Moreau_2026_CVPR, author = {de Moreau, Simon and Bursuc, Andrei and El Idrissi, Hafid and Moutarde, Fabien}, title = {LiDAS: Lighting-driven Dynamic Active Sensing for Nighttime Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14188-14197} }
AVA-VLA: Improving Vision-Language-Action models with Active Visual Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Lei and Li, Jifeng and Gao, Juntao and Ye, Feiyang and Jin, Yan and Qian, Jingjing and Zhang, Jing and Wu, Yong and Yu, Xiaoyuan}, title = {AVA-VLA: Improving Vision-Language-Action models with Active Visual Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13453-13463} }
EgoControl: Controllable Egocentric Video Generation via 3D Full-Body Poses-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pallotta_2026_CVPR, author = {Pallotta, Enrico and Azar, Sina Mokhtarzadeh and Doorenbos, Lars and Ozsoy, Serdar and Iqbal, Umar and Gall, Juergen}, title = {EgoControl: Controllable Egocentric Video Generation via 3D Full-Body Poses}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4269-4279} }
Act2See: Emergent Active Visual Perception for Video Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Martin Q. and Qu, Yuxiao and Agrawal, Aditya and Guo, Willis and Liang, Paul Pu and Salakhutdinov, Ruslan and Morency, Louis-Philippe}, title = {Act2See: Emergent Active Visual Perception for Video Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5455-5464} }
RaGS: Unleashing 3D Gaussian Splatting from 4D Radar and Monocular Cue for 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2026_CVPR, author = {Bai, Xiaokai and Zhou, Chenxu and Zheng, Lianqing and Liu, Jianan and Cao, Si-Yuan and Zhang, Xiaohan and Li, Yiming and Zhang, Zhengzhuang and Shen, Hui-Liang}, title = {RaGS: Unleashing 3D Gaussian Splatting from 4D Radar and Monocular Cue for 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4983-4992} }
Parameter-efficient Continual Learning for Enhancing Plasticity without Forgetting under Limited Model Capacity-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yitian and Zhang, Shigeng and Liu, Xuan and Lu, Mingming and Chen, Kai and Zhu, Hongye and Chen, Xinning}, title = {Parameter-efficient Continual Learning for Enhancing Plasticity without Forgetting under Limited Model Capacity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10789-10798} }
From Few-way to Many-way: Rethinking Few-shot Fine-grained Image Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Li-Jun and Chen, Zhen-Duo and Luo, Xin and Xu, Xin-Shun}, title = {From Few-way to Many-way: Rethinking Few-shot Fine-grained Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12364-12373} }
High-Precision Dichotomous Image Segmentation via Depth Integrity-Prior and Fine-Grained Patch Strategy-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xianjie and Fu, Keren and Zhao, Qijun}, title = {High-Precision Dichotomous Image Segmentation via Depth Integrity-Prior and Fine-Grained Patch Strategy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6357-6366} }
Ani3DHuman: Photorealistic 3D Human Animation with Self-guided Stochastic Sampling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Qi and Wang, Can and Shang, Jiaxiang and Liu, Yingchun and Liao, Jing}, title = {Ani3DHuman: Photorealistic 3D Human Animation with Self-guided Stochastic Sampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12651-12662} }
A Debiased Reconstruction-based Framework for Training-Free Detection of AI-Generated Images-
[pdf]
[supp]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, Sungik and Lee, Hankook and Lee, Jaehoon and Kim, Robin and Choi, Stanley Jungkyu and Lee, Moontae}, title = {A Debiased Reconstruction-based Framework for Training-Free Detection of AI-Generated Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3254-3263} }
StyleTextGen: Style-Conditioned Multilingual Scene Text Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zeyu and Zhao, Fangmin and Shu, Yan and Liu, Yichao and Yu, Liu and Zhou, Yu}, title = {StyleTextGen: Style-Conditioned Multilingual Scene Text Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7643-7653} }
Single-step Diffusion-based Video Coding with Semantic-Temporal Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2026_CVPR, author = {Xue, Naifu and Jia, Zhaoyang and Li, Jiahao and Li, Bin and Zheng, Zihan and Zhang, Yuan and Lu, Yan}, title = {Single-step Diffusion-based Video Coding with Semantic-Temporal Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9752-9761} }
Neural Collapse in Test-Time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xiao and Du, Zhongjing and Huang, Jiazhen and Jiang, Xu and Lu, Li and Jiang, Jingyan and Wang, Zhi}, title = {Neural Collapse in Test-Time Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10567-10576} }
ManifoldGD: Training-Free Hierarchical Manifold Guidance for Diffusion-Based Dataset Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Roy_2026_CVPR, author = {Roy, Ayush and Lee, Wei-Yang Alex and Chakraborty, Rudrasis and Lokhande, Vishnu Suresh}, title = {ManifoldGD: Training-Free Hierarchical Manifold Guidance for Diffusion-Based Dataset Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12406-12416} }
Flash-DMD: Towards High-Fidelity Few-Step Image Generation with Efficient Distillation and Joint Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Guanjie and Huang, Shirui and Sun, Yifu and Liu, Kai and Zhu, Jianchen and Qu, Xiaoye and Cheng, Yu and Chen, Peng}, title = {Flash-DMD: Towards High-Fidelity Few-Step Image Generation with Efficient Distillation and Joint Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6010-6020} }
MambaSIC: Mamba-based Stereo Image Compression with Bi-directional Multi-reference Entropy Model-
[pdf]
[supp]
[bibtex]@InProceedings{Qin_2026_CVPR, author = {Qin, Shiyu and Zhang, Xinjie and Liu, Zhening and Wang, Jinpeng and Chen, Bin and Li, Jiawei and Ren, Yifan and Xia, Shu-Tao and Zhang, Jun}, title = {MambaSIC: Mamba-based Stereo Image Compression with Bi-directional Multi-reference Entropy Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5306-5315} }
ActivePolicy: Active Gaussian Reconstruction and Optimization Strategy Based on Global-Local Information Gain-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yingzhao and Liu, Yanjie and Zhao, Lijun}, title = {ActivePolicy: Active Gaussian Reconstruction and Optimization Strategy Based on Global-Local Information Gain}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5003-5013} }
Thinking Diffusion: Penalize and Guide Visual-Grounded Reasoning in Diffusion Multimodal Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Keuntae and Kang, Mingyu and Choi, Yong Suk}, title = {Thinking Diffusion: Penalize and Guide Visual-Grounded Reasoning in Diffusion Multimodal Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5154-5164} }
Visual-Aware CoT: Achieving High-Fidelity Visual Consistency in Unified Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Zixuan and Liu, Quande and Wei, Cong and Zhang, Yuanxing and Wang, Xintao and Wan, Pengfei and Gai, Kun and Luo, Wenhan}, title = {Visual-Aware CoT: Achieving High-Fidelity Visual Consistency in Unified Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9116-9126} }
Adaptive Learned Image Compression with Graph Neural Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yunuo and He, Bing and Lyu, Zezheng and Hu, Hongwei and Gu, Qunshan and Tian, Yuan and Lu, Guo}, title = {Adaptive Learned Image Compression with Graph Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12150-12161} }
ProOOD: Prototype-Guided Out-of-Distribution 3D Occupancy Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yuheng and Duan, Mengfei and Peng, Kunyu and Wang, Yuhang and Wen, Di and Paudel, Danda Pani and Van Gool, Luc and Yang, Kailun}, title = {ProOOD: Prototype-Guided Out-of-Distribution 3D Occupancy Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14241-14252} }
ZeroIDIR: Zero-Reference Illumination Degradation Image Restoration with Perturbed Consistency Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Hai and Liu, Zhen and Lei, Yinjie and Han, Songchen and Zeng, Bing and Liu, Shuaicheng}, title = {ZeroIDIR: Zero-Reference Illumination Degradation Image Restoration with Perturbed Consistency Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1320-1330} }
HiFi-Inpaint: Towards High-Fidelity Reference-Based Inpainting for Generating Detail-Preserving Human-Product Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yichen and Zhou, Donghao and Wang, Jie and Gao, Xin and Liu, Guisheng and Li, Jiatong and Zhang, Quanwei and Lyu, Qiang and Guo, Lanqing and Wen, Shilei and Wang, Weiqiang and Heng, Pheng-Ann}, title = {HiFi-Inpaint: Towards High-Fidelity Reference-Based Inpainting for Generating Detail-Preserving Human-Product Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1994-2004} }
ClusterMark: Towards Robust Watermarking for Autoregressive Image Generators with Visual Token Clustering-
[pdf]
[supp]
[bibtex]@InProceedings{Lukovnikov_2026_CVPR, author = {Lukovnikov, Denis and M{\"u}ller, Andreas and Quiring, Erwin and Fischer, Asja}, title = {ClusterMark: Towards Robust Watermarking for Autoregressive Image Generators with Visual Token Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9213-9222} }
The Coherence Trap: When MLLM-Crafted Narratives Exploit Manipulated Visual Contexts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yuchen and Wang, Yaxiong and Wu, Yujiao and Wu, Lianwei and Zhu, Li and Zheng, Zhedong}, title = {The Coherence Trap: When MLLM-Crafted Narratives Exploit Manipulated Visual Contexts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8760-8769} }
Customized Fusion: A Closed-Loop Dynamic Network for Adaptive Multi-Task-Aware Infrared-Visible Image Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zengyi and Liu, Yu and Cheng, Juan and Zhu, Zhiqin and Zhang, Yafei and Li, Huafeng}, title = {Customized Fusion: A Closed-Loop Dynamic Network for Adaptive Multi-Task-Aware Infrared-Visible Image Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {188-198} }
FUSER: Feed-Forward Multiview 3D Registration Transformer and SE(3)$^N$ Diffusion Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Haobo and Xie, Jin and Yang, Jian and Yu, Liang and Zheng, Jianmin}, title = {FUSER: Feed-Forward Multiview 3D Registration Transformer and {SE(3)$^N$} Diffusion Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7393-7403} }
PointCSP: Cross-Sample Semantic Propagation and Stability Preservation in Self-Supervised Point Cloud Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Xinxing and Liu, Ajian and Qiang, Sunyuan and Ma, Hui and Yang, Liying and Wang, Yuzhong and Rao, Zhi and Liang, Yanyan}, title = {PointCSP: Cross-Sample Semantic Propagation and Stability Preservation in Self-Supervised Point Cloud Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10016-10026} }
Neighbor GRPO: Contrastive ODE Policy Optimization Aligns Flow Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Dailan and Feng, Guanlin and Ge, Xingtong and Niu, Yazhe and Zhang, Yi and Ma, Bingqi and Song, Guanglu and Liu, Yu and Li, Hongsheng}, title = {Neighbor GRPO: Contrastive ODE Policy Optimization Aligns Flow Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6033-6042} }
CustomTex: High-fidelity Indoor Scene Texturing via Multi-Reference Customization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Weilin and Rao, Jiahao and Wang, Wenhao and Li, Xinyang and Cheng, Xuan and Cao, Liujuan}, title = {CustomTex: High-fidelity Indoor Scene Texturing via Multi-Reference Customization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4280-4290} }
SeeGroup: Multi-Layer Depth Estimation of Transparent Surfaces via Self-Determined Grouping-
[pdf]
[supp]
[bibtex]@InProceedings{Wen_2026_CVPR, author = {Wen, Hongyu and Deng, Jia}, title = {SeeGroup: Multi-Layer Depth Estimation of Transparent Surfaces via Self-Determined Grouping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7299-7309} }
CrossVL: Complexity-Aware Feature Routing and Paired Curriculum for Cross-View Vision-Language Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Zhipeng and Luo, Chunbo}, title = {CrossVL: Complexity-Aware Feature Routing and Paired Curriculum for Cross-View Vision-Language Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10116-10125} }
GFRRN: Explore the Gaps in Single Image Reflection Removal-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yu and He, Zewei and Liu, Xingyu and Chen, Zixuan and Lu, Zhe-Ming}, title = {GFRRN: Explore the Gaps in Single Image Reflection Removal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5690-5699} }
MoVieS: Motion-Aware 4D Dynamic View Synthesis in One Second-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Chenguo and Lin, Yuchen and Pan, Panwang and Yu, Yifan and Hu, Tao and Yan, Honglei and Fragkiadaki, Katerina and Mu, Yadong}, title = {MoVieS: Motion-Aware 4D Dynamic View Synthesis in One Second}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {295-306} }
Affostruction: 3D Affordance Grounding with Generative Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Chunghyun and Lee, Seunghyeon and Cho, Minsu}, title = {Affostruction: 3D Affordance Grounding with Generative Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7435-7445} }
Is Parameter Isolation Better for Prompt-Based Continual Learning?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jiangyang and Ding, Chenhao and Dong, SongLin and Wang, Qiang and Zhao, Jianchao and He, Yuhang and Gong, Yihong}, title = {Is Parameter Isolation Better for Prompt-Based Continual Learning?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3887-3897} }
MatPedia: A Universal Generative Foundation for High-Fidelity Material Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Di and Yang, Shuhui and Yang, Mingxin and Lu, Jiawei and Tang, Yixuan and Han, Xintong and Chen, Zhuo and Wang, Beibei and Guo, Chunchao}, title = {MatPedia: A Universal Generative Foundation for High-Fidelity Material Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8943-8953} }
Cluster-Aware Neural Collapse Prompt Tuning for Long-Tailed Generalization of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Boyang and Li, Liang and Peng, Lin and Gao, Yuhan and Sheng, Xichun and Yan, Chenggang}, title = {Cluster-Aware Neural Collapse Prompt Tuning for Long-Tailed Generalization of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3122-3132} }
Gaussian-Mixture Latent Flow for Stochastic 3D Human Motion Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Yue and Li, Frederick W. B. and Liang, Xiaohui}, title = {Gaussian-Mixture Latent Flow for Stochastic 3D Human Motion Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7131-7141} }
MD2E: Modeling Depth-to-Edge Cues for Monocular Metric Depth Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Ning_2026_CVPR, author = {Ning, Chao and Shen, Minghe and Yokoya, Naoto}, title = {MD2E: Modeling Depth-to-Edge Cues for Monocular Metric Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5772-5782} }
GenSplat: Bridging the Generalization Gap in 3DGS Language Comprehension-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Fang and Liu, Yuhao and Xu, Ke and Hancke, Gerhard Petrus and Lau, Rynson W. H.}, title = {GenSplat: Bridging the Generalization Gap in 3DGS Language Comprehension}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5221-5231} }
PhyGaP: Physically-Grounded Gaussians with Polarization Cues-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Jiale and Bai, Xiaoyang and He, Zongqi and Xu, Weiwei and Peng, Yifan}, title = {PhyGaP: Physically-Grounded Gaussians with Polarization Cues}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7278-7288} }
Omni-3DEdit: Generalized Versatile 3D Editing in One-Pass-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Liyi and Wang, Pengfei and Zhang, Guowen and Ma, Zhiyuan and Zhang, Lei}, title = {Omni-3DEdit: Generalized Versatile 3D Editing in One-Pass}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12640-12650} }
UnReflectAnything: RGB-Only Highlight Removal by Rendering Synthetic Specular Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rota_2026_CVPR, author = {Rota, Alberto and Kiray, Mert and Karaoglu, Mert Asim and Ruhkamp, Patrick and De Momi, Elena and Navab, Nassir and Busam, Benjamin}, title = {UnReflectAnything: RGB-Only Highlight Removal by Rendering Synthetic Specular Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {241-250} }
FOZO: Forward-Only Zeroth-Order Prompt Optimization for Test-Time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xingyu and Wang, Tao}, title = {FOZO: Forward-Only Zeroth-Order Prompt Optimization for Test-Time Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7936-7945} }
AffordMatcher: Affordance Learning in 3D Scenes from Visual Signifiers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vu_2026_CVPR, author = {Vu, Nghia and Do, Tuong and Nguyen, Khang and Huang, Baoru and Le, Nhat and Nguyen, Binh Xuan and Tjiputra, Erman and Tran, Quang D. and Prakash, Ravi and Chiu, Te-Chuan and Nguyen, Anh}, title = {AffordMatcher: Affordance Learning in 3D Scenes from Visual Signifiers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2635-2644} }
Language-Grounded Decoupled Action Representation for Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Weng_2026_CVPR, author = {Weng, Wuding and Wu, Tongshu and Chen, Liucheng and Xie, Siyu and Wang, Zheng and Xu, Xing and Song, Jingkuan and Shen, Heng Tao}, title = {Language-Grounded Decoupled Action Representation for Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6770-6780} }
Exact-GS: Mathematically Rigorous and Accurate 3D Gaussian Splatting for 3D X-ray Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Guangpu and Kie{\ss}, Steffen and Luo, Hanxiang and Liu, Xingyu and Simon, Sven}, title = {Exact-GS: Mathematically Rigorous and Accurate 3D Gaussian Splatting for 3D X-ray Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4902-4911} }
SDGS: Spatial Difference Guided Gaussian Splatting for Simultaneous Localization and 3D Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Tian_2026_CVPR, author = {Tian, Yijian and Ou, Mingtao and Pan, Zijian and Ji, Xinglong}, title = {SDGS: Spatial Difference Guided Gaussian Splatting for Simultaneous Localization and 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4860-4869} }
ShreddingNet: Coarse-to-Fine Restoration for Multi-Source Shredded Manuscripts-
[pdf]
[supp]
[bibtex]@InProceedings{Cui_2026_CVPR, author = {Cui, Haoyang and Jiang, Hao and Mu, Yadong}, title = {ShreddingNet: Coarse-to-Fine Restoration for Multi-Source Shredded Manuscripts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8247-8256} }
RegFormer: Transferable Relational Grounding for Efficient Weakly-Supervised Human-Object Interaction Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Jihwan and Yang, Chanhyeong and Park, Jinyoung and Song, Taehoon and Kim, Hyunwoo J.}, title = {RegFormer: Transferable Relational Grounding for Efficient Weakly-Supervised Human-Object Interaction Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10387-10396} }
MotionScale: Reconstructing Appearance, Geometry, and Motion of Dynamic Scenes with Scalable 4D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Haoran and Lee, Gim Hee}, title = {MotionScale: Reconstructing Appearance, Geometry, and Motion of Dynamic Scenes with Scalable 4D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11860-11870} }
Better than Average: Spatially-Aware Aggregation of Segmentation Uncertainty Improves Downstream Performance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guarino_2026_CVPR, author = {Guarino, Vanessa Emanuela and Winklmayr, Claudia and Franzen, Jannik and Rumberger, Josef Lorenz and Pfeuffer, Manuel and Greven, Sonja and Maier-Hein, Klaus and Kainmueller, Dagmar and Karg, Christoph and L{\"u}th, Carsten T.}, title = {Better than Average: Spatially-Aware Aggregation of Segmentation Uncertainty Improves Downstream Performance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13145-13156} }
Real-Time Multimodal Fingertip Contact Detection via Depth and Motion Fusion for Vision-Based Human-Computer Interaction-
[pdf]
[supp]
[bibtex]@InProceedings{Toshpulatov_2026_CVPR, author = {Toshpulatov, Mukhiddin and Lee, Wookey and Lee, Suan and Lee, Geehyuk}, title = {Real-Time Multimodal Fingertip Contact Detection via Depth and Motion Fusion for Vision-Based Human-Computer Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1819-1828} }
MMGait: Towards Multi-Modal Gait Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Chenye and Cai, Qingyuan and Hou, Saihui and Li, Aoqi and Huang, Yongzhen}, title = {MMGait: Towards Multi-Modal Gait Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1726-1736} }
Differentiable Adaptive 4D Structured Illumination for Joint Capture of Shape and Reflectance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2026_CVPR, author = {Ding, Huakeng and Chen, Yaowen and Zhou, Kun and Wu, Hongzhi}, title = {Differentiable Adaptive 4D Structured Illumination for Joint Capture of Shape and Reflectance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12448-12457} }
Efficient Real-Time Raw-to-Raw Denoising for Extreme Low-Light Ultra HD Video on Mobile Devices-
[pdf]
[supp]
[bibtex]@InProceedings{Pochimireddy_2026_CVPR, author = {Pochimireddy, Charantej and Sahoo, Subhasmita and Verma, Apoorva and Shyam, Palavalli and Malviya, Swapnil and Sarvesh, Sarvesh and Gadde, Raj}, title = {Efficient Real-Time Raw-to-Raw Denoising for Extreme Low-Light Ultra HD Video on Mobile Devices}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1376-1385} }
OneSparse: A Unified Framework for Sparse Activation Layers in Vision Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Xingkui and Liang, Dingkang and Chen, Cheng and Zhang, Daoxin and Hanxiang, lv and Xu, Zhe and Hu, Yao and Bai, Xiang}, title = {OneSparse: A Unified Framework for Sparse Activation Layers in Vision Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12084-12094} }
Content-Aware Frequency Encoding for Implicit Neural Representations with Fourier-Chebyshev Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ke_2026_CVPR, author = {Ke, Junbo and Xu, Yangyang and Wang, Chao and Wen, You-Wei}, title = {Content-Aware Frequency Encoding for Implicit Neural Representations with Fourier-Chebyshev Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3646-3655} }
PinPoint: Evaluation of Composed Image Retrieval with Explicit Negatives, Multi-Image Queries, and Paraphrase Testing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Mahadev_2026_CVPR, author = {Mahadev, Rohan and Yuan, Joyce and Poirson, Patrick and Xue, David and Wu, Hao-Yu and Kislyuk, Dmitry}, title = {PinPoint: Evaluation of Composed Image Retrieval with Explicit Negatives, Multi-Image Queries, and Paraphrase Testing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9742-9751} }
Physical Object Understanding with a Physically Controllable World Model-
[pdf]
[supp]
[bibtex]@InProceedings{Venkatesh_2026_CVPR, author = {Venkatesh, Rahul and Kotar, Klemen and Chen, Lilian Naing and Lee, Wanhee and Ancone, Gia and Kim, Seungwoo and Wheeler, Luca Thomas and Watrous, Jared and Chen, Honglin and Bear, Daniel and Stojanov, Stefan and Yamins, Daniel LK}, title = {Physical Object Understanding with a Physically Controllable World Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2593-2602} }
Learning Effective Sign Features without Text for Gloss-free Sign Language Translation-
[pdf]
[supp]
[bibtex]@InProceedings{Gan_2026_CVPR, author = {Gan, Shiwei and Liu, Xiao and Yin, Yafeng and Liu, Nan and Liu, Kuizhuang and Tuerdaken, Desibieer and Jiang, Zhiwei and Xie, Lei and Lu, Sanglu and Wen, Hongkai}, title = {Learning Effective Sign Features without Text for Gloss-free Sign Language Translation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9827-9836} }
Towards Foundation Models for 3D Scene Understanding: Instance-Aware Self-Supervised Learning for Point Clouds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Bin and Abdelsamad, Mohamed and Zhang, Miao and Condurache, Alexandru Paul}, title = {Towards Foundation Models for 3D Scene Understanding: Instance-Aware Self-Supervised Learning for Point Clouds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2938-2947} }
Building a Precise Video Language with Human-AI Oversight-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Zhiqiu and Cen, Siyuan and Mitra, Chancharik and Li, Isaac and Huang, Yuhan and Ling, Yu Tong Tiffany and Wang, Hewei and Pi, Irene and Zhu, Shihang and Han, Yili and Du, Yilun and Ramanan, Deva}, title = {Building a Precise Video Language with Human-AI Oversight}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11334-11345} }
Clair Obscur: an Illumination-Aware Method for Real-World Image Vectorization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Xingyue and Peng, Shuai and Xie, Xiangyu and Zhu, Jianhua and Zhou, Yuxuan and Gao, Liangcai}, title = {Clair Obscur: an Illumination-Aware Method for Real-World Image Vectorization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9161-9170} }
Beyond Strict Pairing: Arbitrarily Paired Training for High-Performance Infrared and Visible Image Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Yanglin and Xu, Tianyang and Cheng, Chunyang and Li, Hui and Wu, Xiaojun and Kittler, Josef}, title = {Beyond Strict Pairing: Arbitrarily Paired Training for High-Performance Infrared and Visible Image Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12334-12343} }
PhotoFramer: Multi-modal Image Composition Instruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{You_2026_CVPR, author = {You, Zhiyuan and Wang, Ke and Zhang, He and Cai, Xin and Gu, Jinjin and Xue, Tianfan and Dong, Chao and Zhang, Zhoutong}, title = {PhotoFramer: Multi-modal Image Composition Instruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10197-10207} }
Design Your Ad: Personalized Advertising Image and Text Generation with Unified Autoregressive Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Yexing and Feng, Wei and Zhang, Shen and Wang, Haohan and Qin, Yuxin and Li, Yaoyu and Ma, Ao and Luo, Yuhao and Wang, Lu and Ren, Xudong and Wang, Haoran and Ling, Run and Zhang, Zheng and Lv, Jingjing and Shen, Junjie and Law, Ching and Wang, Longguang and Guo, Yulan}, title = {Design Your Ad: Personalized Advertising Image and Text Generation with Unified Autoregressive Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {472-483} }
Will Multimodal Models Be Dazzled by Multi-Image Visual Puzzles?-
[pdf]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Zhi and Fan, YaoQi and Chen, Zhe and Cao, Yue and Liu, Yangzhou and Lu, Tong}, title = {Will Multimodal Models Be Dazzled by Multi-Image Visual Puzzles?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11943-11953} }
Unpaired Image Deraining Using Reward-Guided Self-Reinforcement Strategy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yinghao and Jin, Yeying and Chen, Xiang and Wei, Yanyan and Yan, Ziyang and Fu, Yaowen}, title = {Unpaired Image Deraining Using Reward-Guided Self-Reinforcement Strategy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1342-1354} }
Models as Lego Builders: Assembling Malice from Benign Blocks via Semantic Blueprints-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Chenxi and Liu, Xianggan and Shen, Dake and Du, Yaosong and Yao, Zhibo and Jiang, Hao and Jiang, Linyi and Cao, Chengwei and Zhang, Jingzhe and Peng, RanYi and Bai, Peiling and Huang, Xiande}, title = {Models as Lego Builders: Assembling Malice from Benign Blocks via Semantic Blueprints}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1533-1542} }
GroundVTS: Visual Token Sampling in Multimodal Large Language Models for Video Temporal Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Rong and Xiao, Kaiyan and Zhu, Minghao and Wang, Liuyi and Dai, Kai and Yang, Zhao}, title = {GroundVTS: Visual Token Sampling in Multimodal Large Language Models for Video Temporal Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10408-10418} }
The Power of Prior: Training-Free Open-Vocabulary Semantic Segmentation with LLaVA-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Bingfeng and Yu, Siyue and Li, Hui and Lin, Jiahua and Wang, Wenwu and Xiao, Jimin}, title = {The Power of Prior: Training-Free Open-Vocabulary Semantic Segmentation with LLaVA}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6336-6345} }
Vanast: Virtual Try-On with Human Image Animation via Synthetic Triplet Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cha_2026_CVPR, author = {Cha, Hyunsoo and Woo, Wonjung and Kim, Byungjun and Joo, Hanbyul}, title = {Vanast: Virtual Try-On with Human Image Animation via Synthetic Triplet Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3988-3997} }
Image Guides Images: Consistent Video Amodal Completion with Rectified In-Context Exemplar Guidance-
[pdf]
[supp]
[bibtex]@InProceedings{Kong_2026_CVPR, author = {Kong, Xiaoyu and Ren, Ketong and She, Dongyu and Dong, Weiming and Wang, Miao}, title = {Image Guides Images: Consistent Video Amodal Completion with Rectified In-Context Exemplar Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8257-8266} }
HG-Lane: High-Fidelity Generation of Lane Scenes under Adverse Weather and Lighting Conditions without Re-annotation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Daichao and Chen, Qiupu and He, Feng and Ning, Xin and Li, Qiankun}, title = {HG-Lane: High-Fidelity Generation of Lane Scenes under Adverse Weather and Lighting Conditions without Re-annotation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8439-8448} }
ParticleGS: Learning Neural Gaussian Particle Dynamics from Videos for Prior-free Physical Motion Extrapolation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Quan_2026_CVPR, author = {Quan, Jinsheng and Miao, Qiaowei and Xu, Yichao and Lin, Zizhuo and Li, Ying and Yang, Wei and Li, Zhihui and Luo, Yawei}, title = {ParticleGS: Learning Neural Gaussian Particle Dynamics from Videos for Prior-free Physical Motion Extrapolation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8331-8341} }
CoRoGS: Contextual Gaussian Splatting for Robust Large-Deviation View Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Xin and Lu, Peng and Chen, Yisong and Pan, Chengwei and Li, Sheng}, title = {CoRoGS: Contextual Gaussian Splatting for Robust Large-Deviation View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8288-8297} }
ReAG: Reasoning-Augmented Generation for Knowledge-based Visual Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Compagnoni_2026_CVPR, author = {Compagnoni, Alberto and Morini, Marco and Sarto, Sara and Cocchi, Federico and Caffagni, Davide and Cornia, Marcella and Baraldi, Lorenzo and Cucchiara, Rita}, title = {ReAG: Reasoning-Augmented Generation for Knowledge-based Visual Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11901-11911} }
LiDeRe: A Lightweight Readout for Fast and Data-Efficient Dense Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Luddecke_2026_CVPR, author = {L{\"u}ddecke, Timo and Meier, Jan Frederik and van Delden, Jan and Ecker, Alexander}, title = {LiDeRe: A Lightweight Readout for Fast and Data-Efficient Dense Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2959-2971} }
DFD-HR: Generalizable Deepfake Detection via Hierarchical Routing Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Jiamu and Yan, Zhiyuan and Zhang, Ke-Yue and Yao, Taiping and Ding, Shouhong}, title = {DFD-HR: Generalizable Deepfake Detection via Hierarchical Routing Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13984-13995} }
Soul: Breathe Life into Digital Human for High-fidelity Long-term Multimodal Animation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jiangning and Zhu, Junwei and Gan, Zhenye and Luo, Donghao and Lin, Chuming and Xu, FeiFan and Peng, Xu and Hu, Jianlong and Liu, Yuansen and Hong, Yijia and Cao, Weijian and Feng, Han and Chen, Xu and Fu, Chencan and He, Keke and Hu, Xiaobin and Wang, Chengjie}, title = {Soul: Breathe Life into Digital Human for High-fidelity Long-term Multimodal Animation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3953-3964} }
Data Leakage Detection and De-duplication in Large Scale Geospatial Image Datasets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Adimoolam_2026_CVPR, author = {Adimoolam, Yeshwanth Kumar and Poullis, Charalambos and Averkiou, Melinos}, title = {Data Leakage Detection and De-duplication in Large Scale Geospatial Image Datasets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {72-81} }
ConsistCompose: Unified Multimodal Layout Control for Image Composition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Xuanke and Li, Boxuan and Han, Xiaoyang and Cai, Zhongang and Yang, Lei and Wang, Quan and Lin, Dahua}, title = {ConsistCompose: Unified Multimodal Layout Control for Image Composition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {495-505} }
iSplat: Iterative Learning for Fine-Grained Gaussian Splatting-
[pdf]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Haifeng and Long, Wei and Gu, Shuhang and Duan, Lixin and Li, Wen}, title = {iSplat: Iterative Learning for Fine-Grained Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11746-11755} }
Gyro-based Deep Video Deblurring-
[pdf]
[supp]
[bibtex]@InProceedings{Rim_2026_CVPR, author = {Rim, Jaesung and Kim, Woohyeok and Lee, Haeyun and Yang, Heemin and Wang, Ke and Cho, Sunghyun}, title = {Gyro-based Deep Video Deblurring}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8364-8374} }
PSDesigner: Automated Graphic Design with a Human-Like Creative Workflow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shuai_2026_CVPR, author = {Shuai, Xincheng and Tang, Song and Huang, Yutong and Ding, Henghui and Tao, Dacheng}, title = {PSDesigner: Automated Graphic Design with a Human-Like Creative Workflow}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10165-10175} }
TWEO: Transformers Without Extreme Outliers Enables FP8 Training And Quantization For Dummies-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Guang and Shao, Jie and Tang, Ningyuan and Liu, Xinyao and Wu, Jianxin}, title = {TWEO: Transformers Without Extreme Outliers Enables FP8 Training And Quantization For Dummies}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6095-6105} }
HandVQA: Diagnosing and Improving Fine-Grained Spatial Reasoning about Hands in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sayem_2026_CVPR, author = {Sayem, MD Khalequzzaman Chowdhury and Chowdhury, Mubarrat Tajoar and Tiruneh, Yihalem Yimolal and Khan, Muneeb A. and Ali, Muhammad Salman and Bhattarai, Binod and Baek, Seungryul}, title = {HandVQA: Diagnosing and Improving Fine-Grained Spatial Reasoning about Hands in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2515-2525} }
FALCON: False-Negative Aware Learning of Contrastive Negatives in Vision-Language Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Myunsoo and Shim, Seongwoong and Lee, Byung-Jun}, title = {FALCON: False-Negative Aware Learning of Contrastive Negatives in Vision-Language Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {701-711} }
MMFace-DiT: A Dual-Stream Diffusion Transformer for High-Fidelity Multimodal Face Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Krishnamurthy_2026_CVPR, author = {Krishnamurthy, Bharath and Rattani, Ajita}, title = {MMFace-DiT: A Dual-Stream Diffusion Transformer for High-Fidelity Multimodal Face Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4580-4589} }
3D-Aware Implicit Motion Control for View-Adaptive Human Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Zhixue and He, Xu and Tang, Songlin and Zhang, Haoxian and Li, Qingfeng and Liu, Xiaoqiang and Wan, Pengfei and Gai, Kun}, title = {3D-Aware Implicit Motion Control for View-Adaptive Human Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2243-2252} }
Graph2Eval: Automatic Multimodal Task Generation for Agents via Knowledge Graphs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yurun and Hu, Xueyu and Liu, Yuhan and Wang, Ziqi and Liao, Zeyi and Chen, Lin and Wei, Feng and Qian, Yuxi and Zheng, Bo and Yin, Keting and Zhang, Shengyu}, title = {Graph2Eval: Automatic Multimodal Task Generation for Agents via Knowledge Graphs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {735-744} }
VisionLeaf: Entropy-Guided Leaf-First Reasoning for Efficient and Accurate Think-with-Image-
[pdf]
[supp]
[bibtex]@InProceedings{Gui_2026_CVPR, author = {Gui, Haokun and Yang, Senqiao and Zhu, Mingkang and Chu, Meng and Wu, Sitong and Lu, Changsheng and Wang, Zihao and Tian, Zhuotao and Jia, Jiaya}, title = {VisionLeaf: Entropy-Guided Leaf-First Reasoning for Efficient and Accurate Think-with-Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5187-5198} }
EgoX: Egocentric Video Generation from a Single Exocentric Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2026_CVPR, author = {Kang, Taewoong and Kim, Kinam and Kim, Dohyeon and Park, Minho and Hyung, Junha and Choo, Jaegul}, title = {EgoX: Egocentric Video Generation from a Single Exocentric Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11116-11126} }
The Missing GAP: From Solving Square Jigsaw Puzzles to Handling Real World Archaeological Fragments-
[pdf]
[supp]
[bibtex]@InProceedings{Shahar_2026_CVPR, author = {Shahar, Ofir Itzhak and Elkin, Gur and Ben-Shahar, Ohad}, title = {The Missing GAP: From Solving Square Jigsaw Puzzles to Handling Real World Archaeological Fragments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3186-3196} }
COT-FM: Cluster-wise Optimal Transport Flow Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chiang_2026_CVPR, author = {Chiang, Chiensheng and Tu, Kuan-Hsun and Liao, Jia-Wei and Chou, Cheng-Fu and Ke, Tsung-Wei}, title = {COT-FM: Cluster-wise Optimal Transport Flow Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11515-11524} }
KASALv2: Fully Automatic 3D Rotational Symmetry Classification and Axis Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Mengxin and Wang, Yulin and Luo, Chen and Li, Yongzhe and Zhou, Yijun}, title = {KASALv2: Fully Automatic 3D Rotational Symmetry Classification and Axis Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13866-13875} }
LF-BVN: Blind-View Network for Self-Supervised Light Field Denoising-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Longzhao and Zhang, Shuo and Gao, Chen and Tian, Qian and Lin, Youfang}, title = {LF-BVN: Blind-View Network for Self-Supervised Light Field Denoising}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1355-1364} }
Test-time Sparsity for Extreme Fast Action Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2026_CVPR, author = {Ji, Kangye and Meng, Yuan and Zhou, Jianbo and Li, Ye and Tang, Chen and Wang, Zhi}, title = {Test-time Sparsity for Extreme Fast Action Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9414-9423} }
Look Before You Fuse: 2D-Guided Cross-Modal Alignment for Robust 3D Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xiang and Hu, Zhangchi and Xiao, Xu and Kong, Bin}, title = {Look Before You Fuse: 2D-Guided Cross-Modal Alignment for Robust 3D Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11589-11598} }
The devil is in the details: Enhancing Video Virtual Try-On via Keyframe-Driven Details Injection-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Qingdong and Chen, Xueqin and Pan, Yanjie and Tang, Peng and Xu, Pengcheng and Gan, Zhenye and Wang, Chengjie and Hu, Xiaobin and Zhang, Jiangning and Wang, Yabiao}, title = {The devil is in the details: Enhancing Video Virtual Try-On via Keyframe-Driven Details Injection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9182-9191} }
ChronoGS: Disentangling Invariants and Changes in Multi-Period Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zhongtao and Dai, Jiaqi and Zhu, Qingtian and Li, Yilong and Su, Mai and Zhu, Fei and Gai, Meng and Wang, Shaorong and Pan, Chengwei and Chen, Yisong and Wang, Guoping}, title = {ChronoGS: Disentangling Invariants and Changes in Multi-Period Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8298-8307} }
Progress-Think: Semantic Progress Reasoning for Vision-Language Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Shuo and Wang, Yucheng and Lian, Guoxin and Wang, Yongcai and Chen, Maiyue and Wang, Kaihui and Zhang, Bo and Su, Zhizhong and Zhou, Yutian and Li, Wanting and Li, Deying and Fan, Zhaoxin}, title = {Progress-Think: Semantic Progress Reasoning for Vision-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4076-4086} }
PRIMU: Uncertainty Estimation for Novel Views in Gaussian Splatting from Primitive-Based Representations of Error and Coverage-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gottwald_2026_CVPR, author = {Gottwald, Thomas and Heinert, Edgar and Stehr, Peter and Galappaththige, Chamuditha Jayanga and Rottmann, Matthias}, title = {PRIMU: Uncertainty Estimation for Novel Views in Gaussian Splatting from Primitive-Based Representations of Error and Coverage}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11871-11880} }
Seeing the Scene Matters: Revealing Forgetting in Video Understanding Models with a Scene-Aware Long-Video Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Seng Nam and Chen, Hao and Ho, Chenglam and Mao, Xinyu and Wang, Jinping and Zhang, Yu and Li, Chao}, title = {Seeing the Scene Matters: Revealing Forgetting in Video Understanding Models with a Scene-Aware Long-Video Benchmark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4515-4525} }
CraftMesh: High-Fidelity Generative Mesh Manipulation via Poisson Seamless Fusion-
[pdf]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, James Jincheng and Wu, Yuxiao and Cai, Youcheng and Liu, Ligang}, title = {CraftMesh: High-Fidelity Generative Mesh Manipulation via Poisson Seamless Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5934-5944} }
ACPV-Net: All-Class Polygonal Vectorization for Seamless Vector Map Generation from Aerial Imagery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiao_2026_CVPR, author = {Jiao, Weiqin and Cheng, Hao and Vosselman, George and Persello, Claudio}, title = {ACPV-Net: All-Class Polygonal Vectorization for Seamless Vector Map Generation from Aerial Imagery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13244-13253} }
Fighting Hallucinations with Counterfactuals: Diffusion-Guided Perturbations for LVLM Hallucination Suppression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dastmalchi_2026_CVPR, author = {Dastmalchi, Hamidreza and An, Aijun and Cheraghian, Ali and Barzamini, Hamed}, title = {Fighting Hallucinations with Counterfactuals: Diffusion-Guided Perturbations for LVLM Hallucination Suppression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4178-4187} }
PromptStereo: Zero-Shot Stereo Matching via Structure and Motion Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xianqi and Yang, Hao and Wang, Hangtian and Cheng, Junda and Xu, Gangwei and Lin, Min and Yang, Xin}, title = {PromptStereo: Zero-Shot Stereo Matching via Structure and Motion Prompts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12565-12575} }
Calibri: Enhancing Diffusion Transformers via Parameter-Efficient Calibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tokhchukov_2026_CVPR, author = {Tokhchukov, Danil and Mirzoeva, Aysel and Kuznetsov, Andrey and Sobolev, Konstantin}, title = {Calibri: Enhancing Diffusion Transformers via Parameter-Efficient Calibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4635-4644} }
Similarity-Consistent Likelihood Diffusion enables Hidden Person Detection from Wall Reflections-
[pdf]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Zhiwen and Zhou, Hao and Qi, Huiyu and Huang, Zhao and Zhang, Guangyuan and Jiang, Shaowei and Tang, Wenwen and Yang, Bin and Liu, Jin and Zhang, Xiaoshuai and Huang, Xingru}, title = {Similarity-Consistent Likelihood Diffusion enables Hidden Person Detection from Wall Reflections}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13908-13917} }
CogniVerse: Revolutionizing Multi-Modal Retrieval-Augmented Generation with Cognitive Reflection and Geometric Reasoning-
[pdf]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Xiang and Fang, Wanlong and Wang, Changshuo}, title = {CogniVerse: Revolutionizing Multi-Modal Retrieval-Augmented Generation with Cognitive Reflection and Geometric Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7923-7935} }
Batman: Benign Knowledge Alignment Through Malicious Null Space in Federated Backdoor Attack-
[pdf]
[supp]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Wenwen and Huang, Wenke and Fang, Yiyang and Qu, Wenjie and Zhang, Jiaheng and Ye, Mang}, title = {Batman: Benign Knowledge Alignment Through Malicious Null Space in Federated Backdoor Attack}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13316-13325} }
Bezier Degradation Modeling for LiDAR-based Human Motion Capture-
[pdf]
[supp]
[bibtex]@InProceedings{An_2026_CVPR, author = {An, Xiaoqi and Zhao, Lin and Li, Jun and Gong, Chen and Yang, Jian}, title = {Bezier Degradation Modeling for LiDAR-based Human Motion Capture}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14027-14037} }
Adaptive Anisotropic Gaussian Splatting for Multi-contrast MRI Arbitrary-Scale Super-Resolution with Anatomy Guidance-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Qiuhai and Chen, Kang and Lu, Zhengjie and Wang, Tingting and Fang, Faming and Zhang, Guixu}, title = {Adaptive Anisotropic Gaussian Splatting for Multi-contrast MRI Arbitrary-Scale Super-Resolution with Anatomy Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2188-2197} }
DualSplat: Robust 3D Gaussian Splatting via Pseudo-Mask Bootstrapping from Reconstruction Failures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xu and Wang, Zhiru and Xie, Shiyun and Pan, Chengwei and Chen, Yisong}, title = {DualSplat: Robust 3D Gaussian Splatting via Pseudo-Mask Bootstrapping from Reconstruction Failures}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4912-4921} }
ArtPro: Self-Supervised Articulated Object Reconstruction with Adaptive Integration of Mobility Proposals-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xuelu and Wang, Zhaonan and Wang, Xiaogang and Wu, Lei and Li, Manyi and Tu, Changhe}, title = {ArtPro: Self-Supervised Articulated Object Reconstruction with Adaptive Integration of Mobility Proposals}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13897-13907} }
Enabling Supervised Learning of Generative Signatures for Generalized AI-Generated Images Detection-
[pdf]
[bibtex]@InProceedings{Fei_2026_CVPR, author = {Fei, Jianwei and Dai, Yunshu and Zhou, Xiaoyu and Xia, Zhihua and Piva, Alessandro}, title = {Enabling Supervised Learning of Generative Signatures for Generalized AI-Generated Images Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14084-14094} }
Incentivizing Versatile Video Reasoning in MLLMs via Data-Efficient Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xiaodong and Wu, Zhirong and Huang, Langling and Zheng, Yuxi and Peng, Peixi}, title = {Incentivizing Versatile Video Reasoning in MLLMs via Data-Efficient Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5444-5454} }
Efficient All-Pairs Correlation Volume Sampling for Optical Flow Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Briedis_2026_CVPR, author = {Briedis, Karlis Martins and Gross, Markus and Schroers, Christopher}, title = {Efficient All-Pairs Correlation Volume Sampling for Optical Flow Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5700-5709} }
Multimodal Continual Instruction Tuning with Dynamic Gradient Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Songze and Gao, Mingyu and Su, Tonghua and Zhang, Xu-Yao and Wang, Zhongjie}, title = {Multimodal Continual Instruction Tuning with Dynamic Gradient Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10820-10829} }
AnyID: Ultra-Fidelity Universal Identity-Preserving Video Generation from Any Visual References-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Jiahao and Sheng, Hualian and Cai, Sijia and Yang, Yuxiao and Zhang, Weizhan and Yan, Caixia and Deng, Bing and Ye, Jieping}, title = {AnyID: Ultra-Fidelity Universal Identity-Preserving Video Generation from Any Visual References}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12808-12817} }
ResAD: Normalized Residual Trajectory Modeling for End-to-End Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Zhiyu and Chen, Shaoyu and Yin, Haoran and Zhang, Xinbang and Zou, Jialv and Wang, Xinggang and Zhang, Qian and Zhang, Lefei}, title = {ResAD: Normalized Residual Trajectory Modeling for End-to-End Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3729-3739} }
FoSS: Modeling Long-Range Dependencies and Multimodal Uncertainty in Trajectory Prediction via Fourier-State Space Integration-
[pdf]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Yizhou and Jiang, Genze and Cheng, Yihua and Wang, Kezhi}, title = {FoSS: Modeling Long-Range Dependencies and Multimodal Uncertainty in Trajectory Prediction via Fourier-State Space Integration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3750-3760} }
ParallelVLM: Lossless Video-LLM Acceleration with Visual Alignment Aware Parallel Speculative Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kong_2026_CVPR, author = {Kong, Quan and Shen, Yuhao and Ji, Yicheng and Li, Huan and Wang, Cong}, title = {ParallelVLM: Lossless Video-LLM Acceleration with Visual Alignment Aware Parallel Speculative Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11392-11402} }
Rethinking Knowledge Transfer in Image Quality Assessment: A Perceptual Preference Structure Alignment Perspective-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Aobo and Wu, Jinjian and Liu, Yongxu and Ma, Jupo and Dong, Weisheng}, title = {Rethinking Knowledge Transfer in Image Quality Assessment: A Perceptual Preference Structure Alignment Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1310-1319} }
EMO-R3: Reflective Reinforcement Learning for Emotional Reasoning in Multimodal Large Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Yiyang and Huang, Wenke and Fu, Pei and Yang, Yihao and Su, Kehua and Luo, Zhenbo and Luan, Jian and Ye, Mang}, title = {EMO-R3: Reflective Reinforcement Learning for Emotional Reasoning in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {745-755} }
Illumination-Consistent Human-Scene Reconstruction from Monocular Video-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Rongbin and Li, Wensheng and Zeng, Lingzhe and Wang, Dong and Gao, Chengying}, title = {Illumination-Consistent Human-Scene Reconstruction from Monocular Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14050-14061} }
MixFlow Training: Alleviating Exposure Bias with Slowed Interpolation Mixture-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Hui and Lyu, Jiayue and Wang, Fu-Yun and Cheng, Kaihui and Zhu, Siyu and Wang, Jingdong}, title = {MixFlow Training: Alleviating Exposure Bias with Slowed Interpolation Mixture}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9095-9105} }
TTP: Test-Time Padding for Adversarial Detection and Robust Adaptation on Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zhiwei and Pang, Yitian and Wang, Weining and Sun, Zhenan and Li, Qi}, title = {TTP: Test-Time Padding for Adversarial Detection and Robust Adaptation on Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1513-1522} }
DP-FedAdamW: An Efficient Optimizer for Differentially Private Federated Large Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Jin and Xi, Ning and Miao, Yinbin and Liu, Junkang}, title = {DP-FedAdamW: An Efficient Optimizer for Differentially Private Federated Large Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3358-3368} }
MLLM-HWSI: A Multimodal Large Language Model for Hierarchical Whole Slide Image Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Alawode_2026_CVPR, author = {Alawode, Basit and Mahmood, Arif and Al Radi, Muaz Khalifa and Albastaki, Shahad and Khan, Asim and Bilal, Muhammad and Abdalla, Moshira Ali and Bennamoun, Mohammed and Javed, Sajid}, title = {MLLM-HWSI: A Multimodal Large Language Model for Hierarchical Whole Slide Image Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13732-13743} }
Frequency-Aware Flow Matching for High-Quality Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2026_CVPR, author = {Ren, Sucheng and Yu, Qihang and He, Ju and Shen, Xiaohui and Chen, Liang-Chieh}, title = {Frequency-Aware Flow Matching for High-Quality Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9074-9083} }
SAGE: Style-Adaptive Generalization for Privacy-Constrained Semantic Segmentation Across Domains-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Qingmei and Zhang, Yang and Zhang, Peifeng and Fu, Haohuan and Zheng, Juepeng}, title = {SAGE: Style-Adaptive Generalization for Privacy-Constrained Semantic Segmentation Across Domains}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13134-13144} }
Rethinking Dataset Distillation: Hard Truths about Soft Labels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dey_2026_CVPR, author = {Dey, Priyam and Sahdev, Aditya and Bhati, Sunny and Mopuri, Konda Reddy and Radhakrishnan, Venkatesh Babu}, title = {Rethinking Dataset Distillation: Hard Truths about Soft Labels}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {178-187} }
LIFT and PLACE: A Simple, Stable, and Effective Knowledge Distillation Framework for Lightweight Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Hyunsoo and Yeo, Sangyeop and Yoo, Jaejun}, title = {LIFT and PLACE: A Simple, Stable, and Effective Knowledge Distillation Framework for Lightweight Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5564-5573} }
EditCtrl: Disentangled Local and Global Control for Real-Time Generative Video Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Litman_2026_CVPR, author = {Litman, Yehonathan and Liu, Shikun and Seyb, Dario and Milef, Nicholas and Zhou, Yang and Marshall, Carl and Tulsiani, Shubham and Leak, Caleb}, title = {EditCtrl: Disentangled Local and Global Control for Real-Time Generative Video Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8965-8975} }
Masking Teacher and Reinforcing Student for Distilling Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Byung-Kwan and Wang, Yu-Chiang Frank and Hachiuma, Ryo}, title = {Masking Teacher and Reinforcing Student for Distilling Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10126-10141} }
DextER: Language-driven Dexterous Grasp Generation with Embodied Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Junha and Park, Eunha and Cho, Minsu}, title = {DextER: Language-driven Dexterous Grasp Generation with Embodied Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1067-1077} }
Contact-Aware Neural Dynamics-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jing_2026_CVPR, author = {Jing, Changwei and Bandi, Jai Krishna and Ye, Jianglong and Duan, Yan and Abbeel, Pieter and Wang, Xiaolong and Yi, Sha}, title = {Contact-Aware Neural Dynamics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13442-13452} }
CASR: A Robust Cyclic Framework for Arbitrary Large-Scale Super-Resolution with Distribution Alignment and Self-Similarity Awareness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Wenhao and Zhao, Zhaoran and Lu, Peng and Li, Sheng and Qiao, Qian and Li, RuiDe}, title = {CASR: A Robust Cyclic Framework for Arbitrary Large-Scale Super-Resolution with Distribution Alignment and Self-Similarity Awareness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2136-2145} }
FlashMotion: Few-Step Controllable Video Generation with Trajectory Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Quanhao and Xing, Zhen and Wang, Rui and Cao, Haidong and Dai, Qi and Dong, Daoguo and Wu, Zuxuan}, title = {FlashMotion: Few-Step Controllable Video Generation with Trajectory Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8986-8996} }
Quantum-Gated Task-interaction Knowledge Distillation for Pre-trained Model-based Class-Incremental Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Linjie and Xiao, Huiyu and Cao, Jiarui and Wu, Zhenyu and Ji, Yang}, title = {Quantum-Gated Task-interaction Knowledge Distillation for Pre-trained Model-based Class-Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3920-3929} }
See It, Say It, Sorted: An Iterative Training-Free Framework for Visually-Grounded Multimodal Reasoning in LVLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yongchang and Ma, Oliver and Liu, Tianyi and Zhou, Guangquan and Chen, Yang}, title = {See It, Say It, Sorted: An Iterative Training-Free Framework for Visually-Grounded Multimodal Reasoning in LVLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11933-11942} }
HERBench: A Benchmark for Multi-Evidence Integration in Video Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ben_Ami_2026_CVPR, author = {Ben Ami, Dan and Serussi, Gabriele and Cohen, Kobi and Baskin, Chaim}, title = {HERBench: A Benchmark for Multi-Evidence Integration in Video Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4505-4514} }
LAMP: Language-Assisted Motion Planning for Controllable Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kizil_2026_CVPR, author = {Kizil, Muhammed Burak and Sanli, Enes and Mitra, Niloy J. and Erdem, Erkut and Erdem, Aykut and Ceylan, Duygu}, title = {LAMP: Language-Assisted Motion Planning for Controllable Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12829-12838} }
MMBench-GUI: A Unified Hierarchical Evaluation Framework for Multi-Platform GUI Agents-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xuehui and Wu, Zhenyu and Xie, JingJing and Ding, Zichen and Yang, Bowen and Li, Zehao and Liu, Zhaoyang and Li, Qingyun and Dong, Xuan and Chen, Zhe and Wang, Weiyun and Zhao, Xiangyu and Chen, Jixuan and Duan, Haodong and Xie, Tianbao and Yang, Chenyu and Su, Shiqian and Yu, Yue and Zhang, Yanting and Yue, Xiangyu and Su, Weijie and Zhu, Xizhou and Shen, Wei and Dai, Jifeng and Wang, Wenhai}, title = {MMBench-GUI: A Unified Hierarchical Evaluation Framework for Multi-Platform GUI Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6239-6248} }
DETACH : Decomposed Spatio-Temporal Alignment for Exocentric Video and Ambient Sensors with Staged Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Yoon_2026_CVPR, author = {Yoon, Junho and Jeong, Jaemo and Kim, Hyunju and Lee, Dongman}, title = {DETACH : Decomposed Spatio-Temporal Alignment for Exocentric Video and Ambient Sensors with Staged Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12860-12870} }
ViStoryBench: Comprehensive Benchmark Suite for Story Visualization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhuang_2026_CVPR, author = {Zhuang, Cailin and Huang, Ailin and Hu, Yaoqi and Wu, Jingwei and Cheng, Wei and Liao, Jiaqi and Wang, Hongyuan and Liao, Xinyao and Cai, Weiwei and Xu, Hengyuan and Zhang, Xuanyang and Zeng, Xianfang and Huang, Zhewei and Yu, Gang and Zhang, Chi}, title = {ViStoryBench: Comprehensive Benchmark Suite for Story Visualization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9455-9467} }
Arcadia: Toward a Full-Lifecycle Framework for Embodied Lifelong Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Minghe and Li, Juncheng and Lin, Yuze and Liu, Xuqi and Ji, Jiaming and Pan, Xiaoran and Xu, Zihan and Li, Xian and Li, Mingjie and Ji, Wei and Wei, Rong and Tang, Rui and Wang, Qizhou and Shen, Kai and Xiao, Jun and Wu, Qi and Tang, Siliang and Zhuang, Yueting}, title = {Arcadia: Toward a Full-Lifecycle Framework for Embodied Lifelong Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1031-1040} }
Unified Spherical Frontend: Learning Rotation-Equivariant Representations of Spherical Images from Any Camera-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Mukai and Dabhi, Mosam and Xie, Liuyue and Scherer, Sebastian and Jeni, L{\'a}szl{\'o} A.}, title = {Unified Spherical Frontend: Learning Rotation-Equivariant Representations of Spherical Images from Any Camera}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6106-6115} }
RADAR: VQ-VAE Decoder of VAR is a Good Student for Restoring Against Degradation by Acceleration-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Ziyang and Zhang, Yue and Wang, Mingdao and Zhang, Yasen and Song, Teer and Tian, Yu and Li, Xueming}, title = {RADAR: VQ-VAE Decoder of VAR is a Good Student for Restoring Against Degradation by Acceleration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5273-5282} }
Selective, Regularized, and Calibrated: Harnessing Vision Foundation Models for Cross-Domain Few-Shot Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Junyuan and Xiang, Xunzhi and Li, Wenbin and Fan, Qi and Gao, Yang}, title = {Selective, Regularized, and Calibrated: Harnessing Vision Foundation Models for Cross-Domain Few-Shot Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12385-12395} }
LDP-Slicing: Local Differential Privacy for Images via Randomized Bit-Plane Slicing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Yuanming and Li, Chengqi and He, Wenbo}, title = {LDP-Slicing: Local Differential Privacy for Images via Randomized Bit-Plane Slicing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {92-101} }
MERLIN: Building Low-SNR Robust Multimodal LLMs for Electromagnetic Signals-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2026_CVPR, author = {Shen, Junyu and She, Zhendong and Zhang, Chenghanyu and Sun, Yuchuang and Luo, Luqing and Tan, Dingwei and Guo, Zonghao and Guo, Bo and Han, Zehua and Xie, Wupeng and Mu, Yaxin and Zhang, Peng and Li, Peipei and Wang, Fengxiang and Sun, Yangang and Sun, Maosong}, title = {MERLIN: Building Low-SNR Robust Multimodal LLMs for Electromagnetic Signals}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8653-8663} }
Think Before You Drive: World Model-Inspired Multimodal Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2026_CVPR, author = {Liao, Haicheng and Shen, Huanming and Wang, Bonan and Li, Yongkang and Tang, Yihong and Wang, Chengyue and Zhuang, Dingyi and Chen, Kehua and Yang, Hai and Xu, Chengzhong and Li, Zhenning}, title = {Think Before You Drive: World Model-Inspired Multimodal Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3677-3687} }
Align Once to Explain: Feature Alignment for Scalable B-cosification of Foundational Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Maser_2026_CVPR, author = {Maser, Raphael and Gairola, Siddhartha and Rao, Sukrut and Schiele, Bernt}, title = {Align Once to Explain: Feature Alignment for Scalable B-cosification of Foundational Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9869-9879} }
PhysX-Anything: Simulation-Ready Physical 3D Assets from Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Ziang and Hong, Fangzhou and Chen, Zhaoxi and Pan, Liang and Liu, Ziwei}, title = {PhysX-Anything: Simulation-Ready Physical 3D Assets from Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5839-5848} }
HoloCine: Holistic Generation of Cinematic Multi-Shot Long Video Narratives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Meng_2026_CVPR, author = {Meng, Yihao and Ouyang, Hao and Yu, Yue and Wang, Qiuyu and Wang, Wen and Cheng, Ka Leong and Wang, Hanlin and Ma, Shuailei and Li, Yixuan and Chen, Cheng and Zeng, Yanhong and Zhu, Xing and Shen, Yujun and Qu, Huamin}, title = {HoloCine: Holistic Generation of Cinematic Multi-Shot Long Video Narratives}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {461-471} }
S$^2$-MLLM: Boosting Spatial Reasoning Capability of MLLMs for 3D Visual Grounding with Structural Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Beining and Zhu, Siting and Jin, Zhao and Li, Junxian and Wang, Hesheng}, title = {{S$^2$-MLLM}: Boosting Spatial Reasoning Capability of MLLMs for 3D Visual Grounding with Structural Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2557-2569} }
Region-Adaptive Sampling for Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Ziming and Yang, Yifan and Zhang, Chengruidong and Zhang, Yiqi and Qiu, Lili and You, Yang and Yang, Yuqing}, title = {Region-Adaptive Sampling for Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2346-2356} }
Spatial-SSRL: Enhancing Spatial Understanding via Self-Supervised Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yuhong and Zhang, Beichen and Zang, Yuhang and Cao, Yuhang and Xing, Long and Dong, Xiaoyi and Duan, Haodong and Lin, Dahua and Wang, Jiaqi}, title = {Spatial-SSRL: Enhancing Spatial Understanding via Self-Supervised Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9570-9581} }
SoliReward: Mitigating Susceptibility to Reward Hacking and Annotation Noise in Video Generation Reward Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lian_2026_CVPR, author = {Lian, Jiesong and Zhong, Ruizhe and Zhou, Zixiang and Mi, Xiaoyue and Hu, Long and Zhou, Yuan and Lu, Qinglin and Hao, Yixue and Yan, Junchi}, title = {SoliReward: Mitigating Susceptibility to Reward Hacking and Annotation Noise in Video Generation Reward Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12798-12807} }
Color When It Counts: Grayscale-Guided Online Triggering for Always-On Streaming Video Sensing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Weitong and Zhang, Hang and Huang, Yukai and Sun, Shitong and Deng, Jiankang and Xu, Songcen and Song, Jifei and Zhang, Zhensong}, title = {Color When It Counts: Grayscale-Guided Online Triggering for Always-On Streaming Video Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9784-9793} }
UniSpector: Towards Universal Open-set Defect Recognition via Spectral-Contrastive Visual Prompting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Geonuk and Kim, Minhoi and Lee, Kangil and Kim, Minsu and Jeon, Hyeonseong and Han, Jeonghoon and Lim, Hyoungjoon and Yim, Junho}, title = {UniSpector: Towards Universal Open-set Defect Recognition via Spectral-Contrastive Visual Prompting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6261-6270} }
Globscope: Toward a Global View of the Loss Landscape-
[pdf]
[supp]
[bibtex]@InProceedings{Mustaq_2026_CVPR, author = {Mustaq, Mashiat and Tricoche, Xavier M.}, title = {Globscope: Toward a Global View of the Loss Landscape}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5263-5272} }
PaNDaS: Learnable Shape Interpolation Modeling with Localized Control-
[pdf]
[supp]
[bibtex]@InProceedings{Besnier_2026_CVPR, author = {Besnier, Thomas and Pierson, Emery and Arguillere, Sylvain and Ovsjanikov, Maks and Daoudi, Mohamed}, title = {PaNDaS: Learnable Shape Interpolation Modeling with Localized Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13102-13112} }
Den-TP: A Density-Balanced Data Curation and Evaluation Framework for Trajectory Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Ruining and Xu, Yi and Fu, Yun and Su, Lili}, title = {Den-TP: A Density-Balanced Data Curation and Evaluation Framework for Trajectory Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10632-10641} }
iSHIFT: Lightweight Slow-Fast GUI Agent with Adaptive Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mehrotra_2026_CVPR, author = {Mehrotra, Sarthak and Rebbapragada, Sairam VC and Bonthu, Mani and Balasubramanian, Vineeth N.}, title = {iSHIFT: Lightweight Slow-Fast GUI Agent with Adaptive Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6229-6238} }
PG-VTON: Single-Pass Training-Free Virtual Try-On via Patch-Guided Reference Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Guohao and Peng, Yuxin}, title = {PG-VTON: Single-Pass Training-Free Virtual Try-On via Patch-Guided Reference Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7859-7868} }
Label-Free Cross-Task LoRA Merging with Null-Space Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Wonyoung and Jeong, Wooseong and Yoon, Kuk-Jin}, title = {Label-Free Cross-Task LoRA Merging with Null-Space Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {847-859} }
An Optimal Transport-driven Approach for Cultivating Latent Space in Online Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tran_2026_CVPR, author = {Tran, Quyen and Nguyen, Hai and Dao, Quan and Phan, Hoang and Van, Linh and Than, Khoat and Phung, Dinh and Metaxas, Dimitris and Le, Trung}, title = {An Optimal Transport-driven Approach for Cultivating Latent Space in Online Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10851-10862} }
VisionDirector: Vision-Language Guided Closed-Loop Refinement for Generative Image Synthesis-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chu_2026_CVPR, author = {Chu, Meng and Yang, Senqiao and Che, Haoxuan and Zhang, Suiyun and Zhang, Xichen and Yu, Shaozuo and Gui, Haokun and Rao, Zhefan and Tu, Dandan and Liu, Rui and Jia, Jiaya}, title = {VisionDirector: Vision-Language Guided Closed-Loop Refinement for Generative Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9203-9212} }
PosterOmni: Generalized Artistic Poster Creation via Task Distillation and Unified Reward Feedback-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Sixiang and Lai, Jianyu and Gao, Jialin and Shi, Hengyu and Liu, Zhongying and Ye, Tian and Luo, Junfeng and Wei, Xiaoming and Zhu, Lei}, title = {PosterOmni: Generalized Artistic Poster Creation via Task Distillation and Unified Reward Feedback}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5978-5987} }
BridgeEQA: Virtual Embodied Agents for Real Bridge Inspections-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Varghese_2026_CVPR, author = {Varghese, Subin and Gao, Joshua and Rahman, Asad Ur and Hoskere, Vedhus}, title = {BridgeEQA: Virtual Embodied Agents for Real Bridge Inspections}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8163-8173} }
A Unified Perspective on Adversarial Membership Manipulation in Vision Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Ruize and Zhou, Kaiwen and Chen, Yongqiang and Liu, Feng}, title = {A Unified Perspective on Adversarial Membership Manipulation in Vision Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1554-1564} }
Seeing Through Touch: Tactile-Driven Visual Localization of Material Regions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Seongyu and Lee, Seungwoo and Ryu, Hyeonggon and Chung, Joon Son and Senocak, Arda}, title = {Seeing Through Touch: Tactile-Driven Visual Localization of Material Regions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8717-8726} }
Data-Centric Meta-Learning for Robust Few-Shot Generalization-
[pdf]
[supp]
[bibtex]@InProceedings{Lim_2026_CVPR, author = {Lim, Jongmin and Cha, Soobin and Park, Jaehun and Oh, Inho and Park, Minho and Kim, Kwangsu}, title = {Data-Centric Meta-Learning for Robust Few-Shot Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5543-5552} }
MICON-Bench: Benchmarking and Enhancing Multi-Image Context Image Generation in Unified Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Mingrui and Liu, Hang and Ji, Jiayi and Sun, Xiaoshuai and Ji, Rongrong}, title = {MICON-Bench: Benchmarking and Enhancing Multi-Image Context Image Generation in Unified Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8227-8236} }
Scaling4D: Pushing the Frontier of Video Novel View Synthesis through Large-Scale Monocular Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Hongrui and Luo, Junjie and Fu, Zhihong and Zhu, Shengnan and Wen, Jiawei and Feng, Wanquan and Zhao, Songtao and He, Qian}, title = {Scaling4D: Pushing the Frontier of Video Novel View Synthesis through Large-Scale Monocular Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11174-11184} }
Chain of World: World Model Thinking in Latent Motion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Fuxiang and Di, Donglin and Tang, Lulu and Zhang, Xuancheng and Fan, Lei and Li, Hao and Chen, Wei and Su, Tonghua and Ma, Baorui}, title = {Chain of World: World Model Thinking in Latent Motion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6675-6684} }
MimiCAT: Mimic with Correspondence-Aware Cascade-Transformer for Category-Free 3D Pose Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chai_2026_CVPR, author = {Chai, Zenghao and Tang, Chen and Wong, Yongkang and Yang, Xulei and Kankanhalli, Mohan}, title = {MimiCAT: Mimic with Correspondence-Aware Cascade-Transformer for Category-Free 3D Pose Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13962-13973} }
Inferring Compositional 4D Scenes without Ever Seeing One-
[pdf]
[supp]
[bibtex]@InProceedings{Gokmen_2026_CVPR, author = {G{\"o}kmen, Ahmet Berke and Chhatkuli, Ajad and Van Gool, Luc and Paudel, Danda Pani}, title = {Inferring Compositional 4D Scenes without Ever Seeing One}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {317-329} }
Multi-Crit: Benchmarking Multimodal Judges on Pluralistic Criteria-Following-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiong_2026_CVPR, author = {Xiong, Tianyi and Ge, Yi and Li, Ming and Zhang, Zuolong and Kulkarni, Pranav and Wang, Kaishen and He, Qi and Zhu, Zeying and Liu, Chenxi and Chen, Ruibo and Zheng, Tong and Chen, Yanshuo and Wang, Xiyao and Zhang, Renrui and Chen, Wenhu and Huang, Heng}, title = {Multi-Crit: Benchmarking Multimodal Judges on Pluralistic Criteria-Following}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8641-8652} }
OneThinker: All-in-one Reasoning Model for Image and Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Kaituo and Zhang, Manyuan and Li, Hongyu and Fan, Kaixuan and Chen, Shuang and Jiang, Yilei and Zheng, Dian and Sun, Peiwen and Zhang, Yiyuan and Sun, Haoze and Feng, Yan and Pei, Peng and Cai, Xunliang and Yue, Xiangyu}, title = {OneThinker: All-in-one Reasoning Model for Image and Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5432-5443} }
On Token's Dilemma: Dynamic MoE with Drift-Aware Token Assignment for Continual Learning of Large Vision Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Chongyang and Li, Mingsong and Lu, Haodong and Gong, Dong}, title = {On Token's Dilemma: Dynamic MoE with Drift-Aware Token Assignment for Continual Learning of Large Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3941-3952} }
VES-RFT: Rewarding Visual Evidence Sensitivity to Mitigate Hallucinations in Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Hou_2026_CVPR, author = {Hou, Xuehe and Li, Wenshuo and Li, Yali and Shu, Han and Wang, Yuan and Chen, Xinghao and Wang, Shengjin}, title = {VES-RFT: Rewarding Visual Evidence Sensitivity to Mitigate Hallucinations in Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4168-4177} }
Bidirectional Normalizing Flow: From Data to Noise and Back-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Yiyang and Sun, Qiao and Wang, Xianbang and Jiang, Zhicheng and Zhao, Hanhong and He, Kaiming}, title = {Bidirectional Normalizing Flow: From Data to Noise and Back}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2069-2078} }
4C4D: 4 Camera 4D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Junsheng and Yang, Zhifan and Han, Liang and Zhang, Wenyuan and Shi, Kanle and Xu, Shenkun and Liu, Yu-Shen}, title = {4C4D: 4 Camera 4D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11829-11839} }
CROWn: A Unified Framework for Anti-Aliased Downsampling and Phase-Calibrated Fusion in 3D Medical Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Xingru and Ye, Shuanghua and Huang, Zhao and Tang, Wenwen and Zhou, Huiyu and Zheng, Zhiwen and Liu, Jin and Zhang, Xiaoshuai}, title = {CROWn: A Unified Framework for Anti-Aliased Downsampling and Phase-Calibrated Fusion in 3D Medical Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8514-8524} }
Chain-of-Frames: Advancing Video Understanding in Multimodal LLMs via Frame-Aware Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ghazanfari_2026_CVPR, author = {Ghazanfari, Sara and Croce, Francesco and Flammarion, Nicolas and Krishnamurthy, Prashanth and Khorrami, Farshad and Garg, Siddharth}, title = {Chain-of-Frames: Advancing Video Understanding in Multimodal LLMs via Frame-Aware Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2746-2755} }
Linguistic Priors for Visual Decoupling: Towards Symmetric Vision-Brain Alignment-
[pdf]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Dongjun and Dai, Weichen and Qian, Jingsheng and Liu, Honggang and Yi, Hangjie and Kong, Wanzeng}, title = {Linguistic Priors for Visual Decoupling: Towards Symmetric Vision-Brain Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7869-7878} }
What Matters in Practical Learned Image Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tatwawadi_2026_CVPR, author = {Tatwawadi, Kedar and Rahimzadeh, Parisa and Sun, Zhanghao and Chen, Zhiqi and Yang, Ziyun and Nair, Sanjay and Hasteer, Divija and Rippel, Oren}, title = {What Matters in Practical Learned Image Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12095-12105} }
DeepAlign: Mitigating Modality Conflict through Modality-Specific Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Shuo and Miao, Bingchen and Bu, Wendong and Li, Juncheng and Zhang, Hanwang and Wu, Fei}, title = {{DeepAlign}: Mitigating Modality Conflict through Modality-Specific Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7847--7858} }
G$^2$VLM: Geometry Grounded Vision Language Model with Unified 3D Reconstruction and Spatial Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Wenbo and Lin, Jingli and Long, Yilin and Ran, Yunlong and Jiang, Lihan and Wang, Yifan and Zhu, Chenming and Xu, Runsen and Wang, Tai and Pang, Jiangmiao}, title = {{G$^2$VLM}: Geometry Grounded Vision Language Model with Unified {3D} Reconstruction and Spatial Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {9535--9546} }
ViT$^3$: Unlocking Test-Time Training in Vision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Dongchen and Li, Yining and Li, Tianyu and Cao, Zixuan and Wang, Ziming and Song, Jun and Cheng, Yu and Zheng, Bo and Huang, Gao}, title = {{ViT$^3$}: Unlocking Test-Time Training in Vision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {51--61} }
CG-Reasoner: Centroid-Guided Positional Reasoning Segmentation for Medical Imaging with a Robust Visual-Text Consistency Metric-
[pdf]
[supp]
[bibtex]@InProceedings{Polamreddy_2026_CVPR, author = {Polamreddy, Lakshmikar Reddy and Ma, Ming}, title = {{CG-Reasoner}: Centroid-Guided Positional Reasoning Segmentation for Medical Imaging with a Robust Visual-Text Consistency Metric}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1472--1481} }
Learning to Select Visual Tools from Experience-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Zeyi and Ji, Yuyang and Rajan, Anirudh Sundara and Cai, Zefan and Xiao, Wen and Wang, Haohan and Hu, Junjie and Lee, Yong Jae}, title = {Learning to Select Visual Tools from Experience}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4783--4793} }
UAVLight: A Benchmark for Illumination-Robust 3D Reconstruction in Unmanned Aerial Vehicle (UAV) Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2026_CVPR, author = {Du, Kang and Liao, Xue and Xia, Junpeng and Guo, Chaozheng and Gu, Yi and Guan, Yirui and Wang, Duotun and Huang, Sheng and Wang, Zeyu}, title = {{UAVLight}: A Benchmark for Illumination-Robust {3D} Reconstruction in Unmanned Aerial Vehicle ({UAV}) Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5670--5679} }
DemoFunGrasp: Universal Dexterous Functional Grasping via Demonstration-Editing Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Chuan and Yuan, Haoqi and Huang, Ziye and Xu, Chaoyi and Ma, Kai and Lu, Zongqing}, title = {{DemoFunGrasp}: Universal Dexterous Functional Grasping via Demonstration-Editing Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {986--995} }
DABO: Difficulty-Aware Bayesian Optimization with Diffusion-Learned Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Mengyang and Zhao, Pinlong}, title = {{DABO}: Difficulty-Aware {Bayesian} Optimization with Diffusion-Learned Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6126--6135} }
Advancing Image Classification with Discrete Diffusion Classification Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Belhasin_2026_CVPR, author = {Belhasin, Omer and Golan, Shelly and El-Yaniv, Ran and Elad, Michael}, title = {Advancing Image Classification with Discrete Diffusion Classification Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {124--134} }
CoopDiff: A Diffusion-Guided Approach for Cooperation under Corruptions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Gong and Zhang, Chaokun and Lv, Pengcheng}, title = {{CoopDiff}: A Diffusion-Guided Approach for Cooperation under Corruptions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {11546--11555} }
RecoverMark: Robust Watermarking for Localization and Recovery of Manipulated Faces-
[pdf]
[arXiv]
[bibtex]@InProceedings{An_2026_CVPR, author = {An, Haonan and Ye, Xiaohui and Hua, Guang and Tao, Yihang and Cao, Hangcheng and Yu, Xiangyu and Fang, Yuguang}, title = {{RecoverMark}: Robust Watermarking for Localization and Recovery of Manipulated Faces}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8587--8597} }
Scaling Zero-Shot Reference-to-Video Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Zijian and Liu, Shikun and Liu, Haozhe and Qiu, Haonan and An, Zhaochong and Ren, Weiming and Liu, Zhiheng and Huang, Xiaoke and Ng, Kam-Woh and Xie, Tian and Han, Xiao and Cong, Yuren and Li, Hang and Zhu, Chuyan and Patel, Aditya and Xiang, Tao and He, Sen}, title = {Scaling Zero-Shot Reference-to-Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {9253--9262} }
$\oslash$ Source Models Leak What They Shouldn't $\nrightarrow$: Unlearning Zero-Shot Transfer in Domain Adaptation Through Adversarial Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Devalapally_2026_CVPR, author = {Devalapally, Arnav and Jain, Poornima and Srinivas, Kartik and Balasubramanian, Vineeth N.}, title = {{$\oslash$} Source Models Leak What They Shouldn't {$\nrightarrow$}: Unlearning Zero-Shot Transfer in Domain Adaptation Through Adversarial Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1543--1553} }
Spectral-Geometric Neural Fields for Pose-Free LiDAR View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Yinuo and Cheng, Jun and Wang, Yiran and Cheng, Cheng}, title = {Spectral-Geometric Neural Fields for Pose-Free {LiDAR} View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2993--3003} }
VDOT: Efficient Unified Video Creation via Optimal Transport Distillation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yutong and Zhang, Haiyu and Xue, Tianfan and Qiao, Yu and Wang, Yaohui and Xu, Chang and Chen, Xinyuan}, title = {{VDOT}: Efficient Unified Video Creation via Optimal Transport Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {9273--9283} }
Hyperbolic Relational Prompts for Intersectional Fairness in Medical VLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2026_CVPR, author = {Qian, Jiayu and Yang, Zongxian and Chen, Guanxing and Hu, Pengwei and Tan, KC and Wang, Yan and Huang, Yu-An and Huang, Zhi-An}, title = {Hyperbolic Relational Prompts for Intersectional Fairness in Medical {VLMs}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {13712--13721} }
Perception Characteristics Distance: Measuring Stability and Robustness of Perception System in Dynamic Conditions under a Certain Decision Rule-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Boyu and Shi, Liang and Lin, Zhengzhi and Xiang, Lanxin and Stowe, Loren and Guo, Feng}, title = {Perception Characteristics Distance: Measuring Stability and Robustness of Perception System in Dynamic Conditions under a Certain Decision Rule}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4829--4838} }
UniCorrn: Unified Correspondence Transformer Across 2D and 3D-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Goswami_2026_CVPR, author = {Goswami, Prajnan and Ding, Tianye and Liu, Feng and Jiang, Huaizu}, title = {{UniCorrn}: Unified Correspondence Transformer Across {2D} and {3D}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {9943--9954} }
DrivePI: Spatial-aware 4D MLLM for Unified Autonomous Driving Understanding, Perception, Prediction and Planning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Zhe and Huang, Runhui and Yang, Rui and Yan, Siming and Wang, Zining and Hou, Lu and Lin, Di and Bai, Xiang and Zhao, Hengshuang}, title = {{DrivePI}: Spatial-aware {4D} {MLLM} for Unified Autonomous Driving Understanding, Perception, Prediction and Planning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3688--3698} }
WOD-E2E: Waymo Open Dataset for End-to-End Driving in Challenging Long-tail Scenarios-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Runsheng and Lin, Hubert and Jeon, Wonseok and Feng, Hao and Zou, Yuliang and Sun, Liting and Gorman, John and Tolstaya, Kate and Tang, Sarah and White, Brandyn and Sapp, Ben and Tan, Mingxing and Hwang, Jyh-Jing and Anguelov, Dragomir}, title = {{WOD-E2E}: {Waymo Open Dataset} for End-to-End Driving in Challenging Long-tail Scenarios}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3709--3718} }
NoRD: A Data-Efficient Vision-Language-Action Model that Drives without Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rawal_2026_CVPR, author = {Rawal, Ishaan and Gupta, Shubh and Hu, Yihan and Zhan, Wei}, title = {{NoRD}: A Data-Efficient Vision-Language-Action Model that Drives without Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {10965--10975} }
Variation-aware Vision Token Dropping for Faster Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Junjie and Liu, Xuyang and Wen, Zichen and Wang, Yiyu and Huang, Siteng and Chen, Honggang}, title = {Variation-aware Vision Token Dropping for Faster Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3489--3499} }
Active Inference for Micro-Gesture Recognition: EFE-Guided Temporal Sampling and Adaptive Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Weijia and Yang, Jingyu and Zhang, Ruojia and Sun, Fengtao and Gao, Qian and Wang, Chenyang and Su, Tongtong and Guo, Jia and Li, Xiaobai and Shao, Minglai}, title = {Active Inference for Micro-Gesture Recognition: {EFE}-Guided Temporal Sampling and Adaptive Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {13887--13896} }
DGGT: Feedforward 4D Reconstruction of Dynamic Driving Scenes using Unposed Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xiaoxue and Xiong, Ziyi and Chen, Yuantao and Li, Gen and Wang, Nan and Luo, Hongcheng and Chen, Long and Sun, Haiyang and Wang, Bing and Chen, Guang and Li, Hongyang and Zhang, Ya-Qin and Ye, Hangjun and Zhao, Hao}, title = {{DGGT}: Feedforward {4D} Reconstruction of Dynamic Driving Scenes using Unposed Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1265--1276} }
FlashCap: Millisecond-Accurate Human Motion Capture via Flashing LEDs and Event-Based Vision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Zekai and Fan, Shuqi and Liu, Mengyin and Luo, Yuhua and Lin, Xincheng and Yan, Ming and Wu, Junhao and Lin, Xiuhong and Ma, Yuexin and Wen, Chenglu and Xu, Lan and Shen, Siqi and Wang, Cheng}, title = {{FlashCap}: Millisecond-Accurate Human Motion Capture via Flashing {LEDs} and Event-Based Vision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2221--2231} }
E-RayZer: Self-supervised 3D Reconstruction as Spatial Visual Pre-training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Qitao and Tan, Hao and Wang, Qianqian and Bi, Sai and Zhang, Kai and Sunkavalli, Kalyan and Tulsiani, Shubham and Jiang, Hanwen}, title = {{E-RayZer}: Self-supervised {3D} Reconstruction as Spatial Visual Pre-training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7525--7535} }
RPGFusion: 4D Radar Prior-Guided Multi-Modal Fusion for 3D Detection-
[pdf]
[bibtex]@InProceedings{Qiu_2026_CVPR, author = {Qiu, Xin and Liu, Wenjie}, title = {{RPGFusion}: {4D} Radar Prior-Guided Multi-Modal Fusion for {3D} Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {284--294} }
Rounded or Streamlined Head? Bridging Concept Bottleneck Models and Attribute-Described Object Parts-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yang and Zhang, Jiajin and Hu, Yaojun and Hao, Bingguang and Cao, Xin and Xia, Yingda and Tu, Danyang and Gu, Shi and Zhang, Ling}, title = {Rounded or Streamlined Head? Bridging Concept Bottleneck Models and Attribute-Described Object Parts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {9880--9890} }
SparseWorld-TC: Trajectory-Conditioned Sparse Occupancy World Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2026_CVPR, author = {Du, Jiayuan and Zhao, Yiming and Guo, Zhenglong and Pan, Yong and Hou, Wenbo and Hao, Zhihui and Zhan, Kun and Chen, Qijun}, title = {{SparseWorld-TC}: Trajectory-Conditioned Sparse Occupancy World Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7425--7434} }
Universal 3D Shape Matching via Coarse-to-Fine Language Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Qinfeng and Mei, Guofeng and Yang, Bo and Zhang, Liying and Zhang, Jian and Yick, Kit-lun}, title = {Universal {3D} Shape Matching via Coarse-to-Fine Language Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {13157--13167} }
MRD: Multi-resolution Retrieval-Detection Fusion for High-Resolution Image Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Fan and Dong, Xingping and Yu, Xin and Luo, Wenhan and Liu, Wei and Zhang, Kaihao}, title = {{MRD}: Multi-resolution Retrieval-Detection Fusion for High-Resolution Image Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2693--2703} }
SplatSuRe: Selective Super-Resolution for Multi-view Consistent 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Asthana_2026_CVPR, author = {Asthana, Pranav and Hanson, Alex and Tu, Allen and Goldstein, Tom and Zwicker, Matthias and Varshney, Amitabh}, title = {{SplatSuRe}: Selective Super-Resolution for Multi-view Consistent {3D} {Gaussian} Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {11840--11849} }
TAMER: A Tri-Modal Contrastive Alignment and Multi-Scale Embedding Refinement Framework for Zero-Shot ECG Diagnosis-
[pdf]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Xuewei and Meng, Yajie and Zeng, Pan and Tang, Xianfang and Cui, Feifei and Jin, Qiangguo and Yang, Jialiang and Xu, Junlin}, title = {{TAMER}: A Tri-Modal Contrastive Alignment and Multi-Scale Embedding Refinement Framework for Zero-Shot {ECG} Diagnosis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {10502--10511} }
RunawayEvil: Jailbreaking the Image-to-Video Generative Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Songping and Qian, Rufan and Lyu, Yueming and Liu, Qinglong and Zou, Linzhuang and Qin, Jie and Liu, Songhua and Shan, Caifeng}, title = {{RunawayEvil}: Jailbreaking the Image-to-Video Generative Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {9296--9305} }
Towards High-Quality Image Segmentation: Improving Topology Accuracy by Penalizing Neighbor Pixels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Valverde_2026_CVPR, author = {Valverde, Juan Miguel and Papadopoulos, Dim P. and Larsen, Rasmus and Dahl, Anders Bjorholm}, title = {Towards High-Quality Image Segmentation: Improving Topology Accuracy by Penalizing Neighbor Pixels}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {13123--13133} }
InstantRetouch: Efficient and High-Fidelity Instruction-Guided Image Retouching with Bilateral Space-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Jiarui and Wang, Yujin and Li, Ruikang and Zhang, Fan and Yao, Mingde and Xue, Tianfan}, title = {{InstantRetouch}: Efficient and High-Fidelity Instruction-Guided Image Retouching with Bilateral Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8216--8226} }
When AVSR Meets Video Conferencing: Dataset, Degradation, and the Hidden Mechanism Behind Performance Collapse-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Yihuan and Xue, Jun and Jiajun, Liu and Li, Daixian and Zhang, Tong and Yi, Zhuolin and Ren, Yanzhen and Li, Kai}, title = {When {AVSR} Meets Video Conferencing: Dataset, Degradation, and the Hidden Mechanism Behind Performance Collapse}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4448--4457} }
Bias Is a Subspace, Not a Coordinate: A Geometric Rethinking of Post-hoc Debiasing in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Dachuan and Li, Weiyue and Shen, Zhenda and Qiu, Yushu and Xu, Bowen and Chen, Haoyu and Chen, Yongchao}, title = {Bias Is a Subspace, Not a Coordinate: A Geometric Rethinking of Post-hoc Debiasing in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {10230--10240} }
MetroGS: Efficient and Stable Reconstruction of Geometrically Accurate High-Fidelity Large-Scale Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Kehua and Mao, Tianlu and Ma, Xinzhu and Jiang, Hao and Li, Zehao and Liu, Zihan and Gao, Shuqin and Zhao, Honglong and Dai, Feng and Zhang, Yucheng and Wang, Zhaoqi}, title = {{MetroGS}: Efficient and Stable Reconstruction of Geometrically Accurate High-Fidelity Large-Scale Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {395--405} }
PET-DINO: Unifying Visual Cues into Grounding DINO with Prompt-Enriched Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2026_CVPR, author = {Fu, Weifu and Li, Jinyang and Gao, Bin-Bin and Li, Jialin and Lin, Yuhuan and Deng, Hanqiu and Tao, Wenbing and Liu, Yong and Wang, Chengjie}, title = {{PET-DINO}: Unifying Visual Cues into {Grounding DINO} with Prompt-Enriched Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {13039--13048} }
UniRefiner: Teaching Pre-trained ViTs to Self-Dispose Dross via Contrastive Register-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2026_CVPR, author = {Qiu, Congpei and Hu, Zhaoyu and Ke, Wei and Tian, Zhuotao and Wu, Yanhao and Zhang, Tong}, title = {{UniRefiner}: Teaching Pre-trained {ViTs} to Self-Dispose Dross via Contrastive Register}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {10061--10070} }
Elastic Weight Consolidation Done Right for Continual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xuan and Chang, Xiaobin}, title = {Elastic Weight Consolidation Done Right for Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3930--3940} }
ConsID-Gen: View-Consistent and Identity-Preserving Image-to-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Mingyang and Mishra, Ashirbad and Dey, Soumik and Xing, Shuo and Ravipati, Naveen and Wu, Hansi and Li, Binbin and Tu, Zhengzhong}, title = {{ConsID-Gen}: View-Consistent and Identity-Preserving Image-to-Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1853--1863} }
SketchDeco: Training-Free Latent Composition for Precise Sketch Colourisation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Utintu_2026_CVPR, author = {Utintu, Chaitat and Song, Yi-Zhe}, title = {{SketchDeco}: Training-Free Latent Composition for Precise Sketch Colourisation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {484--494} }
VIRAL: Visual Sim-to-Real at Scale for Humanoid Loco-Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Tairan and Wang, Zi and Xue, Haoru and Ben, Qingwei and Luo, Zhengyi and Xiao, Wenli and Yuan, Ye and Da, Xingye and Casta{\~n}eda, Fernando and Sastry, Shankar and Liu, Changliu and Shi, Guanya and Fan, Linxi and Zhu, Yuke}, title = {{VIRAL}: Visual Sim-to-Real at Scale for Humanoid Loco-Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {13430--13441} }
PP-OCRv5: A Specialized 5M-Parameter Model Rivaling Billion-Parameter Vision-Language Models on OCR Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cui_2026_CVPR, author = {Cui, Cheng and Zhang, Yubo and Sun, Ting and Wang, Xueqing and Liu, Hongen and Lin, Manhui and Zhang, Yue and Gao, Tingquan and Zhou, Changda and Liu, Jiaxuan and Zhang, Zelun and Zhang, Jing and Zhang, Jun and Liu, Yi}, title = {{PP-OCRv5}: A Specialized {5M}-Parameter Model Rivaling Billion-Parameter Vision-Language Models on {OCR} Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2467--2476} }
SO(3)-Equivariant ViT-Adapter for Data-Efficient Zero-Shot Sim-to-Real Indoor Panoramic Depth Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Ziyan and Zhang, Qiudan and Ma, Lin and Wang, Xu}, title = {{SO(3)}-Equivariant {ViT-Adapter} for Data-Efficient Zero-Shot Sim-to-Real Indoor Panoramic Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5740--5750} }
ACoT-VLA: Action Chain-of-Thought for Vision-Language-Action Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2026_CVPR, author = {Zhong, Linqing and Liu, Yi and Wei, Yifei and Xiong, Ziyu and Liu, Si and Ren, Guanghui}, title = {{ACoT-VLA}: Action Chain-of-Thought for Vision-Language-Action Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8152--8162} }
Fast SceneScript: Fast and Accurate Language-Based 3D Scene Understanding via Multi-Token Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Ruihong and Shi, Xuepeng and Bailo, Oleksandr and Manfredi, Marco and Gevers, Theo}, title = {{Fast SceneScript}: Fast and Accurate Language-Based {3D} Scene Understanding via Multi-Token Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2457--2466} }
MSPT: Efficient Large-Scale Physical Modeling via Parallelized Multi-Scale Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Curvo_2026_CVPR, author = {Curvo, Pedro M. P. and van de Meent, Jan-Willem and Zhdanov, Maksim}, title = {{MSPT}: Efficient Large-Scale Physical Modeling via Parallelized Multi-Scale Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12924--12933} }
Spatial-Frequency Collaborative Learning for Occluded Visible-Infrared Person Re-Identification-
[pdf]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Jian and Feng, Yujian and You, Shuai and Zhou, Zhongkai and Wu, Fei and Jing, Zhengjun and Ji, Yimu}, title = {Spatial-Frequency Collaborative Learning for Occluded Visible-Infrared Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4343--4352} }
SARMAE: Masked Autoencoder for SAR Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Danxu and Wang, Di and Wang, Hebaixu and Chen, Haoyang and Jiang, Wentao and Cheng, Yilin and Guo, Haonan and Cui, Wei and Zhang, Jing}, title = {{SARMAE}: Masked Autoencoder for {SAR} Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6496--6507} }
CrossEarth-Gate: Fisher-Guided Adaptive Tuning Engine for Efficient Adaptation of Cross-Domain Remote Sensing Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Shilei and Gong, Ziyang and Lin, Hehai and Liu, Yang and Cheng, Jiashun and Hu, Xiaoxing and Liang, Haoyuan and Li, Guowen and Qin, Chengwei and Cheng, Hong and Yang, Xue and Zheng, Juepeng and Fu, Haohuan}, title = {{CrossEarth-Gate}: {Fisher}-Guided Adaptive Tuning Engine for Efficient Adaptation of Cross-Domain Remote Sensing Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {13222--13233} }
VMonarch: Efficient Video Diffusion Transformers with Structured Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Cheng and Chen, Haoxian and Hou, Liang and Fan, Qi and Wu, Gangshan and Tao, Xin and Wang, Limin}, title = {{VMonarch}: Efficient Video Diffusion Transformers with Structured Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4613--4623} }
OddGridBench: Exposing the Lack of Fine-Grained Visual Discrepancy Sensitivity in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Weng_2026_CVPR, author = {Weng, Tengjin and Jiang, Wenhao and Wang, Jingyi and Li, Ming and Ma, Lin and Ming, Zhong}, title = {{OddGridBench}: Exposing the Lack of Fine-Grained Visual Discrepancy Sensitivity in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1575--1584} }
SAIL: Similarity-Aware Guidance and Inter-Caption Augmentation-based Learning for Weakly-Supervised Dense Video Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Ye-Chan and Cha, SeungJu and Kim, Si-Woo and Jeon, Minju and Kim, Hyungee and Kim, Dong-Jin}, title = {{SAIL}: Similarity-Aware Guidance and Inter-Caption Augmentation-based Learning for Weakly-Supervised Dense Video Captioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3466--3475} }
ReSAM: Refine, Requery, and Reinforce: Self-Prompting Point-Supervised Segmentation for Remote Sensing Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Subhani_2026_CVPR, author = {Subhani, Muhammad Naseer}, title = {{ReSAM}: Refine, Requery, and Reinforce: Self-Prompting Point-Supervised Segmentation for Remote Sensing Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3805--3814} }
Illuminating Visual Identity in Universal Multimodal Embeddings-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Jiawei and Feng, Junyi and Hua, Jiashen and Huang, Ziheng and Deng, Bing and Wu, Kaijie and Gu, Chaochen and Ye, Jieping}, title = {Illuminating Visual Identity in Universal Multimodal Embeddings}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8737--8748} }
SafeGRPO: Self-Rewarded Multimodal Safety Alignment via Rule-Governed Policy Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rong_2026_CVPR, author = {Rong, Xuankun and Huang, Wenke and Wang, Tingfeng and Zhou, Daiguo and Du, Bo and Ye, Mang}, title = {{SafeGRPO}: Self-Rewarded Multimodal Safety Alignment via Rule-Governed Policy Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7901--7911} }
Uncertainty-driven 3D Gaussian Splatting Active Mapping via Anisotropic Visibility Field-
[pdf]
[supp]
[bibtex]@InProceedings{Xue_2026_CVPR, author = {Xue, Shangjie and Dill, Jesse and Ahuja, Dhruv and Dellaert, Frank and Tsiotras, Panagiotis and Xu, Danfei}, title = {Uncertainty-driven {3D} {Gaussian} Splatting Active Mapping via Anisotropic Visibility Field}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5014--5026} }
Radiance Meshes for Volumetric Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mai_2026_CVPR, author = {Mai, Alexander and Hedstrom, Trevor and Kopanas, George and Kontkanen, Janne and Kuester, Falko and Barron, Jonathan T.}, title = {Radiance Meshes for Volumetric Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8267--8277} }
Unified Primitive Proxies for Structured Shape Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zhaiyu and Wang, Yuqing and Zhu, Xiao Xiang},
  title     = {Unified Primitive Proxies for Structured Shape Completion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7457--7467}
}
TeFlow: Enabling Multi-frame Supervision for Self-Supervised Feed-forward Scene Flow Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Qingwen and Jiang, Chenhan and Zhu, Xiaomeng and Miao, Yunqi and Zhang, Yushan and Andersson, Olov and Jensfelt, Patric},
  title     = {{TeFlow}: Enabling Multi-frame Supervision for Self-Supervised Feed-forward Scene Flow Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3667--3676}
}
Batch Loss Score for Dynamic Data Pruning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Qing and Zhao, Bingxuan and Yang, Tao and Zhang, Hongyuan and Gao, Junyu and Wang, Qi},
  title     = {Batch Loss Score for Dynamic Data Pruning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6188--6197}
}
Spatio-Temporal Conditional Denoising Transformer for Modality-Missing RGBT Tracking-
[pdf]
[bibtex]@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Andong and Zha, Ziyi and Jin, Jiandong and Li, Shihao and Li, Chenglong and Tang, Jin and Luo, Bin},
  title     = {Spatio-Temporal Conditional Denoising Transformer for Modality-Missing {RGBT} Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13584--13593}
}
SignPR: A Progressive Vector-Quantized Diffusion Framework for Sign Language Production-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xiao and Gan, Shiwei and Yin, Yafeng and Guo, Bowen and Jiang, Zhiwei and Meng, Shunmei and Xie, Lei and Lu, Sanglu},
  title     = {{SignPR}: A Progressive Vector-Quantized Diffusion Framework for Sign Language Production},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2198--2208}
}
PureProof: Diffusion-Resistant Black-box Targeted Attack on Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Yiming and Wang, Dong and Lyu, Xinqi and Xiao, Bin},
  title     = {{PureProof}: Diffusion-Resistant Black-box Targeted Attack on Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8621--8630}
}
LaS-Comp: Zero-shot 3D Completion with Latent-Spatial Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Weilong and Li, Haipeng and Xu, Hao and Ye, Nianjin and Ai, Yihao and Liu, Shuaicheng and Hu, Jingyu},
  title     = {{LaS-Comp}: Zero-shot {3D} Completion with Latent-Spatial Consistency},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7588--7599}
}
EchoVDiff: Cardiac-Cycle Echocardiography Video Generation from Arbitrary Single Frame-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jiansong and Yang, Xiaying and Luo, Xiaoling and Shen, Linlin},
  title     = {{EchoVDiff}: Cardiac-Cycle Echocardiography Video Generation from Arbitrary Single Frame},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9040--9050}
}
Consensus Entropy: Harnessing Multi-VLM Agreement for Self-Verifying and Self-Improving OCR-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yulong and Liang, Tianyi and Cui, Erfei and Wang, Guoqing and Guo, Xu and Li, Chenhui and Liu, Gongshen},
  title     = {Consensus Entropy: Harnessing {Multi-VLM} Agreement for Self-Verifying and Self-Improving {OCR}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11643--11653}
}
AIMDepth: Asymmetric Image-Event Mamba for Monocular Depth Estimation-
[pdf]
[bibtex]@InProceedings{Jing_2026_CVPR,
  author    = {Jing, Luoxi and Shi, Dianxi and Cao, Yushe and Wang, Yuanze and Zhang, Junze and Cui, Yuning and Wang, Mengzhu},
  title     = {{AIMDepth}: Asymmetric Image-Event {Mamba} for Monocular Depth Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8033--8044}
}
Interpretable and Steerable Concept Bottleneck Sparse Autoencoders-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kulkarni_2026_CVPR,
  author    = {Kulkarni, Akshay and Weng, Tsui-Wei and Narayanaswamy, Vivek and Liu, Shusen and Sakla, Wesam A. and Thopalli, Kowshik},
  title     = {Interpretable and Steerable Concept Bottleneck Sparse Autoencoders},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2918--2927}
}
Token Warping Helps MLLMs Look from Nearby Viewpoints-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Phillip Y. and Park, Chanho and Park, Mingue and Yoo, Seungwoo and Koo, Juil and Sung, Minhyuk},
  title     = {Token Warping Helps {MLLMs} Look from Nearby Viewpoints},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3476--3488}
}
Predictive Regularization Against Visual Representation Degradation in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Enguang and Wang, Qiang and Wu, Yuanchen and Yan, Ke and Yuan, Xinbin and Ding, Shouhong and Liu, Xialei and Cheng, Ming-Ming},
  title     = {Predictive Regularization Against Visual Representation Degradation in Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8813--8824}
}
MakeAnything: Harnessing Diffusion Transformers for Multi-Domain Procedural Sequence Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR,
  author    = {Song, Yiren and Liu, Cheng and Shou, Mike Zheng},
  title     = {{MakeAnything}: Harnessing Diffusion Transformers for Multi-Domain Procedural Sequence Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11482--11492}
}
Memory-Efficient Fine-Tuning Diffusion Transformers via Dynamic Patch Sampling and Block Skipping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR,
  author    = {Park, Sunghyun and Kim, Jeongho and Park, Hyoungwoo and Das, Debasmit and Yun, Sungrack and Hayat, Munawar and Choo, Jaegul and Porikli, Fatih and Choi, Seokeon},
  title     = {Memory-Efficient Fine-Tuning Diffusion Transformers via Dynamic Patch Sampling and Block Skipping},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11504--11514}
}
pH-Strips for Selective Forgetting: A Blunt but Fast Diagnostic Baseline for Machine Unlearning-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2026_CVPR,
  author    = {Qian, Chengyao and Wu, Jing and Le, Trung and Phung, Dinh and Harandi, Mehrtash},
  title     = {{pH-Strips} for Selective Forgetting: A Blunt but Fast Diagnostic Baseline for Machine Unlearning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3306--3315}
}
COPE: Consistent Occlusion and Prompt Enhancement Network for Occluded Person Re-identification-
[pdf]
[bibtex]@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Siyi and Lin, Jinliang and Weng, Juanjuan and Liu, Zhihui and Li, Shaozi and Luo, Zhiming},
  title     = {{COPE}: Consistent Occlusion and Prompt Enhancement Network for Occluded Person Re-identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11293--11302}
}
ReflexSplit: Single Image Reflection Separation via Layer Fusion-Separation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Chia-Ming and Lin, Yu-Fan and Jiang, Jin-Hui and Hsiao, Yu-Jou and Hsu, Chih-Chung and Liu, Yu-Lun},
  title     = {{ReflexSplit}: Single Image Reflection Separation via Layer Fusion-Separation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1300--1309}
}
Shedding Light on VLN Robustness: A Black-box Framework for Indoor Lighting-based Adversarial Attack-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Chenyang and Tang, Wenbing and Huang, Yihao and Zhan, Sinong Simon and Hu, Ming and Jia, Xiaojun and Liu, Yang},
  title     = {Shedding Light on {VLN} Robustness: A Black-box Framework for Indoor Lighting-based Adversarial Attack},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1565--1574}
}
Geometry-as-context: Modulating Explicit 3D in Scene-consistent Video Generation to Geometry Context-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR,
  author    = {Hu, JiaKui and Liu, Jialun and Yang, Liying and Zhang, Xinliang and Li, Kaiwen and Zeng, Shuang and Li, Yuanwei and Huang, Haibin and Zhang, Chi and Lu, Yanye},
  title     = {Geometry-as-context: Modulating Explicit {3D} in Scene-consistent Video Generation to Geometry Context},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4258--4268}
}
PAUL: Uncertainty-Guided Partition and Augmentation for Robust Cross-View Geo-Localization under Noisy Correspondence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Zheng and Zhang, Xueyi and Guo, Yanming and Xie, Yuxiang and Ding, Zhaoyun and Cai, Siqi and Li, Haizhou and Lao, Mingrui},
  title     = {{PAUL}: Uncertainty-Guided Partition and Augmentation for Robust Cross-View Geo-Localization under Noisy Correspondence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5389--5398}
}
LightMover: Generative Light Movement with Color and Intensity Controls-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Gengze and Wang, Tianyu and Kim, Soo Ye and Shu, Zhixin and Yu, Xin and Hold-Geoffroy, Yannick and Chaturvedi, Sumit and Wu, Qi and Lin, Zhe and Cohen, Scott},
  title     = {{LightMover}: Generative Light Movement with Color and Intensity Controls},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8997--9007}
}
InfinityHuman: Towards Long-Term Audio-Driven Human Animation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Xiaodi and Xie, Pan and Ren, Yi and Gan, Qijun and Zhang, Chen and Kong, Fangyuan and Yin, Xiang and Yuan, Zehuan and Peng, Bingyue},
  title     = {{InfinityHuman}: Towards Long-Term Audio-Driven Human Animation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3978--3987}
}
VoDaSuRe: A Large-Scale Dataset Revealing Domain Shift in Volumetric Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Hoeg_2026_CVPR,
  author    = {H{\o}eg, August Leander and Bardenfleth, Sophia Wiinberg and Kjer, Hans Martin and Dyrby, Tim Bj{\o}rn and Dahl, Vedrana Andersen and Dahl, Anders Bjorholm},
  title     = {{VoDaSuRe}: A Large-Scale Dataset Revealing Domain Shift in Volumetric Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2166--2176}
}
TokenHand: Discrete Token Representation for Efficient Hand Mesh Reconstruction-
[pdf]
[bibtex]@InProceedings{He_2026_CVPR,
  author    = {He, Xinguo and Shen, Yixin and Chaudhari, Rahul},
  title     = {{TokenHand}: Discrete Token Representation for Efficient Hand Mesh Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8921--8931}
}
VAR RL Done Right: Tackling Asynchronous Policy Conflicts in Visual Autoregressive Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Shikun and Qu, Liao and Zhang, Huichao and Liu, Yiheng and Song, Yangyang and Li, Xian and Jiang, Yi and Wang, Xu and Jia, Jia and Du, Daniel K. and Wu, Xinglong},
  title     = {{VAR} {RL} Done Right: Tackling Asynchronous Policy Conflicts in Visual Autoregressive Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1874--1884}
}
Advancing Cancer Prognosis with Hierarchical Fusion of Genomic, Proteomic and Pathology Imaging Data from a Systems Biology Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Junjie and Xue, Bao and Wang, Meiling and Shao, Wei and Zhang, Daoqiang},
  title     = {Advancing Cancer Prognosis with Hierarchical Fusion of Genomic, Proteomic and Pathology Imaging Data from a Systems Biology Perspective},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12554--12564}
}
Hoi! - A Multimodal Dataset for Force-Grounded, Cross-View Articulated Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Engelbracht_2026_CVPR,
  author    = {Engelbracht, Tim and Zurbr{\"u}gg, Ren{\'e} and Wohlrapp, Matteo and B{\"u}chner, Martin and Valada, Abhinav and Pollefeys, Marc and Blum, Hermann and Bauer, Zuria},
  title     = {Hoi! - A Multimodal Dataset for Force-Grounded, Cross-View Articulated Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8880--8890}
}
MultiBanana: A Challenging Benchmark for Multi-Reference Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Oshima_2026_CVPR,
  author    = {Oshima, Yuta and Miyake, Daiki and Matsutani, Kohsei and Iwasawa, Yusuke and Suzuki, Masahiro and Matsuo, Yutaka and Furuta, Hiroki},
  title     = {{MultiBanana}: A Challenging Benchmark for Multi-Reference Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {448--460}
}
Distilling Unsigned Distance Function for Surface Reconstruction from 3D Gaussian Splatting-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Qian and Fu, Rao and Li, Jiangtao and Liu, Fan},
  title     = {Distilling Unsigned Distance Function for Surface Reconstruction from {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4891--4901}
}
TouchDream: 3D Object Completion through Imagined Touch-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yuanbo and Wang, Xinning and Zhang, Zhaoxuan and Wang, Changlong and Xia, Qianchen and Wei, Xiaopeng and Yang, Xin},
  title     = {{TouchDream}: {3D} Object Completion through Imagined Touch},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8901--8910}
}
Real-Time Long Horizon Air Quality Forecasting via Group-Relative Policy Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Inha and Kim, Eunki and Ryu, Wonjeong and Shin, Jaeyo and Yu, Seungjun and Kang, Yoon-Hee and Jeong, Seongeun and Kim, Eunhye and Kim, Soontae and Shim, Hyunjung},
  title     = {Real-Time Long Horizon Air Quality Forecasting via Group-Relative Policy Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6421--6431}
}
Say Cheese! Detail-Preserving Portrait Collection Generation via Natural Language Edits-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Zelong and Wu, Jiahui and Ba, Ying and Jing, Dong and Lu, Zhiwu},
  title     = {Say Cheese! Detail-Preserving Portrait Collection Generation via Natural Language Edits},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7827--7836}
}
FedSST: Rethinking Fair Federated Graph Learning under Structural Shift-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Dingyi},
  title     = {{FedSST}: Rethinking Fair Federated Graph Learning under Structural Shift},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10335--10345}
}
Beyond Perceptual Shortcuts: Causal-Inspired Debiasing Optimization for Generalizable Video Reasoning in Lightweight MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Jingze and Zhang, Quan and Suo, Hongfei and Cai, Zeqiang and Chen, Hongbo},
  title     = {Beyond Perceptual Shortcuts: Causal-Inspired Debiasing Optimization for Generalizable Video Reasoning in Lightweight {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12259--12268}
}
Scalable Trajectory Generation for Whole-Body Mobile Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Niu_2026_CVPR,
  author    = {Niu, Yida and Chang, Xinhai and Liu, Xin and Jiao, Ziyuan and Zhu, Yixin},
  title     = {Scalable Trajectory Generation for Whole-Body Mobile Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1798--1808}
}
UniDef: Universal Defense Against Unauthorized Image Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Mingwen and Meng, Lingzhuang and Lv, Xiang and Wu, Mengyao and Chen, Xinyuan and Zhang, Qiao and Liu, Chang and Qiao, Yuanjian and Dong, Chao},
  title     = {{UniDef}: Universal Defense Against Unauthorized Image Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8631--8640}
}
CLCR: Cross-Level Semantic Collaborative Representation for Multimodal Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Meng_2026_CVPR,
  author    = {Meng, Chunlei and Huang, Guanhong and Fu, Rong and Jian, Runmin and Gan, Zhongxue and Ouyang, Chun},
  title     = {{CLCR}: Cross-Level Semantic Collaborative Representation for Multimodal Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1606--1615}
}
Optical Diffraction-based Convolution for Semiconductor Lithography-
[pdf]
[supp]
[bibtex]@InProceedings{Son_2026_CVPR,
  author    = {Son, Young-Han and Shin, Dong-Hee and Lee, Deok-Joong and Lee, Hyun Jung and Kam, Tae-Eui},
  title     = {Optical Diffraction-based Convolution for Semiconductor Lithography},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12458--12468}
}
GSNR: Graph Smooth Null-Space Representation for Inverse Problems-
[pdf]
[supp]
[bibtex]@InProceedings{Gualdron-Hurtado_2026_CVPR,
  author    = {Gualdr{\'o}n-Hurtado, Romario and Jacome, Roman and Su{\'a}rez, Rafael S. and Arguello, Henry},
  title     = {{GSNR}: Graph Smooth Null-Space Representation for Inverse Problems},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12469--12479}
}
U-Mind: A Unified Framework for Real-Time Multimodal Interaction with Audiovisual Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Xiang and Gao, Feng and Zhang, Yong and Pang, Youxin and Xiaoming, Xu and Kang, Zhuoliang and Wei, Xiaoming and Liu, Yebin},
  title     = {{U-Mind}: A Unified Framework for Real-Time Multimodal Interaction with Audiovisual Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10874--10886}
}
Beyond Patches: Global-aware Autoregressive Model for Multimodal Few-Shot Font Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Haonan and Luo, Yuxuan and Lian, Zhouhui},
  title     = {Beyond Patches: Global-aware Autoregressive Model for Multimodal Few-Shot Font Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {517--528}
}
eRetinexGS: Retinex Modeling for Low-Light Scene Enhancement via Event Streams and 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Haojie and Chen, Zehao and Liu, Yan and Gu, Shi and Lin, Peng and Ma, De and Tang, Huajin and Zheng, Qian and Pan, Gang},
  title     = {{eRetinexGS}: {Retinex} Modeling for Low-Light Scene Enhancement via Event Streams and {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8056--8066}
}
WebGym: Scaling Training Environments for Long-Horizon Visual Web Agents with Realistic Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Hao and Taymanov, Alexey and Zhang, Tong and Kumar, Aviral and Whitehead, Spencer},
  title     = {{WebGym}: Scaling Training Environments for Long-Horizon Visual Web Agents with Realistic Tasks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12248--12258}
}
Test-Time Multi-Prompt Adaptation for Open-Vocabulary Remote Sensing Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Ting and Wang, Qilong and Hou, Qibin and Hu, Qinghua},
  title     = {Test-Time Multi-Prompt Adaptation for Open-Vocabulary Remote Sensing Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10699--10709}
}
The Midas Touch for Metric Depth-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Yu and Guo, Zizhan and Xiong, Zuyi and Zhang, Haoran and Feng, Yi and Zhao, Hongbo and Wang, Hanli and Fan, Rui},
  title     = {The {Midas} Touch for Metric Depth},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5804--5813}
}
Narrative Weaver: Towards Controllable Long-Range Visual Consistency with Multi-Modal Conditioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2026_CVPR,
  author    = {Yao, Zhengjian and Li, Yongzhi and Gao, Xinyuan and Chen, Quan and Jiang, Peng and Lu, Yanye},
  title     = {Narrative Weaver: Towards Controllable Long-Range Visual Consistency with Multi-Modal Conditioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7707--7718}
}
Dual-branch Distilled Transformer for Efficient Asymmetric UAV Tracking-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Hongtao and Zhong, Bineng and Liang, Qihua and Zheng, Yaozong and Hu, Xiantao and Xue, Yuanliang and Song, Shuxiang},
  title     = {Dual-branch Distilled Transformer for Efficient Asymmetric {UAV} Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13615--13625}
}
DiffSoup: Direct Differentiable Rasterization of Triangle Soup for Extreme Radiance Field Simplification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tojo_2026_CVPR,
  author    = {Tojo, Kenji and Bickel, Bernd and Umetani, Nobuyuki},
  title     = {{DiffSoup}: Direct Differentiable Rasterization of Triangle Soup for Extreme Radiance Field Simplification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8353--8363}
}
Learning to See and Act: Task-Aware Virtual View Exploration for Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Yongjie and Wang, Zhouxia and Liu, Yang and Luo, Kaijun and Wen, Yifan and Dai, Mingtong and Chen, Weixing and Chen, Ziliang and Liu, Lingbo and Li, Guanbin and Lin, Liang},
  title     = {Learning to See and Act: Task-Aware Virtual View Exploration for Robotic Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13386--13396}
}
Downscaling Intelligence: Exploring Perception and Reasoning Bottlenecks in Small Multimodal Models-
[pdf]
[supp]
[bibtex]@InProceedings{Endo_2026_CVPR,
  author    = {Endo, Mark and Yeung-Levy, Serena},
  title     = {Downscaling Intelligence: Exploring Perception and Reasoning Bottlenecks in Small Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {778--788}
}
FlexiVideo: Variation-Aware Temporal Dynamics Modeling for Efficient Video Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Da and Yang, Xuesong and Guo, Zonghao and Zhang, Yichen and Chen, Chi and Zhang, Yidan and Yao, Yuan and Wan, Fang and Ke, Wei and Sun, Maosong},
  title     = {{FlexiVideo}: Variation-Aware Temporal Dynamics Modeling for Efficient Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9804--9814}
}
Vinedresser3D: Towards Agentic Text-guided 3D Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Chi_2026_CVPR,
  author    = {Chi, Yankuan and Li, Xiang and Huang, Zixuan and Rehg, James Matthew},
  title     = {{Vinedresser3D}: Towards Agentic Text-guided {3D} Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12673--12683}
}
VideoMaMa: Mask-Guided Video Matting via Generative Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lim_2026_CVPR,
  author    = {Lim, Sangbeom and Oh, Seoung Wug and Huang, Jiahui and Yoon, Heeji and Kim, Seungryong and Lee, Joon-Young},
  title     = {{VideoMaMa}: Mask-Guided Video Matting via Generative Prior},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3845--3855}
}
WebChain: A Large-Scale Human-Annotated Dataset of Real-World Web Interaction Traces-
[pdf]
[arXiv]
[bibtex]@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Sicheng and Wan, Rui and Leng, Yifei and Liang, Gaoning and Ling, Li and Shang, Yanyi and Kong, Dehan},
  title     = {{WebChain}: A Large-Scale Human-Annotated Dataset of Real-World Web Interaction Traces},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6209--6218}
}
EvoComp: Learning Visual Token Compression for Multimodal Large Language Models via Semantic-Guided Evolutionary Labeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR,
  author    = {Song, Jiafei and Zhou, Fengwei and Qu, Jin and Li, Wenjin Jason and Wu, Tong and Xue, Gengjian and Zhao, Zhikang and Wei, Daomin and Lu, Yichao and Na, Bailin},
  title     = {{EvoComp}: Learning Visual Token Compression for Multimodal Large Language Models via Semantic-Guided Evolutionary Labeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3532--3542}
}
The Golden Subspace: Where Efficiency Meets Generalization in Continual Test-Time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Guannan and Zhou, Da-Wei and Li, Zhenguo and Ye, Han-Jia},
  title     = {The Golden Subspace: Where Efficiency Meets Generalization in Continual Test-Time Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3866--3875}
}
Long-SCOPE: Fully Sparse Long-Range Cooperative 3D Perception-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jiahao and Xu, Zikun and Zhang, Yuner and Jiang, Zhongwei and Lu, Chenyang and Yang, Shuocheng and Wang, Yuxuan and Zhong, Jiaru and Zhang, Chuang and Xu, Shaobing and Wang, Jianqiang},
  title     = {{Long-SCOPE}: Fully Sparse Long-Range Cooperative {3D} Perception},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11599--11609}
}
Momentum Memory for Knowledge Distillation in Computational Pathology-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Yongxin and Lu, Hao and Koyun, Onur C. and Zhu, Zhengjie and Demir, Muhammet F. and Gurcan, Metin N.},
  title     = {Momentum Memory for Knowledge Distillation in Computational Pathology},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6889--6899}
}
Efficiently Reconstructing Dynamic Scenes One D4RT at a Time-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chuhan and Le Moing, Guillaume and Koppula, Skanda and Rocco, Ignacio and Momeni, Liliane and Xie, Junyu and Sun, Shuyang and Sukthankar, Rahul and Barral, Jo{\"e}lle K. and Hadsell, Raia and Ghahramani, Zoubin and Zisserman, Andrew and Zhang, Junlin and Sajjadi, Mehdi S. M.},
  title     = {Efficiently Reconstructing Dynamic Scenes One {D4RT} at a Time},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7382--7392}
}
Ref4D-VideoBench: Four-Dimensional Reference-Based Evaluation of Text-to-Video Generative Models-
[pdf]
[bibtex]@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Jiajia and He, Yujia and Hou, Yuhan and Qi, Hang and Wang, Sihua and Shi, Jincheng and Li, Kwok Fung and Zheng, Zibin and Wu, Weibin},
  title     = {{Ref4D-VideoBench}: Four-Dimensional Reference-Based Evaluation of Text-to-Video Generative Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7719--7729}
}
TeHOR: Text-Guided 3D Human and Object Reconstruction with Textures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nam_2026_CVPR,
  author    = {Nam, Hyeongjin and Jung, Daniel Sungho and Lee, Kyoung Mu},
  title     = {{TeHOR}: Text-Guided {3D} Human and Object Reconstruction with Textures},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7100--7110}
}
Divide, Conquer, and Aggregate: Asymmetric Experts for Class-Imbalanced Semi-Supervised Medical Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yajun},
  title     = {Divide, Conquer, and Aggregate: Asymmetric Experts for Class-Imbalanced Semi-Supervised Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8503--8513}
}
Dynamic Logits Adjustment and Exploration for Test-Time Adaptation in Vision Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Haoyan and Liu, Yahao and Lei, Yinjie and Duan, Lixin and Li, Wen}, title = {Dynamic Logits Adjustment and Exploration for Test-Time Adaptation in Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3143--3153} }
CARE What Fails: Contrastive Anchored-REflection for Verifiable Multimodal Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yongxin and Yang, Zhicheng and Cao, Meng and Han, Mingfei and Lin, Haokun and Zhu, Yingying and Chang, Xiaojun and Liang, Xiaodan}, title = {{CARE} What Fails: Contrastive {Anchored-REflection} for Verifiable Multimodal Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {11976--11986} }
Beyond Tie Points: Satellite Image Block Adjustment based on Dense Feature Consistency-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yi and Wan, Yi and Yu, Lei and Xia, Panwang and Wu, Qiong and Pei, Yingying and Huang, Xuejun and Zhang, Junjian and Cai, Xiangyuan and Hu, Hongwei and Zhang, Yongjun}, title = {Beyond Tie Points: Satellite Image Block Adjustment based on Dense Feature Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6443--6452} }
Extend3D: Town-Scale 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yoon_2026_CVPR, author = {Yoon, Seungwoo and Kim, Jinmo and Park, Jaesik}, title = {{Extend3D}: Town-Scale {3D} Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5892--5901} }
A Style is Worth One Code: Unlocking Code-to-Style Image Generation with Discrete Style Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Huijie and Cui, Shuhao and Cao, Haoxiang and Ma, Shuai and Wu, Kai and Kang, Guoliang}, title = {A Style is Worth One Code: Unlocking Code-to-Style Image Generation with Discrete Style Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1--10} }
Heuristic Self-Paced Learning for Domain Adaptive Semantic Segmentation under Adverse Conditions-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Shiqin and Chen, Haoyang and Huang, Huaizhou and He, Yinkan and Sun, Dongfang and Chen, Xiaoqing and Liu, Xingyu and Wang, Zheng and Zhao, Kaiyan}, title = {Heuristic Self-Paced Learning for Domain Adaptive Semantic Segmentation under Adverse Conditions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3815--3824} }
GeoPredict: Leveraging Predictive Kinematics and 3D Gaussian Geometry for Precise VLA Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qian_2026_CVPR, author = {Qian, Jingjing and Han, Boyao and Shi, Chen and Xiao, Lei and Yang, Long and Shi, Shaoshuai and Jiang, Li}, title = {{GeoPredict}: Leveraging Predictive Kinematics and {3D} {Gaussian} Geometry for Precise {VLA} Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {13529--13539} }
PoseGAM: Robust Unseen Object Pose Estimation via Geometry-Aware Multi-View Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Jianqi and Zhang, Biao and Tang, Xiangjun and Wonka, Peter}, title = {{PoseGAM}: Robust Unseen Object Pose Estimation via Geometry-Aware Multi-View Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7197--7208} }
Event-based Motion Deblurring with Unpaired Data-
[pdf]
[supp]
[bibtex]@InProceedings{Cho_2026_CVPR, author = {Cho, Hoonhee and Jeong, Yuhwan and Yoon, Kuk-Jin}, title = {Event-based Motion Deblurring with Unpaired Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {882--891} }
DualMirage: Hunting Stealthy Multimodal LLM Agents via CAPTCHAs with Contour and Adversarial Illusions-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Bei and Li, Gaolei and Wu, Jun and Li, Jianhua}, title = {{DualMirage}: Hunting Stealthy Multimodal {LLM} Agents via {CAPTCHAs} with Contour and Adversarial Illusions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1523--1532} }
VGGT-Det: Mining VGGT Internal Priors for Sensor-Geometry-Free Multi-View Indoor 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Yang and Wu, Feize and Chen, Dave Zhenyu and Zhong, Yingji and Hong, Lanqing and Xu, Dan}, title = {{VGGT-Det}: Mining {VGGT} Internal Priors for Sensor-Geometry-Free Multi-View Indoor {3D} Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4708--4717} }
Hugging Visual Prompt and Segmentation Tokens: Consistency Learning for Fine-Grained Visual Understanding in MLLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Jing and Yang, Sen and Duan, Boqiang and Dai, Ming and Zhang, Wei and Tan, Xiao and Chen, Kunbin and He, Wei and Wang, Jingdong and Wang, Hanli}, title = {Hugging Visual Prompt and Segmentation Tokens: Consistency Learning for Fine-Grained Visual Understanding in {MLLMs}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5175--5186} }
UniPR: Unified Object-level Real-to-Sim Perception and Reconstruction from a Single Stereo Pair-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Chuanrui and Zou, Yingshuang and Wu, ZhengXian and Ling, Yonggen and Yang, Yuxiao and Wang, Ziwei}, title = {{UniPR}: Unified Object-level Real-to-Sim Perception and Reconstruction from a Single Stereo Pair}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4667--4676} }
DiffDecompose: Layer-Wise Decomposition of Alpha-Composited Images via Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zitong and Zhao, Hang and Zhou, Qianyu and Lu, Xuequan and Li, Xiangtai and Yang, Hao and Yang, Bo and Song, Yiren}, title = {{DiffDecompose}: Layer-Wise Decomposition of Alpha-Composited Images via Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4624--4634} }
ERMoE: Eigen-Reparameterized Mixture-of-Experts for Stable Routing and Interpretable Specialization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Anzhe and Duan, Shukai and Li, Shixuan and Yin, Chenzhong and Cheng, Mingxi and Ping, Heng and Chattopadhyay, Tamoghna and Thomopoulos, Sophia I. and Nazarian, Shahin and Thompson, Paul and Bogdan, Paul}, title = {{ERMoE}: Eigen-Reparameterized Mixture-of-Experts for Stable Routing and Interpretable Specialization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12997--13006} }
Counterfactual VLA: Self-Reflective Vision-Language-Action Model with Adaptive Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Zhenghao and Ding, Wenhao and You, Yurong and Chen, Yuxiao and Luo, Wenjie and Tian, Thomas and Cao, Yulong and Sharma, Apoorva and Xu, Danfei and Ivanovic, Boris and Li, Boyi and Wang, Yan and Pavone, Marco}, title = {Counterfactual {VLA}: Self-Reflective Vision-Language-Action Model with Adaptive Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4022--4031} }
Dynamic Momentum Recalibration in Online Gradient Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2026_CVPR, author = {Yao, Zhipeng and Yu, Rui and Chang, Guisong and Li, Ying and Zhang, Yu and Li, Dazhou}, title = {Dynamic Momentum Recalibration in Online Gradient Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12902--12912} }
AG-VAS: Anchor-Guided Zero-Shot Visual Anomaly Segmentation with Large Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2026_CVPR, author = {Qu, Zhen and Tao, Xian and Bao, Xiaoyi and Wang, Dingrong and Qu, ShiChen and Zhang, Zhengtao and Wang, Xingang}, title = {{AG-VAS}: Anchor-Guided Zero-Shot Visual Anomaly Segmentation with Large Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {14126--14136} }
Opening the Sim-to-Real Door for Humanoid Pixel-to-Action Policy Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2026_CVPR, author = {Xue, Haoru and He, Tairan and Wang, Zi and Ben, Qingwei and Xiao, Wenli and Luo, Zhengyi and Da, Xingye and Casta{\~n}eda, Fernando and Shi, Guanya and Sastry, Shankar and Fan, Linxi and Zhu, Yuke}, title = {Opening the Sim-to-Real Door for Humanoid Pixel-to-Action Policy Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6642--6652} }
STAGE: Storyboard-Anchored Generation for Cinematic Multi-shot Narrative-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Peixuan and Jia, Zijian and Liu, Kaiqi and Weng, Shuchen and Li, Si and Shi, Boxin}, title = {{STAGE}: Storyboard-Anchored Generation for Cinematic Multi-shot Narrative}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {659--669} }
Affordance-First Decomposition for Continual Learning in Video-Language Understanding-
[pdf]
[arXiv]
[bibtex]@InProceedings{xu_2026_CVPR, author = {Xu, Mengzhu and Liu, Hanzhi and Peng, Ningkang and Chen, Qianyu and Xiao, Canran}, title = {Affordance-First Decomposition for Continual Learning in Video-Language Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3908--3919} }
How Much 3D Do Video Foundation Models Encode?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Zixuan and Li, Xiang and Lv, Zhaoyang and Rehg, James M.}, title = {How Much {3D} Do Video Foundation Models Encode?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {384--394} }
AdaPrior: Bayesian-Inspired Adaptive Prior Correction for Long-Tailed Continual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Bhat_2026_CVPR, author = {Bhat, S Divakar and More, Amit Popat and Soni, Mudit and Aggarwal, Bhuvan}, title = {{AdaPrior}: {Bayesian}-Inspired Adaptive Prior Correction for Long-Tailed Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {10840--10850} }
Curriculum Group Policy Optimization: Adaptive Sampling for Unleashing the Potential of Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Baoteng and Zang, Xianghao and Wang, Xinran and Na, Xiangyu and He, Zhixiang and Sun, Hao and Zhang, Chi and He, Zhongjiang and Cao, Tianwei and Liang, Kongming and Ma, Zhanyu}, title = {Curriculum Group Policy Optimization: Adaptive Sampling for Unleashing the Potential of Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {561--571} }
Towards Sparse Video Understanding and Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Chenwei and Ye, Zhen and Wu, Shang and Li, Weijian and Wang, Zihan and Xia, Zhuofan and Lu, Lie and Maneriker, Pranav and Du, Fan and Li, Manling and Liu, Han}, title = {Towards Sparse Video Understanding and Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {11357--11368} }
TIM: Temporal Decoupling with Iterative Mutual-Refinement Model for Longitudinal Radiology Report Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2026_CVPR, author = {Dong, Yiheng and Lin, Yi and Huang, Shilong and Yang, Xiyan and Yang, Xin}, title = {{TIM}: Temporal Decoupling with Iterative Mutual-Refinement Model for Longitudinal Radiology Report Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6951--6961} }
Prompt-Free Universal Region Proposal Network-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Qihong and Liu, Changhan and Zhang, Shaofeng and Li, Wenbin and Fan, Qi and Gao, Yang}, title = {Prompt-Free Universal Region Proposal Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {13080--13090} }
Stabilizing Feature Geometry in Noisy Pretrained Models for Robust Downstream Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Quanyu and Han, Zhongyi and Sun, Hao and Gong, Yongshun and Wang, Xiaoyan and Yin, Yilong and Li, Shuo}, title = {Stabilizing Feature Geometry in Noisy Pretrained Models for Robust Downstream Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {789--800} }
Fast Spatial Tracking with Visual Geometry Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Chengjie and Wu, Guile and Bai, Dongfeng and Liu, Bingbing}, title = {Fast Spatial Tracking with Visual Geometry Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {374--383} }
Imagine Before Concentration: Diffusion-Guided Registers Enhance Partially Relevant Video Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jun and Lou, Xuhang and Wang, Jinpeng and Wang, Yuting and Wang, Yaowei and Xia, Shu-Tao and Chen, Bin}, title = {Imagine Before Concentration: Diffusion-Guided Registers Enhance Partially Relevant Video Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {9710--9721} }
VideoSeek: Long-Horizon Video Agent with Tool-Guided Seeking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Jingyang and Wu, Jialian and Liu, Jiang and Sun, Ximeng and Wang, Ze and Yu, Xiaodong and Luo, Jiebo and Liu, Zicheng and Barsoum, Emad}, title = {{VideoSeek}: Long-Horizon Video Agent with Tool-Guided Seeking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5465--5475} }
EgoFlow: Gradient-Guided Flow Matching for Egocentric 6DoF Object Motion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saroha_2026_CVPR, author = {Saroha, Abhishek and Zeng, Huajian and Zuo, Xingxing and Cremers, Daniel and Wang, Xi}, title = {{EgoFlow}: Gradient-Guided Flow Matching for Egocentric {6DoF} Object Motion Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4332--4342} }
VA-p: Variational Policy Alignment for Pixel-Aware Autoregressive Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Liao_2026_CVPR, author = {Liao, Xinyao and He, Qiyuan and Xu, Kai and Qu, Xiaoye and Li, Yicong and Wei, Wei and Yao, Angela}, title = {{VA-p}: Variational Policy Alignment for Pixel-Aware Autoregressive Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12787--12797} }
GGBench: A Geometric Generative Reasoning Benchmark for Unified Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Jingxuan and Jia, Caijun and Bai, Xi and Xu, Xinglong and Li, Siyuan and Sun, Linzhuang and Yu, Bihui and He, Conghui and Wu, Lijun and Tan, Cheng}, title = {{GGBench}: A Geometric Generative Reasoning Benchmark for Unified Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5199--5210} }
NeuroRule: Bridging Vision and Logic with Differentiable Rule Induction-
[pdf]
[supp]
[bibtex]@InProceedings{Zarar_2026_CVPR, author = {Zarar, Muhammad and Zhang, Mingzheng and Zhang, Xiaowang and Feng, Zhiyong}, title = {{NeuroRule}: Bridging Vision and Logic with Differentiable Rule Induction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {11654--11663} }
Learning to Act Robustly with View-Invariant Latent Actions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2026_CVPR, author = {Jeong, Youngjoon and Chun, Junha and Kim, Taesup}, title = {Learning to Act Robustly with View-Invariant Latent Actions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6781--6790} }
Beyond Single Solution: Multi-Hypothesis Deep Unfolding Network for Image Compressive Sensing-
[pdf]
[bibtex]@InProceedings{Cui_2026_CVPR, author = {Cui, Wenxue and Li, Hualin and Qin, Yuhang and Xu, Yifu and Fan, Xiaopeng and Zhao, Debin}, title = {Beyond Single Solution: Multi-Hypothesis Deep Unfolding Network for Image Compressive Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5283--5293} }
Thermal Diffusion Matters: Infrared Spatial-Temporal Video Super-Resolution through Heat Conduction Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Mingxuan and Li, Shuang and Zhang, Yutang and Geng, Jing and Shen, Yirui and Kang, Jingxuan and Zhuang, Fuzhen and Wang, Shuigen}, title = {Thermal Diffusion Matters: Infrared Spatial-Temporal Video Super-Resolution through Heat Conduction Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2146--2155} }
Diffusion-Based Makeup Transfer with Facial Region-Aware Makeup Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Zheng and Meng, Debin and Miao, Yunqi and Zhang, Zhensong and Xu, Songcen and Patras, Ioannis and Song, Jifei}, title = {Diffusion-Based Makeup Transfer with Facial Region-Aware Makeup Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4656--4666} }
S2C2Seg: Semantic-Spatial Consistency and Category Optimization for Open-Vocabulary Segmentation-
[pdf]
[bibtex]@InProceedings{Qing_2026_CVPR, author = {Qing, Yuhao and Wang, Yueying and Chen, Chaoyang and Zhang, Weidong and Wen, Jie and Xu, Xin}, title = {{S2C2Seg}: Semantic-Spatial Consistency and Category Optimization for Open-Vocabulary Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6293--6303} }
Do You Have Freestyle? Expressive Humanoid Locomotion via Audio Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zhe and Chi, Cheng and Wei, Yangyang and Zhu, Boan and Huang, Tao and Sun, Zhenguo and Peng, Yibo and Wang, Pengwei and Wang, Zhongyuan and Liu, Fangzhou and Xu, Chang and Zhang, Shanghang}, title = {Do You Have Freestyle? Expressive Humanoid Locomotion via Audio Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {956--965} }
3DReflecNet: A Large-Scale Dataset for 3D Reconstruction of Reflective, Transparent, and Low-Texture Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Zhicheng and Yu, Haoyi and Li, Boyan and Zhang, Dayou and Cao, Zijian and Gong, Tianyi and Liu, Junhua and Cui, Shuguang and Wang, Fangxin}, title = {{3DReflecNet}: A Large-Scale Dataset for {3D} Reconstruction of Reflective, Transparent, and Low-Texture Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7244--7255} }
Spherical Leech Quantization for Visual Tokenization and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yue and Jiang, Hanwen and Xu, Zhenlin and Yang, Chutong and Adeli, Ehsan and Kraehenbuehl, Philipp}, title = {Spherical {Leech} Quantization for Visual Tokenization and Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12913--12923} }
MetaSpectra+: A Compact Broadband Metasurface Camera for Snapshot Hyperspectral+ Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yuxuan and Xu, Wei and Guo, Qi}, title = {{MetaSpectra+}: A Compact Broadband Metasurface Camera for Snapshot Hyperspectral+ Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {209--219} }
OASIS: On-Demand Hierarchical Event Memory for Streaming Video Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Zhijia and Li, Jiaming and Chen, Weikai and Zhang, Yanhao and Lu, Haonan and Li, Guanbin}, title = {{OASIS}: On-Demand Hierarchical Event Memory for Streaming Video Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2821--2831} }
Beyond What's Shared: Recovering Lost Unique Information from Intermediate Layers to Boost Multimodal Geo-Foundation Models-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, JangHyeon and Dias, Philipe Ambrozio and Chiang, Yao-Yi and Lunga, Dalton}, title = {Beyond What's Shared: Recovering Lost Unique Information from Intermediate Layers to Boost Multimodal Geo-Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1585--1595} }
VAD-GS: Visibility-Aware Densification for 3D Gaussian Splatting in Dynamic Urban Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yikang and Fan, Rui}, title = {{VAD-GS}: Visibility-Aware Densification for {3D} {Gaussian} Splatting in Dynamic Urban Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4953--4962} }
TANGO: Learning Distribution-wise Foundation Prior Consistency and Instance-wise Style Calibration for Medical Image Generalization-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Chuang and Cao, Yichao and Su, Xiu and Zhu, Haogang}, title = {{TANGO}: Learning Distribution-wise Foundation Prior Consistency and Instance-wise Style Calibration for Medical Image Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8545--8555} }
A Provable Energy-Guided Test-Time Defense Boosting Adversarial Robustness of Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Mirza_2026_CVPR, author = {Mirza, Mujtaba Hussain and D'Orazio, Antonio and Melamed, Odelia and Masi, Iacopo}, title = {A Provable Energy-Guided Test-Time Defense Boosting Adversarial Robustness of Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8598--8609} }
TF-CADE: Foreground-Concentrated Text-Video Alignment for Zero-Shot Temporal Action Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Yearang and Kim, Ho-Joong and Lee, Seong-Whan}, title = {{TF-CADE}: Foreground-Concentrated Text-Video Alignment for Zero-Shot Temporal Action Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2843--2852} }
Emergent Outlier View Rejection in Visual Geometry Grounded Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Jisang and Hong, Sunghwan and Jung, Jaewoo and Jang, Wooseok and An, Honggyu and Wang, Qianqian and Kim, Seungryong and Feng, Chen}, title = {Emergent Outlier View Rejection in Visual Geometry Grounded Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {427--437} }
TiViBench: Benchmarking Think-in-Video Reasoning for Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Harold Haodong and Lan, Disen and Shu, Wen-Jie and Liu, Qingyang and Wang, Zihan and Chen, Sirui and Cheng, Wenkai and Chen, Kanghao and Zhang, Hongfei and Zhang, Zixin and Guo, Rongjin and Cheng, Yu and Chen, Ying-Cong}, title = {{TiViBench}: Benchmarking Think-in-Video Reasoning for Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {11403--11413} }
Artiverse: A Diverse and Physically Grounded Dataset for Articulated Objects-
[pdf]
[supp]
[bibtex]@InProceedings{Iliash_2026_CVPR, author = {Iliash, Denys and Liu, Jiayi and Fokin, Egor and Wu, Qirui and Amiri, Ali Mahdavi and Savva, Manolis and Chang, Angel X.}, title = {{Artiverse}: A Diverse and Physically Grounded Dataset for Articulated Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8932--8942} }
Z-Order Transformer for Feed-Forward Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Can and Liu, Lei and Jiang, Wei and Xu, Dong}, title = {Z-Order Transformer for Feed-Forward {Gaussian} Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7362--7371} }
MOFA-VTON: More Fashion Possibilities with Fine-Grained Adaptations in Virtual Try-On-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Xiaoyu and Wang, Chenyang and Wang, Jing and Zheng, Shunyuan and Meng, Quanling and Zhang, Shengping}, title = {{MOFA-VTON}: More Fashion Possibilities with Fine-Grained Adaptations in Virtual Try-On}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1895--1905} }
DepthFocus: Controllable Depth Estimation for See-Through Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Min_2026_CVPR, author = {Min, Junhong and Kim, Jimin and Kim, Minwook and Min, Cheol-Hui and Jeon, Youngpil and Choi, Minyong}, title = {{DepthFocus}: Controllable Depth Estimation for See-Through Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12595--12605} }
Enhancing Descriptive Captions with Visual Attributes for Multimodal Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Yanpeng and Hao, Jing and Zhu, Ke and Liu, Jiang-Jiang and Li, Xiaofan and Zhao, Na and Li, Zechao and Wang, Jingdong}, title = {Enhancing Descriptive Captions with Visual Attributes for Multimodal Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1683--1694} }
NESTOR: A Nested MOE-based Neural Operator for Large-Scale PDE Pre-Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Dengdi and Zhou, Xiaoya and Wang, Xiao and Si, Hao and Lyu, Wanli and Tang, Jin and Luo, Bin}, title = {{NESTOR}: A Nested {MOE}-based Neural Operator for Large-Scale {PDE} Pre-Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6147--6156} }
Think-as-You-See: Streaming Chain-of-Thought Reasoning for Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jialiang and Tong, Junlong and Lin, Junyan and Wu, Hao and Sun, Yirong and Ma, Yunpu and Shen, Xiaoyu}, title = {Think-as-You-See: Streaming Chain-of-Thought Reasoning for Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {11998--12008} }
From Detection to Association: Learning Discriminative Object Embeddings for Multi-Object Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2026_CVPR, author = {Shao, Yuqing and Yang, Yuchen and Yu, Rui and Li, Weilong and Guo, Xu and Yan, Huaicheng and Wang, Wei and Sun, Xiao}, title = {From Detection to Association: Learning Discriminative Object Embeddings for Multi-Object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6878--6888} }
Fast-FoundationStereo: Real-Time Zero-Shot Stereo Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2026_CVPR, author = {Wen, Bowen and Dewan, Shaurya and Birchfield, Stan}, title = {{Fast-FoundationStereo}: Real-Time Zero-Shot Stereo Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7513--7524} }
Omni-Attribute: Open-vocabulary Attribute Encoder for Visual Concept Personalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Tsai-Shien and Siarohin, Aliaksandr and Qian, Gordon Guocheng and Wang, Kuan-Chieh Jackson and Nemchinov, Egor and Haji-Ali, Moayed and Guler, Riza Alp and Menapace, Willi and Skorokhodov, Ivan and Kag, Anil and Zhu, Jun-Yan and Tulyakov, Sergey}, title = {{Omni-Attribute}: Open-vocabulary Attribute Encoder for Visual Concept Personalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8194--8204} }
LangField4D: Learning Identity-Adaptive and Spatio-Temporal Continuous 4D Language Fields for Dynamic Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Yichao and Miao, Qiaowei and Quan, Jinsheng and Yang, Wei and Li, Zhihui and Luo, Yawei}, title = {{LangField4D}: Learning Identity-Adaptive and Spatio-Temporal Continuous {4D} Language Fields for Dynamic Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {9558--9569} }
Representing 3D Faces with Learnable B-Spline Volumes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chandran_2026_CVPR,
  author    = {Chandran, Prashanth and Wang, Daoye and Bolkart, Timo},
  title     = {Representing {3D} Faces with Learnable {B-Spline} Volumes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13824--13834}
}
ForceVLA2: Unleashing Hybrid Force-Position Control with Force Awareness for Contact-Rich Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Yang and Zhaxizhuoma, Zhaxizhuoma and Jiang, Hongru and Xia, Junjie and Zhang, Hongquan and Du, Jinda and Zhou, Yunsong and Zeng, Jia and Hao, Ce and Ren, Jieji and Yu, Qiaojun and Lu, Cewu and Qiao, Yu and Pang, Jiangmiao},
  title     = {{ForceVLA2}: Unleashing Hybrid Force-Position Control with Force Awareness for Contact-Rich Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8911--8920}
}
TGSFormer: Scalable Temporal Gaussian Splatting for Embodied Semantic Scene Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qian_2026_CVPR,
  author    = {Qian, Rui and Cao, Haozhi and Deng, Tianchen and Hu, Tianxin and Guo, Weixiang and Yuan, Shenghai and Xie, Lihua},
  title     = {{TGSFormer}: Scalable Temporal {Gaussian} Splatting for Embodied Semantic Scene Completion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11881--11890}
}
VISTA: A Test-Time Self-Improving Video Generation Agent-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Long_2026_CVPR,
  author    = {Long, Do Xuan and Wan, Xingchen and Nakhost, Hootan and Lee, Chen-Yu and Pfister, Tomas and Arik, Sercan {\"O}.},
  title     = {{VISTA}: A Test-Time Self-Improving Video Generation Agent},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6021--6032}
}
Dynamic-Static Decomposition for Novel View Synthesis of Dynamic Scenes with Spiking Neurons-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Lingyun and Chen, Zehao and Liu, Yan and Gu, Shi and Lin, Peng and Ma, De and Tang, Huajin and Zheng, Qian and Pan, Gang},
  title     = {Dynamic-Static Decomposition for Novel View Synthesis of Dynamic Scenes with Spiking Neurons},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8342--8352}
}
MaskFocus: Focusing Policy Optimization on Critical Steps for Masked Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Guohui and Yu, Hu and Ma, Xiaoxiao and Pan, Yaning and Xu, Hang and Huang, Jie and Zhao, Feng},
  title     = {{MaskFocus}: Focusing Policy Optimization on Critical Steps for Masked Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5956--5966}
}
MapRoute:Precise-Concept Erasing Mappers via Semantic Routing-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Sihao and Liang, Baixi and Xia, Shuohong and Yang, Yunyun},
  title     = {{MapRoute}:Precise-Concept Erasing Mappers via Semantic Routing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10187--10196}
}
Not All Birds Look The Same: Identity-Preserving Generation For Birds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Aaron and Saha, Oindrila and Maji, Subhransu},
  title     = {Not All Birds Look The Same: Identity-Preserving Generation For Birds},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1983--1993}
}
Geometric-Aware Hypergraph Reasoning for Novel Class Discovery in Point Cloud Segmentation-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zihao and Wu, Aming and Li, Yang and Han, Yahong and Shen, Jialie},
  title     = {Geometric-Aware Hypergraph Reasoning for Novel Class Discovery in Point Cloud Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10006--10015}
}
Discriminative Perception via Anchored Description for Reasoning Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Tao and Zhou, Qing and Li, Yanliang and Wang, Qi},
  title     = {Discriminative Perception via Anchored Description for Reasoning Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13189--13198}
}
Dual Band Thermal Videography: Separating Time-Varying Reflection and Emission Near Ambient Conditions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Narayanan_2026_CVPR,
  author    = {Narayanan, Sriram and Ramanagopal, Mani and Narasimhan, Srinivasa},
  title     = {Dual Band Thermal Videography: Separating Time-Varying Reflection and Emission Near Ambient Conditions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {199--208}
}
DRAMA: Next-Gen Dynamic Orchestration for Resilient Multi-Agent Ecosystems in Flux-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Xinkui and Zhang, Yifan and Liu, Sai and Wang, Naibo and Cheng, Guanjie and Xu, Yueshen and Liu, Chang and Deng, Shuiguang and Yin, Jianwei},
  title     = {{DRAMA}: Next-Gen Dynamic Orchestration for Resilient Multi-Agent Ecosystems in Flux},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1020--1030}
}
Tavatar: Topology-Aware Gaussian Attribute Derivation for Animatable Human Avatars-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Hailin and Yang, Yifan and Shu, Jiazhi and Huang, Zixiong and Chen, Qi and Du, Qing and Tan, Mingkui},
  title     = {Tavatar: Topology-Aware {Gaussian} Attribute Derivation for Animatable Human Avatars},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4087--4096}
}
From Weights to Concepts: Data-Free Interpretability of CLIP via Singular Vector Decomposition-
[pdf]
[supp]
[bibtex]@InProceedings{Gentile_2026_CVPR,
  author    = {Gentile, Francesco and Dall'Asen, Nicola and Tonini, Francesco and Mancini, Massimiliano and Vaquero, Lorenzo and Ricci, Elisa},
  title     = {From Weights to Concepts: Data-Free Interpretability of {CLIP} via Singular Vector Decomposition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2895--2906}
}
Beyond Rule-Based Agents: Active Markov Games for Realistic Multi-Agent Interaction in Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Gui_2026_CVPR,
  author    = {Gui, Yuan and Luo, Hongchen and Wang, Jiao and Qu, Liqi},
  title     = {Beyond Rule-Based Agents: Active {Markov} Games for Realistic Multi-Agent Interaction in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10689--10698}
}
Spectral Conformal Risk Control: Distribution-Free Tail Guarantees via Bayesian Quadrature-
[pdf]
[supp]
[bibtex]@InProceedings{Esfeh_2026_CVPR,
  author    = {Esfeh, Mohammad Mahdi Kazemi and Yan, Qi and Zhang, Yongxing and Gholami, Zahra and Liao, Renjie and Abolmaesumi, Purang},
  title     = {Spectral Conformal Risk Control: Distribution-Free Tail Guarantees via {Bayesian} Quadrature},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12977--12986}
}
SketchVL: Policy Optimization via Fine-Grained Credit Assignment for Chart Understanding and More-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Muye and Zhang, Lingling and Li, Yifei and Wu, Yaqiang and Liu, Jun},
  title     = {{SketchVL}: Policy Optimization via Fine-Grained Credit Assignment for Chart Understanding and More},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4738--4748}
}
P-Flow: Prompting Visual Effects Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Rui and Shou, Mike Zheng},
  title     = {{P-Flow}: Prompting Visual Effects Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9149--9160}
}
One Token, Two Fates: A Unified Framework via Vision Token Manipulation Against MLLMs Hallucination-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fa_2026_CVPR,
  author    = {Fa, Zhan and Duan, Yue and Zhang, Jian and Qi, Lei and Shi, Yinghuan},
  title     = {One Token, Two Fates: A Unified Framework via Vision Token Manipulation Against {MLLMs} Hallucination},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11106--11115}
}
GRPO-Guard: Mitigating Implicit Over-Optimization in Flow Matching via Regulated Clipping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jing and Liang, Jiajun and Liu, Jie and Liu, Henglin and Liu, Gongye and Zheng, Jun and Pang, Wanyuan and Ma, Ao and Xie, Zhenyu and Wang, Xintao and Wang, Meng and Wan, Pengfei and Liang, Xiaodan},
  title     = {{GRPO-Guard}: Mitigating Implicit Over-Optimization in Flow Matching via Regulated Clipping},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5988--5998}
}
Efficient Weighted Sampling via Score-based Generative Models-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Heasung and Lee, Taekyun and Kim, Hyeji and De Veciana, Gustavo},
  title     = {Efficient Weighted Sampling via Score-based Generative Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1155--1166}
}
FastLightGen: Fast and Light Video Generation with Fewer Steps and Parameters-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Shitong and Gu, Yufei and Xie, Zeke},
  title     = {{FastLightGen}: Fast and Light Video Generation with Fewer Steps and Parameters},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2104--2114}
}
Rotation Invariant and Symmetry Aware Pixel Difference Network for Remote Sensing Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhan_2026_CVPR,
  author    = {Zhan, Jialei and Liu, Li and Zhang, Jiehua and Xie, Yuhang and Liu, Yongxiang and Chen, Jiangming and Cheng, Ming-Ming},
  title     = {Rotation Invariant and Symmetry Aware Pixel Difference Network for Remote Sensing Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13264--13274}
}
Gamba: Mamba-based graph convolutional network with dynamic graph topology learning for action recognition-
[pdf]
[bibtex]@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Rouyi and Wu, Yangzhi and Wen, Jiajun and Gao, Can and Liu, Feng and Lai, Zhihui and Shen, Linlin},
  title     = {Gamba: {Mamba-based} graph convolutional network with dynamic graph topology learning for action recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6065--6074}
}
HandX: Scaling Bimanual Motion and Interaction Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zimu and Zhang, Yucheng and Xu, Xiyan and Wang, Ziyin and Xu, Sirui and Zhou, Kai and Zhou, Bing and Guo, Chuan and Wang, Jian and Wang, Yu-Xiong and Gui, Liang-Yan},
  title     = {{HandX}: Scaling Bimanual Motion and Interaction Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2274--2284}
}
Time-Specialized Event-Image Alignment for Blur-to-Video Decomposition-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Zhijing and Xu, Senyan and Jiang, Ruixuan and Liu, Kean and Tian, Runze and Fu, Xueyang and Zha, Zheng-Jun},
  title     = {Time-Specialized Event-Image Alignment for Blur-to-Video Decomposition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8045--8055}
}
From Manuals to Actions: A Unified VLA Model for Chain-of-Thought Manual Generation and Robotic Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Chenyang and Liu, Jiaming and Chen, Hao and Huang, Runzhong and Wuwu, Qingpo and Li, Xiaoqi and Liu, Zhuoyang and Li, Ying and Zhang, Renrui and Jia, Peng and Heng, Pheng-Ann and Zhang, Shanghang},
  title     = {From Manuals to Actions: A Unified {VLA} Model for Chain-of-Thought Manual Generation and Robotic Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13540--13552}
}
RARE: Learn to RAnk and REtrieve for Monocular 3D Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2026_CVPR,
  author    = {Park, Hyeonjeong and Xiong, Peixi and Ruan, Xiaoqian and Jia, Dian and Yu, Pei and Tang, Wei},
  title     = {{RARE}: Learn to {RAnk} and {REtrieve} for Monocular {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11556--11566}
}
Reasoning Diffusion for Unpaired Test Time Out-of-distribution Text-Image to Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Zirui and Wang, Xin and Zhang, Yipeng and Chen, Hong and Zheng, Kecheng and Zhu, Wenwu},
  title     = {Reasoning Diffusion for Unpaired Test Time Out-of-distribution Text-Image to Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {636--646}
}
Disentangling to Re-couple: Resolving the Similarity-Controllability Paradox in Subject-Driven Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Shuang and Deng, Chao and Chen, Hang and Liu, Liqun and Hu, Zhenyu and Cao, Te and Xue, Mengge and Chen, Yuan and Shu, Peng and Yu, Huan and Jiang, Jie},
  title     = {Disentangling to Re-couple: Resolving the Similarity-Controllability Paradox in Subject-Driven Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7741--7751}
}
ReWeaver: Towards Simulation-Ready and Topology-Accurate Garment Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Ming and Shan, Hui and Zheng, Kai and Shen, Chentao and Liu, Siyu and Fu, Yanwei and Chen, Zhen and Huang, Xiangru},
  title     = {{ReWeaver}: Towards Simulation-Ready and Topology-Accurate Garment Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4122--4131}
}
PROMPTMINER: Black-Box Prompt Stealing against Text-to-Image Generative Models via Reinforcement Learning and VLM-Guided Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Mingzhe and Zhang, Renhao and Wen, Zhiyang and Pan, Siqi and da Silva, Bruno Castro and Zhai, Juan and Ma, Shiqing},
  title     = {{PROMPTMINER}: Black-Box Prompt Stealing against Text-to-Image Generative Models via Reinforcement Learning and {VLM-Guided} Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7795--7804}
}
S2D: Selective Spectral Decay for Quantization-Friendly Conditioning of Neural Activations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chavan_2026_CVPR,
  author    = {Chavan, Arnav and Lele, Nahush and Bamba, Udbhav and Dayal, Sankalp and Raghunathan, Aditi and Gupta, Deepak},
  title     = {{S2D}: Selective Spectral Decay for Quantization-Friendly Conditioning of Neural Activations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12074--12083}
}
CaliTex: Geometry-Calibrated Attention for View-Coherent 3D Texture Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Chenyu and Chen, Hongze and Bao, Jingzhi and Zhu, Lingting and Zhang, Runze and Chen, Weikai and Hu, Zeyu and Yin, Yingda and Luo, Keyang and Wang, Xin},
  title     = {{CaliTex}: Geometry-Calibrated Attention for View-Coherent {3D} Texture Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5923--5933}
}
DynamicTree: Interactive Real Tree Animation via Sparse Voxel Spectrum-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Yaokun and Ding, Lihe and Chen, Xiao and Tan, Guang and Xue, Tianfan},
  title     = {{DynamicTree}: Interactive Real Tree Animation via Sparse Voxel Spectrum},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1241--1251}
}
Yo'City: Personalized and Boundless 3D Realistic City Scene Generation via Self-Critic Expansion-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Keyang and Zhou, Sifan and Xu, Hongbin and Xu, Gang and Yang, Zhifei and Wang, Yikai and Xiao, Zhen and Long, Jieyi and Li, Ming},
  title     = {{Yo'City}: Personalized and Boundless {3D} Realistic City Scene Generation via Self-Critic Expansion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3219--3230}
}
LVLM-Aided Alignment of Task-Specific Vision Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Koebler_2026_CVPR,
  author    = {Koebler, Alexander and Kuhn, Lukas and Thon, Ingo and Buettner, Florian},
  title     = {{LVLM-Aided} Alignment of Task-Specific Vision Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7837--7846}
}
Unified Latent Space for Understanding and Generation via Semantic Auto-encoder-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Xiaojie and Zhao, Yang and Li, Ming and Zhang, Yancheng and Lyu, Zonglin and Chen, Yunpeng and Wang, Rui and Zhou, Daquan},
  title     = {Unified Latent Space for Understanding and Generation via Semantic Auto-encoder},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2115--2124}
}
Talking Together: Synthesizing Co-Located 3D Conversations from Audio-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shan_2026_CVPR,
  author    = {Shan, Mengyi and Chang, Shouchieh and Bai, Ziqian and Liu, Shichen and Zhang, Yinda and Song, Luchuan and Pandey, Rohit and Fanello, Sean and Huang, Zeng},
  title     = {Talking Together: Synthesizing Co-Located {3D} Conversations from Audio},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3965--3977}
}
PHANTOM: Physics-Infused Video Generation via Joint Modeling of Visual and Latent Physical Dynamics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Ying and Xiong, Jerry and Yu, Tianjiao and Lourentzou, Ismini},
  title     = {{PHANTOM}: Physics-Infused Video Generation via Joint Modeling of Visual and Latent Physical Dynamics},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11185--11194}
}
Learning Multi-View Spatial Reasoning from Cross-View Relations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2026_CVPR,
  author    = {Jeong, Suchae and Song, Jaehwi and Lee, Haeone and Kim, Hanna and Kim, Jian and Lee, Dongjun and Shin, Dong Kyu and Kim, Changyeon and Hahm, Dongyoon and Jin, Woogyeol and Choi, Juheon and Lee, Kimin},
  title     = {Learning Multi-View Spatial Reasoning from Cross-View Relations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2570--2581}
}
4D Primitive-Mache: Glueing Primitives for Persistent 4D Scene Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Mazur_2026_CVPR,
  author    = {Mazur, Kirill and Taher, Marwan and Davison, Andrew J.},
  title     = {{4D} {Primitive-Mache}: Glueing Primitives for Persistent {4D} Scene Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7372--7381}
}
HandDreamer: Zero-Shot Text to 3D Hand Model Generation using Corrective Hand Shape Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rosh_2026_CVPR,
  author    = {Rosh, Green and Kukreja, Prateek and Vishakha, SR and H, Pawan Prasad B},
  title     = {{HandDreamer}: Zero-Shot Text to {3D} Hand Model Generation using Corrective Hand Shape Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8846--8856}
}
Edit-aware RAW reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Punnappurath_2026_CVPR,
  author    = {Punnappurath, Abhijith and Zhao, Luxi and Zhao, Ke and Nguyen, Hue and Grzeszczuk, Radek and Brown, Michael S.},
  title     = {Edit-aware {RAW} reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8418--8427}
}
View-Aware Semantic Alignment for Aerial-Ground Person Re-Identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Quan and Cai, Zeqiang and Zhao, Peiming and Wu, Jingze and Wu, Cailun and Chen, Hongbo and Lai, Jianhuang},
  title     = {View-Aware Semantic Alignment for Aerial-Ground Person Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4383--4392}
}
EmoStyle: Emotion-Driven Image Stylization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Jingyuan and Bai, Zihuan and Huang, Hui},
  title     = {{EmoStyle}: Emotion-Driven Image Stylization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {594--603}
}
Prospective Dynamic 3D MRI Reconstruction via Latent-Space Motion Tracking from Single Measurement-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Lixuan and Liu, Zhongnan and Hamilton, Jesse and Balter, James M. and Park, Jeong Joon and Shen, Liyue},
  title     = {Prospective Dynamic {3D} {MRI} Reconstruction via Latent-Space Motion Tracking from Single Measurement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5627--5636}
}
STARFlow-V: End-to-End Video Generative Modeling with Autoregressive Normalizing Flows-
[pdf]
[supp]
[bibtex]@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Jiatao and Shen, Ying and Chen, Tianrong and Dinh, Laurent and Wang, Yuyang and Bautista, Miguel Angel and Berthelot, David and Susskind, Josh and Zhai, Shuangfei},
  title     = {{STARFlow-V}: End-to-End Video Generative Modeling with Autoregressive Normalizing Flows},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9084--9094}
}
PixARMesh: Autoregressive Mesh-Native Single-View Scene Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xiang and Yoo, Sohyun and Wu, Hongrui and Li, Chuan and Xie, Jianwen and Tu, Zhuowen},
  title     = {{PixARMesh}: Autoregressive Mesh-Native Single-View Scene Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5881--5891}
}
Bayesian Decomposition and Semantic Completion for Few-shot Semantic Segmentation-
[pdf]
[bibtex]@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Guangchen and Wu, Yirui and Zhu, Wei and Wang, Tao and Zhang, Hao and Li, Bo and Lu, Tong},
  title     = {Bayesian Decomposition and Semantic Completion for Few-shot Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12354--12363}
}
Natural Human Motion Recovery by Aligning High-Order Temporal Dynamics from Monocular Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Dingkun and Shen, Zehong and Xia, Yan and Pavlakos, Georgios and Shen, Yujun and Zhou, Xiaowei},
  title     = {Natural Human Motion Recovery by Aligning High-Order Temporal Dynamics from Monocular Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7187--7196}
}
Ego: Embedding-Guided Personalization of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seifi_2026_CVPR,
  author    = {Seifi, Soroush and Gardier, Simon and Dorovatas, Vaggelis and Reino, Daniel Olmeda and Aljundi, Rahaf},
  title     = {Ego: Embedding-Guided Personalization of Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11674--11683}
}
HyperST: Hierarchical Hyperbolic Learning for Spatial Transcriptomics Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chen and An, Yilu and Chen, Ying and Li, Hao and Ling, Xitong and Liu, Lihao and He, Junjun and Lin, Yuxiang and Wang, Zihui and Yu, Rongshan},
  title     = {{HyperST}: Hierarchical Hyperbolic Learning for Spatial Transcriptomics Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5730--5739}
}
STAC: Plug-and-Play Spatio-Temporal Aware Cache Compression for Streaming 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Runze and Song, Yuxuan and Cai, Youcheng and Liu, Ligang},
  title     = {{STAC}: Plug-and-Play Spatio-Temporal Aware Cache Compression for Streaming {3D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7567--7576}
}
Context-Nav: Context-Driven Exploration and Viewpoint-Aware 3D Spatial Reasoning for Instance Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2026_CVPR,
  author    = {Jang, Won Shik and Kim, Ue-Hwan},
  title     = {{Context-Nav}: Context-Driven Exploration and Viewpoint-Aware {3D} Spatial Reasoning for Instance Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9626--9636}
}
QVGGT: Post-Training Quantized Visual Geometry Grounded Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Zhizhen and Wang, Hesong and Wang, Huan},
  title     = {{QVGGT}: Post-Training Quantized Visual Geometry Grounded Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7536--7545}
}
R-4B: Incentivizing General-Purpose Auto-Thinking in MLLMs via Bi-Mode Annealing and Reinforce Learning-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Qi and Ni, Bolin and Xiang, Shiming and Peng, Houwen},
  title     = {{R-4B}: Incentivizing General-Purpose Auto-Thinking in {MLLMs} via Bi-Mode Annealing and Reinforce Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7891--7900}
}
Seeing through boxes: Non-Line-of-Sight 3D Reconstruction from Radar Signals-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Jiachen and Shanbhag, Hailan and Al Hassanieh, Haitham},
  title     = {Seeing through boxes: Non-Line-of-Sight {3D} Reconstruction from Radar Signals},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1221--1230}
}
Efficient Training for Human Video Generation with Entropy-Guided Prioritized Progressive Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Changlin and Zhang, Jiawei and Liu, Shuhao and Lin, Sihao and Shi, Zeyi and Li, Zhihui and Chang, Xiaojun},
  title     = {Efficient Training for Human Video Generation with Entropy-Guided Prioritized Progressive Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5967--5977}
}
UniRain: Unified Image Deraining with RAG-based Dataset Distillation and Multi-objective Reweighted Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Qianfeng and Guan, Qiyuan and Chen, Xiang and Jin, Jiyu and Jin, Guiyue and Dong, Jiangxin},
  title     = {{UniRain}: Unified Image Deraining with {RAG-based} Dataset Distillation and Multi-objective Reweighted Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12428--12437}
}
FlowDirector: Training-Free Flow Steering for Precise Text-to-Video Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Guangzhao and Yang, Yanming and Song, Chenxi and Liu, Xiaohong and Zhang, Chi},
  title     = {{FlowDirector}: Training-Free Flow Steering for Precise Text-to-Video Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7805--7815}
}
ImageRAGTurbo: Towards One-step Text-to-Image Generation with Retrieval-Augmented Diffusion Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Peijie and Ramshankar, Hariharan and Ramisa, Arnau and C, Amit Kumar K and Vidal, Ren{\'e} and Salaka, Vamsi and Bhagat, Rahul},
  title     = {{ImageRAGTurbo}: Towards One-step Text-to-Image Generation with Retrieval-Augmented Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {529--539}
}
World in a Frame: Understanding Culture Mixing as a New Challenge for Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Eunsu and Park, Junyeong and An, Na Min and Kim, Junseong and Patel, Hitesh Laxmichand and Jin, Jiho and Kruk, Julia and Agarwal, Amit and Panda, Srikant and Ilasariya, Fenal Ashokbhai and Shim, Hyunjung and Oh, Alice},
  title     = {World in a Frame: Understanding Culture Mixing as a New Challenge for Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2477--2489}
}
NuWa: Deriving Lightweight Class-Specific Vision Transformers for Edge Devices-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Ziteng and He, Qiang and Li, Bing and Chen, Feifei and Jin, Hai and Yang, Yun},
  title     = {{NuWa}: Deriving Lightweight Class-Specific Vision Transformers for Edge Devices},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {156--166}
}
SCAPO: Self-Supervised Category-Level Articulated Pose Estimation from a Single 3D Observation-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Can and Lee, Gim Hee},
  title     = {{SCAPO}: Self-Supervised Category-Level Articulated Pose Estimation from a Single {3D} Observation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13802--13811}
}
Frequency-domain Manipulation for Face Obfuscation-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jintae and Ko, Keunsoo and Kim, Chang-Su},
  title     = {Frequency-domain Manipulation for Face Obfuscation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10241--10250}
}
E3AD: An Emotion-Aware Vision-Language-Action Model for Human-Centric End-to-End Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Yihong and Liao, Haicheng and Nie, Tong and He, Junlin and Qu, Ao and Chen, Kehua and Ma, Wei and Li, Zhenning and Sun, Lijun and Xu, Chengzhong},
  title     = {{E3AD}: An Emotion-Aware Vision-Language-Action Model for Human-Centric End-to-End Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10610--10620}
}
Back

