Papers
- Back
Generalizable Structure-Aware Keypoint Correspondence for Category-Unified 3D Single Object Tracking-
[pdf]
[supp]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Jie and Ma, Yinchao and Tang, Yuyang and Yang, Dengqing and Yang, Jianpeng and Zhou, Xu and Li, Qiao and Yang, Wenfei and Zhang, Tianzhu},
  title     = {Generalizable Structure-Aware Keypoint Correspondence for Category-Unified {3D} Single Object Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28156--28166},
}
DirectFisheye-GS: Enabling Native Fisheye Input in Gaussian Splatting with Cross-View Joint Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zhengxian and Xie, Fei and Xue, Xutao and Zhang, Rui and Huang, Taicheng and Liu, Yang and Ji, Mengqi and Yu, Tao},
  title     = {{DirectFisheye-GS}: Enabling Native Fisheye Input in {Gaussian} Splatting with Cross-View Joint Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4942--4952},
}
CompBench: Benchmarking Complex Instruction-guided Image Editing-
[pdf]
[arXiv]
[bibtex]
@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Bohan and Huang, Wenxuan and Tang, Yuntian and Qiao, Junbo and Liao, Jincheng and Cao, Shaosheng and Zhao, Fei and Feng, Zhaopeng and Gu, Zhouhong and Yin, Zhenfei and Bai, Lei and Ouyang, Wanli and Chen, Lin and Zhao, Fei and Wang, Zihan and Xie, Yuan and Lin, Shaohui},
  title     = {{CompBench}: Benchmarking Complex Instruction-guided Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1112--1122},
}
Choreographing a World of Dynamic Objects-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Lyu_2026_CVPR,
  author    = {Lyu, Yanzhe and Geng, Chen and Dharmarajan, Karthik and Zhang, Yunzhi and Alzayer, Hadi and Wu, Shangzhe and Wu, Jiajun},
  title     = {Choreographing a World of Dynamic Objects},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32647--32658},
}
Spk2VidNet: A Hierarchical Recurrent Architecture for High-Fidelity Video Reconstruction from Long Spike-Camera Streams-
[pdf]
[supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yuanlin and Xiong, Ruiqin and Xie, Jiyu and Zhu, Zhenkun and Yu, Zhaofei and Fan, Xiaopeng and Huang, Tiejun},
  title     = {{Spk2VidNet}: A Hierarchical Recurrent Architecture for High-Fidelity Video Reconstruction from Long Spike-Camera Streams},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12140--12149},
}
Continual Distillation of Teachers from Different Domains-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Michel_2026_CVPR,
  author    = {Michel, Nicolas and Wang, Maorong and He, Jiangpeng and Yamasaki, Toshihiko},
  title     = {Continual Distillation of Teachers from Different Domains},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10810--10819},
}
GT-SVJ: Generative-Transformer-Based Self-Supervised Video Judge For Efficient Video Reward Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Shekhar_2026_CVPR,
  author    = {Shekhar, Shivanshu and Bhattacharya, Uttaran and Addanki, Raghavendra and Tanjim, Mehrab and Sarkhel, Somdeb and Zhang, Tong},
  title     = {{GT-SVJ}: Generative-Transformer-Based Self-Supervised Video Judge For Efficient Video Reward Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9847--9858},
}
Beyond Euclidean Gossip: KL-Barycentric Consensus on Heterogeneous and Imbalanced Images-
[pdf]
[supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Lu and Yin, Guosheng},
  title     = {Beyond {Euclidean} Gossip: {KL}-Barycentric Consensus on Heterogeneous and Imbalanced Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6167--6175},
}
HybridDriveVLA: Vision-Language-Action Model with Visual CoT reasoning and ToT Evaluation for Autonomous Driving-
[pdf]
[supp]
[bibtex]
@InProceedings{Bassole_2026_CVPR,
  author    = {Bassole, Yipene Cedric Francois and Kim, Sungwoo and Jung, Jiwoo and Sung, Yunsick},
  title     = {{HybridDriveVLA}: Vision-Language-Action Model with Visual {CoT} reasoning and {ToT} Evaluation for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32421--32430},
}
Training-free, Perceptually Consistent Low-Resolution Previews with High-Resolution Image for Efficient Workflows of Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Jeong_2026_CVPR,
  author    = {Jeong, Wongi and Seo, Hoigi and Chun, Se Young},
  title     = {Training-free, Perceptually Consistent Low-Resolution Previews with High-Resolution Image for Efficient Workflows of Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4547--4557},
}
Catalyst4D: High-Fidelity 3D-to-4D Scene Editing via Dynamic Propagation-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Shifeng and Li, Yihui and Liao, Jun and Yang, Hongyu and Huang, Di},
  title     = {{Catalyst4D}: High-Fidelity {3D-to-4D} Scene Editing via Dynamic Propagation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29792--29802},
}
Cloning Deterministic Worlds: The Critical Role of Latent Geometry in Long-Horizon World Models-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Zaishuo and Lu, Yukuan and Li, Xinyi and Xu, Yifan and Chen, Yubei},
  title     = {Cloning Deterministic Worlds: The Critical Role of Latent Geometry in Long-Horizon World Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32602--32612},
}
CME-CAD: Heterogeneous Collaborative Multi-Expert Reinforcement Learning for CAD Code Generation-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Niu_2026_CVPR,
  author    = {Niu, Ke and Yu, Haiyang and Chen, Zhuofan and Yao, Zhengtao and Jia, Weitao and Ge, Xiaodong and Tang, Jingqun and Cui, Benlei and Li, Bin and Xue, Xiangyang},
  title     = {{CME-CAD}: Heterogeneous Collaborative Multi-Expert Reinforcement Learning for {CAD} Code Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39272--39281},
}
Unique Lives, Shared World: Learning from Single-Life Videos-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Tengda and Ebrahimi, Sayna and Gokay, Dilara and Ku, Li Yang and Ovsjanikov, Maks and Babukova, Iva and Zoran, Daniel and Patraucean, Viorica and Carreira, Joao and Zisserman, Andrew and Damen, Dima},
  title     = {Unique Lives, Shared World: Learning from Single-Life Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24021--24030},
}
Quantized Residuals to Continuous Prompts for Few-Shot Class Incremental Learning in Vision-Language Models-
[pdf]
[supp]
[bibtex]
@InProceedings{Sinha_2026_CVPR,
  author    = {Sinha, Abhishek Kumar and Dube, Nitant and Biswas, Soma},
  title     = {Quantized Residuals to Continuous Prompts for Few-Shot Class Incremental Learning in Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3856--3865},
}
EcoSplat: Efficiency-controllable Feed-forward 3D Gaussian Splatting from Multi-view Images-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Bui_2026_CVPR,
  author    = {Bui, Minh-Quan Viet and Park, Jongmin and Gonzalez, Juan Luis and Moon, Jaeho and Oh, Jihyong and Kim, Munchurl},
  title     = {{EcoSplat}: Efficiency-controllable Feed-forward {3D} {Gaussian} Splatting from Multi-view Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26010--26020},
}
UniComp: Rethinking Video Compression Through Informational Uniqueness-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Yuan_2026_CVPR,
  author    = {Yuan, Chao and Chen, Shimin and Lin, Minliang and Qiao, Limeng and Wan, Guanglu and Ma, Lin},
  title     = {{UniComp}: Rethinking Video Compression Through Informational Uniqueness},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18609--18618},
}
AD-GBC: Anisotropic Granular-Ball Skip-Connection Refiner for UNet-Based Medical Image Segmentation-
[pdf]
[supp]
[bibtex]
@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Xiya and Zhao, Qinglin and Feng, Li},
  title     = {{AD-GBC}: Anisotropic Granular-Ball Skip-Connection Refiner for {UNet}-Based Medical Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1418--1427},
}
White-Balance First, Adjust Later: Cross-Camera Color Constancy via Vision-Language Evaluation-
[pdf]
[arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Shuwei and Tan, Lei and Tan, Robby T.},
  title     = {White-Balance First, Adjust Later: Cross-Camera Color Constancy via Vision-Language Evaluation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1331--1341},
}
Reallocating Attention Across Layers to Reduce Multimodal Hallucination-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Haolang and Chu, Bolun and Fu, WeiYe and Nan, Guoshun and Liu, Junning and Pan, Minghui and Li, Qiankun and Yu, Yi and Wang, Hua and Wang, Kun},
  title     = {Reallocating Attention Across Layers to Reduce Multimodal Hallucination},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4157--4167},
}
PanoEnv: Exploring 3D Spatial Intelligence in Panoramic Environments with Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Zekai and Zheng, Xu},
  title     = {{PanoEnv}: Exploring {3D} Spatial Intelligence in Panoramic Environments with Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9647--9657},
}
Erasing Thousands of Concepts: Towards Scalable and Practical Concept Erasure for Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Seo_2026_CVPR,
  author    = {Seo, Hoigi and Lee, Byung Hyun and Cho, Jaehyun and Lim, Sungjin and Chun, Se Young},
  title     = {Erasing Thousands of Concepts: Towards Scalable and Practical Concept Erasure for Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10262--10272},
}
TALON: Test-time Adaptive Learning for On-the-Fly Category Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yanan and Yan, Yuhan and Chen, Tailai and Chi, Zhixiang and Wu, ZiZhang and Jin, Yi and Wang, Yang and Li, Zhenbo},
  title     = {{TALON}: Test-time Adaptive Learning for On-the-Fly Category Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22259--22269},
}
SenseSearch: Empowering Vision-Language Models with High-Resolution Agentic Search-Reasoning via Reinforcement Learning-
[pdf]
[supp]
[bibtex]
@InProceedings{Chng_2026_CVPR,
  author    = {Chng, Yong Xien and Hu, Tao and Tong, Wenwen and Li, Xueheng and Chen, Jiandong and Yu, Haojia and Lu, Jiefan and Guo, Hewei and Deng, Hanming and Xie, Chengjun and Huang, Gao and Lu, Lewei},
  title     = {{SenseSearch}: Empowering Vision-Language Models with High-Resolution Agentic Search-Reasoning via Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26508--26517},
}
REArtGS++: Generalizable Articulation Reconstruction with Temporal Geometry Constraint via Planar Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]
@Comment{Key carries a title-keyword suffix: the exported key Wu_2026_CVPR is used by more than one entry in this listing, and BibTeX rejects repeated keys.}
@InProceedings{Wu_2026_CVPR_REArtGS,
  author    = {Wu, Di and Liu, Liu and Huang, Anran and Liu, Yuyan and Yu, Qiaojun and Liu, Shaofan and Song, Liangtu and Lu, Cewu},
  title     = {{REArtGS++}: Generalizable Articulation Reconstruction with Temporal Geometry Constraint via Planar {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1177--1186},
}
Quant Experts: Token-aware Adaptive Error Reconstruction with Mixture of Experts for Large Vision-Language Models Quantization-
[pdf]
[supp]
[arXiv]
[bibtex]
@Comment{Key carries a title-keyword suffix: the exported key Jia_2026_CVPR is used by more than one entry in this listing, and BibTeX rejects repeated keys.}
@InProceedings{Jia_2026_CVPR_QuantExperts,
  author    = {Jia, Chenwei and Li, Baoting and Zhang, Xuchong and Wei, Mingzhuo and Lin, Bochen and Sun, Hongbin},
  title     = {{Quant Experts}: Token-aware Adaptive Error Reconstruction with Mixture of Experts for Large Vision-Language Models Quantization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24716--24726},
}
Efficient and High-Fidelity Omni Modality Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Huynh_2026_CVPR,
  author    = {Huynh, Chuong and Luong, Manh and Shrivastava, Abhinav},
  title     = {Efficient and High-Fidelity Omni Modality Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8770--8780},
}
SynCLIP: Synonym-Coherent Language-Image Pretraining for Robust Open-Vocabulary Dense Perception-
[pdf]
[supp]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Mingjie and He, Guangjun and Xu, Dongli and Lin, Youtian and Li, Hongjue and Feng, Pengming and Guan, Jian and Deng, Yue},
  title     = {{SynCLIP}: Synonym-Coherent Language-Image Pretraining for Robust Open-Vocabulary Dense Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31524--31533},
}
Dr.Occ: Depth- and Region-Guided 3D Occupancy from Surround-View Cameras for Autonomous Driving-
[pdf]
[supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Xubo and Zhang, Haoyang and He, Fei and Wu, Rui and Shan, Yanhu and Yang, Wen and Yu, Huai},
  title     = {{Dr.Occ}: Depth- and Region-Guided {3D} Occupancy from Surround-View Cameras for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28610--28619},
}
More than the Sum: Panorama-Language Models for Adverse Omni-Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Weijia and Liu, Ruiping and Wei, Jiale and Chen, Yufan and Zheng, Junwei and Zeng, Zichao and Zhang, Jiaming and Li, Qiufu and Shen, Linlin and Stiefelhagen, Rainer},
  title     = {More than the Sum: Panorama-Language Models for Adverse Omni-Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30874--30884},
}
AdaSpot: Spend Resolution Where It Matters for Precise Event Spotting-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Xarles_2026_CVPR,
  author    = {Xarles, Artur and Escalera, Sergio and Moeslund, Thomas B. and Clap{\'e}s, Albert},
  title     = {{AdaSpot}: Spend Resolution Where It Matters for Precise Event Spotting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24010--24020},
}
Prompt-Anchored Vision-Text Distillation for Lifelong Person Re-identification-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Wen_2026_CVPR,
  author    = {Wen, Wen and Chen, Hao and Zhang, Shiliang},
  title     = {Prompt-Anchored Vision-Text Distillation for Lifelong Person Re-identification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18503--18512},
}
What Is the Optimal Ranking Score Between Precision and Recall? We Can Always Find It and It Is Rarely F1-
[pdf]
[supp]
[bibtex]
@InProceedings{Pierard_2026_CVPR,
  author    = {Pi{\'e}rard, S{\'e}bastien and Deli{\`e}ge, Adrien and Van Droogenbroeck, Marc},
  title     = {What Is the Optimal Ranking Score Between Precision and Recall? {We} Can Always Find It and It Is Rarely {F1}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9722--9731},
}
From Spots to Pixels: Dense Spatial Gene Expression Prediction from Histology Images-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Ruikun and Yang, Yan and Pan, Liyuan},
  title     = {From Spots to Pixels: Dense Spatial Gene Expression Prediction from Histology Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19791--19800},
}
Rethinking Pose Refinement in 3D Gaussian Splatting under Pose Prior and Geometric Uncertainty-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Kong_2026_CVPR,
  author    = {Kong, Mangyu and Lee, Jaewon and Lee, Seongwon and Kim, Euntai},
  title     = {Rethinking Pose Refinement in {3D} {Gaussian} Splatting under Pose Prior and Geometric Uncertainty},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25958--25968},
}
RetouchIQ: MLLM Agents for Instruction-Based Image Retouching with Generalist Reward-
[pdf]
[supp]
[arXiv]
[bibtex]
@Comment{Key carries a title-keyword suffix: the exported key Wu_2026_CVPR is used by more than one entry in this listing, and BibTeX rejects repeated keys.}
@InProceedings{Wu_2026_CVPR_RetouchIQ,
  author    = {Wu, Qiucheng and Shi, Jing and Jenni, Simon and Kafle, Kushal and Wang, Tianyu and Chang, Shiyu and Zhao, Handong},
  title     = {{RetouchIQ}: {MLLM} Agents for Instruction-Based Image Retouching with Generalist Reward},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12279--12288},
}
Anomaly-Related Residual Fields for Cross-domain Anomaly Detection-
[pdf]
[supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Kewei and Xie, Jiayi and Shen, Zhengda and Qin, Weijun and Jia, Lingxiang and Chen, Kejia and Feng, Zunlei and Bei, Yijun},
  title     = {Anomaly-Related Residual Fields for Cross-domain Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35617--35627},
}
DreamSR: Towards Ultra-High-Resolution Image Super-Resolution via a Receptive-Field Enhanced Diffusion Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Qingji and Dong, Hang and Chen, Mingqin and Zhang, Rui and Wang, Yitong},
  title     = {{DreamSR}: Towards Ultra-High-Resolution Image Super-Resolution via a Receptive-Field Enhanced Diffusion Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38258--38269},
}
LongStream: Long-Sequence Streaming Autoregressive Visual Geometry-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Chong and Chen, Xianda and Xie, Tao and Yin, Wei and Ren, Weiqiang and Zhang, Qian and Guo, Xiaoyang and Wang, Hao},
  title     = {{LongStream}: Long-Sequence Streaming Autoregressive Visual Geometry},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {272--283},
}
VisRef: Visual Refocusing while Thinking Improves Test-Time Scaling in Multi-Modal Large Reasoning Models-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Ghosal_2026_CVPR,
  author    = {Ghosal, Soumya Suvra and Kim, Youngeun and Li, Zhuowei and Chaudhry, Ritwick and Xu, Linghan and Zhang, Hongjing and Zablocki, Jakub and Xing, Yifan and Zhang, Qin},
  title     = {{VisRef}: Visual Refocusing while Thinking Improves Test-Time Scaling in Multi-Modal Large Reasoning Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33404--33414},
}
Enhancing the Security of Visual Speaker Authentication Based on Dynamic Lip-Print Analysis-
[pdf]
[supp]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Yi and Yang, Lei and Chen, Bofan and Wang, Shilin},
  title     = {Enhancing the Security of Visual Speaker Authentication Based on Dynamic Lip-Print Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35462--35471},
}
Ultra Diffusion Poser: Diffusion-Based Human Motion Tracking from Sparse Inertial Sensors and Ranging-based Between-sensor Distances-
[pdf]
[supp]
[bibtex]
@InProceedings{Hollidt_2026_CVPR,
  author    = {Hollidt, Dominik and Bendinelli, Tommaso and Holz, Christian},
  title     = {{Ultra Diffusion Poser}: Diffusion-Based Human Motion Tracking from Sparse Inertial Sensors and Ranging-based Between-sensor Distances},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7036--7046},
}
Beyond Mimicry: Learning Whole-Body Human-Humanoid Interaction from Human-Human Demonstrations-
[pdf]
[supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Wei-Jin and Zhang, Yue-Yi and Wei, Yi-Lin and Xia, Zhi-Wei and Tan, Juantao and Li, Yuan-Ming and Zhao, Zhilin and Zheng, Wei-Shi},
  title     = {Beyond Mimicry: Learning Whole-Body Human-Humanoid Interaction from Human-Human Demonstrations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30740--30749},
}
Pluggable Pruning with Contiguous Layer Distillation for Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Jian and Peng, Qirong and Zhu, Xujie and Xie, Peixing and Chen, Chen and Lu, Haonan},
  title     = {Pluggable Pruning with Contiguous Layer Distillation for Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18640--18650},
}
PersonaVLM: Long-Term Personalized Multimodal LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Nie_2026_CVPR,
  author    = {Nie, Chang and Fu, Chaoyou and Zhang, Yifan and Yang, Haihua and Shan, Caifeng},
  title     = {{PersonaVLM}: Long-Term Personalized Multimodal {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15000--15009},
}
APPO: Attention-guided Perception Policy Optimization for Video Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Du_2026_CVPR,
  author    = {Du, Henghui and Zhou, Chang and Chen, Xi and Hu, Di},
  title     = {{APPO}: Attention-guided Perception Policy Optimization for Video Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12269--12278},
}
An Efficient Token Compression Framework for Visual Object Tracking-
[pdf]
[arXiv]
[bibtex]
@Comment{Key carries a title-keyword suffix: the exported key Wu_2026_CVPR is used by more than one entry in this listing, and BibTeX rejects repeated keys.}
@InProceedings{Wu_2026_CVPR_TokenCompression,
  author    = {Wu, Weijing and Liang, Qihua and Zhong, Bineng and Xia, Haiying and Mo, Zhiyi and Song, Shuxiang},
  title     = {An Efficient Token Compression Framework for Visual Object Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6857--6867},
}
GraphVLM: Benchmarking Vision Language Models for Multimodal Graph Learning-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jiajin and Fan, Dongzhe and Ji, Chuanhao and Zha, Daochen and Tan, Qiaoyu},
  title     = {{GraphVLM}: Benchmarking Vision Language Models for Multimodal Graph Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9491--9500},
}
Boundary-Responsive Differentiable Gating for Superpixel-Based Segmentation-
[pdf]
[supp]
[bibtex]
@InProceedings{Ahmed_2026_CVPR,
  author    = {Ahmed, Fatmaelzahraa and Lu, Zhihe and Caro, Gianni and Tabaa, Diram and Hamdy, Mohamed and Abdel-Ghani, Muraam and Al-Ali, Abdulaziz and Arsalan, Muhammad and Balakrishnan, Shidin},
  title     = {Boundary-Responsive Differentiable Gating for Superpixel-Based Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42137--42146},
}
Rethinking Asymmetric Quantization: Hidden Symmetry in Vision Model Weights-
[pdf]
[supp]
[bibtex]
@InProceedings{Mori_2026_CVPR,
  author    = {Mori, Masafumi and Gongyo, Shinya and Ambai, Mitsuru},
  title     = {Rethinking Asymmetric Quantization: Hidden Symmetry in Vision Model Weights},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33609--33620},
}
Hierarchical Enhancement of Semantic Priors for Disentangled Text-Driven Motion Generation-
[pdf]
[supp]
[bibtex]
@InProceedings{Lv_2026_CVPR,
  author    = {Lv, Wenhan and Wang, Shaopan and Wu, Xiangyu and Hang, Tianchu and Jian, Zhongquan and Wu, Qingqiang},
  title     = {Hierarchical Enhancement of Semantic Priors for Disentangled Text-Driven Motion Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14844--14853},
}
MovieRecapsQA: A Multimodal Open-Ended Video Question-Answering Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Shaar_2026_CVPR,
  author    = {Shaar, Shaden and Thymes, Bradon and Chaixanien, Sirawut and Cardie, Claire and Hariharan, Bharath},
  title     = {{MovieRecapsQA}: A Multimodal Open-Ended Video Question-Answering Benchmark},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4537--4546},
}
PhysHead: Simulation-Ready Gaussian Head Avatars-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Kabadayi_2026_CVPR,
  author    = {Kabadayi, Berna and Sklyarova, Vanessa and Zielonka, Wojciech and Thies, Justus and Pons-Moll, Gerard},
  title     = {{PhysHead}: Simulation-Ready {Gaussian} Head Avatars},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4109--4121},
}
Scalable Object Relation Encoding for Better 3D Spatial Reasoning in Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Shengli and Zheng, Minghang and Zheng, Feng and Liu, Yang},
  title     = {Scalable Object Relation Encoding for Better {3D} Spatial Reasoning in Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16758--16767},
}
Physical Simulator In-the-Loop Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Foo_2026_CVPR,
  author    = {Foo, Lin Geng and Huang, Mark He and Lattas, Alexandros and Moschoglou, Stylianos and Beeler, Thabo and Theobalt, Christian},
  title     = {Physical Simulator In-the-Loop Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4301--4311},
}
Multimodal Protein Language Models for Enzyme Kinetic Parameters: From Substrate Recognition to Conformational Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]
@Comment{Key carries a title-keyword suffix: the exported key Wang_2026_CVPR is used by more than one entry in this listing, and BibTeX rejects repeated keys.}
@InProceedings{Wang_2026_CVPR_EnzymeKinetics,
  author    = {Wang, Fei and Zheng, Xinye and Li, Kun and Wei, Yanyan and Liu, Yuxin and Hu, Ganpeng and Bao, Tong and Yang, Jingwen},
  title     = {Multimodal Protein Language Models for Enzyme Kinetic Parameters: From Substrate Recognition to Conformational Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15829--15839},
}
PR-IQA: Partial-Reference Image Quality Assessment for Diffusion-Based Novel View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Inseong and Lee, Siwoo and Nam, Seung-Hun and Song, Soohwan},
  title     = {{PR-IQA}: Partial-Reference Image Quality Assessment for Diffusion-Based Novel View Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37301--37310},
}
FloVerse: Floor Plan-Guided Multi-Modal Navigation-
[pdf]
[supp]
[bibtex]
@Comment{Key carries a title-keyword suffix: the exported key Huang_2026_CVPR is used by more than one entry in this listing, and BibTeX rejects repeated keys.}
@InProceedings{Huang_2026_CVPR_FloVerse,
  author    = {Huang, Weiqi and Dong, Shuangyi and Li, Jiaxin and Guo, Yifei and Wang, Zan and Liang, Wei},
  title     = {{FloVerse}: Floor Plan-Guided Multi-Modal Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15156--15165},
}
ConceptPose: Training-Free Zero-Shot Object Pose Estimation using Concept Vectors-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Kuang_2026_CVPR,
  author    = {Kuang, Liming and Velikova, Yordanka and Saleh, Mahdi and Zaech, Jan-Nico and Paudel, Danda Pani and Busam, Benjamin},
  title     = {{ConceptPose}: Training-Free Zero-Shot Object Pose Estimation using Concept Vectors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26582--26592},
}
JoPPO: Hierarchical Photography Assessment via Contrastive Joint Conditional Probabilistic Reinforcement Learning-
[pdf]
[supp]
[bibtex]
@Comment{Key carries a title-keyword suffix: the exported key Yang_2026_CVPR is used by more than one entry in this listing, and BibTeX rejects repeated keys.}
@InProceedings{Yang_2026_CVPR_JoPPO,
  author    = {Yang, Yifan and Wang, Juntuo and Qiao, Yuming and Zhang, Xudong and Yu, Chunyang and Li, Yan and Lin, Xiao and Luo, Liang and Meng, Dan},
  title     = {{JoPPO}: Hierarchical Photography Assessment via Contrastive Joint Conditional Probabilistic Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11684--11693},
}
Fine-VAD: Towards Fine-Grained Video Anomaly Detection via Progressive Cross-Granularity Learning-
[pdf]
[supp]
[bibtex]
@Comment{Key carries a title-keyword suffix: the exported key Zhang_2026_CVPR is used by more than one entry in this listing, and BibTeX rejects repeated keys.}
@InProceedings{Zhang_2026_CVPR_FineVAD,
  author    = {Zhang, Menghao and Zhu, Yiyan and Ren, Pengfei and Sun, Haifeng and Qi, Qi and Zhuang, Zirui and Wang, Huazheng and Zhang, Lei and Liao, Jianxin and Wang, Jingyu},
  title     = {{Fine-VAD}: Towards Fine-Grained Video Anomaly Detection via Progressive Cross-Granularity Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35514--35523},
}
Functional Mean Flow in Hilbert Space-
[pdf]
[supp]
[arXiv]
[bibtex]
@Comment{Key carries a title-keyword suffix: the exported key Li_2026_CVPR is used by more than one entry in this listing, and BibTeX rejects repeated keys.}
@InProceedings{Li_2026_CVPR_MeanFlow,
  author    = {Li, Zhiqi and Sun, Yuchen and Turk, Greg and Zhu, Bo},
  title     = {Functional Mean Flow in {Hilbert} Space},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1928--1938},
}
CausalVAD: De-confounding End-to-End Autonomous Driving via Causal Intervention-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Jiacheng and Zhou, Zhiyuan and He, Zhuolin and Zhang, Jia and Zhang, Kai and Pu, Jian},
  title     = {{CausalVAD}: De-confounding End-to-End Autonomous Driving via Causal Intervention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32124--32133},
}
AdaBet: Gradient-free Layer Selection for Efficient Training of Deep Neural Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tenison_2026_CVPR,
  author    = {Tenison, Irene and Chatterjee, Soumyajit and Kawsar, Fahim and Malekzadeh, Mohammad},
  title     = {{AdaBet}: Gradient-free Layer Selection for Efficient Training of Deep Neural Networks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20233--20242},
}
Dual Ascent Diffusion for Inverse Problems-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Minseo and Levy, Axel and Wetzstein, Gordon},
  title     = {Dual Ascent Diffusion for Inverse Problems},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23622--23631},
}
One Patch to Caption Them All: A Unified Zero-Shot Captioning Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bianchi_2026_CVPR,
  author    = {Bianchi, Lorenzo and Pacini, Giacomo and Carrara, Fabio and Messina, Nicola and Amato, Giuseppe and Falchi, Fabrizio},
  title     = {One Patch to Caption Them All: A Unified Zero-Shot Captioning Framework},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5532--5542},
}
ReGenHOI: Unifying Reconstruction and Generation for 3D Human-Object Interaction Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Miao and Zhu, Xiangyu and Wang, Zidu and Liang, Xusheng and Li, Bao and Wu, Jinlin and Zang, Zelin and Lei, Zhen},
  title     = {{ReGenHOI}: Unifying Reconstruction and Generation for {3D} Human-Object Interaction Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42847--42857},
}
Masked Region Transformer for Layered Image Generation and Editing at Scale-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Zhicong and Chen, Jingye and Zhang, Zhao and Zhou, Mohan and Liu, Yuchi and Pu, Yifan and Bai, Yalong and Smith, Ethan and Yuan, Yuhui},
  title     = {Masked Region Transformer for Layered Image Generation and Editing at Scale},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40622--40632},
}
Grounded 3D-Aware Spatial Vision-Language Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, An-Chieh and Fu, Yang and Ji, Yatai and Zhu, Ligeng and Zhan, Guanqi and Zhang, Zhuoyang and Yang, Zhaojing and Han, Song and Lu, Yao and Molchanov, Pavlo and Murali, Vidya Nariyambut and Kautz, Jan and Wang, Xiaolong and Yin, Hongxu and Liu, Sifei},
  title     = {Grounded {3D}-Aware Spatial Vision-Language Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16688--16700},
}
Fine-Grained Post-Training Quantization for Large Vision Language Models with Quantization-Aware Integrated Gradients-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Ziwei and Zeng, Fanhu and Fang, Hongjian and Wang, Rui-Qi and Chen, Renxing and Zhu, Yanan and Chen, Yi and Yang, Peipei and Zhang, Xu-Yao},
  title     = {Fine-Grained Post-Training Quantization for Large Vision Language Models with Quantization-Aware Integrated Gradients},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3500--3510},
}
Dr. Seg: Revisiting GRPO Training for Visual Large Language Models through Perception-Oriented Design-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Haoxiang and Wang, Tao and Tang, Chenwei and Yuan, Li and Lv, Jiancheng},
  title     = {{Dr. Seg}: Revisiting {GRPO} Training for Visual Large Language Models through Perception-Oriented Design},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24320--24329},
}
GR-Gauge: Cost-efficient Training Configuration By Gauging the Gradient Redundancy-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Guanjie and Chen, Chen},
  title     = {{GR-Gauge}: Cost-efficient Training Configuration By Gauging the Gradient Redundancy},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12934--12943},
}
DROID-SLAM in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Moyang and Zhu, Zihan and Pollefeys, Marc and Barath, Daniel},
  title     = {{DROID-SLAM} in the Wild},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36498--36508},
}
Towards Robust Multi-Modal Semantic Segmentation with Teacher-Student Framework and Hybrid Prototype Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Jiaqi and Zheng, Xu and Liu, Yang},
  title     = {Towards Robust Multi-Modal Semantic Segmentation with Teacher-Student Framework and Hybrid Prototype Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27665--27675},
}
EchoFoley: Event-Centric Hierarchical Control for Video Grounded Creative Sound Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Bingxuan and Cui, Yiming and He, Yicheng and Wang, Yiwei and Zhang, Shu and Wen, Longyin and Niu, Yulei},
  title     = {{EchoFoley}: Event-Centric Hierarchical Control for Video Grounded Creative Sound Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27229--27238},
}
Intrinsic Concept Extraction Based on Compositional Interpretability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Hanyu and Tao, Hong and Huang, Guoheng and Jiang, Jianbin and Chen, Xuhang and Pun, Chi-Man and Wang, Shanhu and Pan, Pan},
  title     = {Intrinsic Concept Extraction Based on Compositional Interpretability},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38969--38978},
}
Kontinuous Kontext: Continuous Strength Control for Instruction-based Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Parihar_2026_CVPR,
  author    = {Parihar, Rishubh and Patashnik, Or and Ostashev, Daniil and Radhakrishnan, Venkatesh Babu and Cohen-Or, Daniel and Wang, Kuan-Chieh Jackson},
  title     = {{Kontinuous Kontext}: Continuous Strength Control for Instruction-based Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37929--37939},
}
AffordGrasp: Cross-Modal Diffusion for Affordance-Aware Grasp Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Xiaofei and Zhang, Yi and Liu, Yumeng and Ma, Yuexin and Shi, Yujiao and He, Xuming},
  title     = {{AffordGrasp}: Cross-Modal Diffusion for Affordance-Aware Grasp Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15944--15953},
}
RAYNOVA: Scale-Temporal Autoregressive World Modeling in Ray Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Yichen and Peng, Chensheng and Abdelfattah, Mazen and Hu, Yihan and Yang, Jiezhi and Higgins, Eric and Brigden, Ryan and Tomizuka, Masayoshi and Zhan, Wei},
  title     = {{RAYNOVA}: Scale-Temporal Autoregressive World Modeling in Ray Space},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25426--25437},
}
SPEGC: Continual Test-Time Adaptation via Semantic-Prompt-Enhanced Graph Clustering for Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2026_CVPR,
  author    = {Du, Xiaogang and Zhang, Jiawei and Liu, Tongfei and Lei, Tao and Wang, Yingbo},
  title     = {{SPEGC}: Continual Test-Time Adaptation via Semantic-Prompt-Enhanced Graph Clustering for Medical Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8481--8491},
}
ExtrinSplat: Decoupling Geometry and Semantics for Open-Vocabulary Understanding in 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2026_CVPR,
  author    = {Ding, Jiayu and Liu, Xinpeng and Pan, Zhiyi and Long, Shiqiang and Li, Ge},
  title     = {{ExtrinSplat}: Decoupling Geometry and Semantics for Open-Vocabulary Understanding in {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {31019--31028},
}
Vocabulary Scaling Law: Tuning Open-vocabulary Predictors for Their Openness-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Ziliang and Li, Yulu and Fang, Liangda and Zhang, Jusheng and Zheng, Yongsen and Guan, Quanlong and Chen, Xipeng},
  title     = {Vocabulary Scaling Law: Tuning Open-vocabulary Predictors for Their Openness},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3091--3100},
}
The Consistency Critic: Correcting Inconsistencies in Generated Images via Reference-Guided Attentive Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ouyang_2026_CVPR,
  author    = {Ouyang, Ziheng and Song, Yiren and Liu, Yaoli and Zhu, Shihao and Hou, Qibin and Cheng, Ming-Ming and Shou, Mike Zheng},
  title     = {The Consistency Critic: Correcting Inconsistencies in Generated Images via Reference-Guided Attentive Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2035--2046},
}
StyleDoctor: Towards Specialist Reward Model for Style-centric Generation Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{He_2026_CVPR,
  author    = {He, Xilin and Xian, Xiaole and Yue, Xiangyu and Khan, Muhammad Haris},
  title     = {{StyleDoctor}: Towards Specialist Reward Model for Style-centric Generation Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29126--29135},
}
Where Does Vision Meet Language? Understanding and Refining Visual Fusion in MLLMs via Contrastive Attention-
[pdf]
[bibtex]@InProceedings{Song_2026_CVPR,
  author    = {Song, Shezheng and Li, Shasha and Zhao, Shan and Li, Xiaopeng and Wan, Qian and Wang, Chengyu and Yan, Tianwei and Jun, Ma and Yu, Jie},
  title     = {Where Does Vision Meet Language? {Understanding} and Refining Visual Fusion in {MLLMs} via Contrastive Attention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10051--10060},
}
From Events to Clarity: The Event-Guided Diffusion Framework for Dehazing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ling and Lu, Yunfan and Ma, Wenzong and Yao, Huizai and Li, Pengteng and Xiong, Hui},
  title     = {From Events to Clarity: The Event-Guided Diffusion Framework for Dehazing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34028--34039},
}
Taming Preference Mode Collapse via Directional Decoupling Alignment in Diffusion Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Chubin and Hu, Sujie and Zhu, Jiashu and Wu, Meiqi and Chen, Jintao and Li, Yanxun and Huang, Nisha and Fang, Chengyu and Wu, Jiahong and Chu, Xiangxiang and Li, Xiu},
  title     = {Taming Preference Mode Collapse via Directional Decoupling Alignment in Diffusion Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12775--12786},
}
DocPrune: Efficient Document Question Answering via Background, Question, and Comprehension-aware Token Pruning-
[pdf]
[supp]
[bibtex]@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Joonmyung and Lee, Sanghyeok and Kim, Jongha and Kim, Sehyung and Ko, Dohwan and Kil, Jihyung and Kim, Hyunwoo J.},
  title     = {{DocPrune}: Efficient Document Question Answering via Background, Question, and Comprehension-aware Token Pruning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3543--3552},
}
Beyond Text Prompts: Precise Concept Erasure through Text-Image Collaboration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Jun and Xiong, Lizhi and Li, Ziqiang and Jiang, Weiwei and Fu, Zhangjie and Li, Yong and Xie, Guo-Sen},
  title     = {Beyond Text Prompts: Precise Concept Erasure through Text-Image Collaboration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37653--37663},
}
POUR: A Provably Optimal Method for Unlearning Representation via Neural Collapse-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Le_2026_CVPR,
  author    = {Le, Anjie and Peng, Can and Liu, Yuyuan and Noble, J. Alison},
  title     = {{POUR}: A Provably Optimal Method for Unlearning Representation via Neural Collapse},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10273--10282},
}
FlowMotion: Training-Free Flow Guidance for Video Motion Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zhen and Xu, Youcan and Xiao, Jun and Chen, Long},
  title     = {{FlowMotion}: Training-Free Flow Guidance for Video Motion Transfer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38143--38153},
}
Skyra: AI-Generated Video Detection via Grounded Artifact Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Yifei and Zheng, Wenzhao and Zhang, Yanran and Sun, Runze and Zheng, Yu and Chen, Lei and Zhou, Jie and Lu, Jiwen},
  title     = {Skyra: {AI}-Generated Video Detection via Grounded Artifact Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4482--4493},
}
Visual Diffusion Models are Geometric Solvers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Goren_2026_CVPR,
  author    = {Goren, Nir and Yehezkel, Shai and Dahary, Omer and Voynov, Andrey and Patashnik, Or and Cohen-Or, Daniel},
  title     = {Visual Diffusion Models are Geometric Solvers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43187--43196},
}
Image Diffusion Preview with Consistency Solver-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Fu-Yun and Zhou, Hao and Yuan, Liangzhe and Woo, Sanghyun and Gong, Boqing and Han, Bohyung and Yang, Ming-Hsuan and Zhang, Han and Zhu, Yukun and Liu, Ting and Zhao, Long},
  title     = {Image Diffusion Preview with Consistency Solver},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43271--43280},
}
LATTICE: Democratize High-Fidelity 3D Generation at Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Zeqiang and Zhao, Yunfei and Zhao, Zibo and Liu, Haolin and Lin, Qingxiang and Huang, Jingwei and Guo, Chunchao and Yue, Xiangyu},
  title     = {{LATTICE}: Democratize High-Fidelity {3D} Generation at Scale},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {19982--19992},
}
A Frame is Worth One Token: Efficient Generative World Modeling with Delta Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kerssies_2026_CVPR,
  author    = {Kerssies, Tommie and Berton, Gabriele and He, Ju and Yu, Qihang and Ma, Wufei and de Geus, Daan and Dubbelman, Gijs and Chen, Liang-Chieh},
  title     = {A Frame is Worth One Token: Efficient Generative World Modeling with Delta Tokens},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27978--27988},
}
Resolving the Identity Crisis in Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Borse_2026_CVPR,
  author    = {Borse, Shubhankar and Farhadzadeh, Farzad and Hayat, Munawar and Porikli, Fatih},
  title     = {Resolving the Identity Crisis in Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36703--36712},
}
Harmony: Harmonizing Audio and Video Generation through Cross-Task Synergy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Teng and Yu, Zhentao and Zhang, Guozhen and Su, Zihan and Zhou, Zhengguang and Zhang, Youliang and Zhou, Yuan and Lu, Qinglin and Yi, Ran},
  title     = {Harmony: Harmonizing Audio and Video Generation through Cross-Task Synergy},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16085--16095},
}
MAMMA: Markerless Accurate Multi-person Motion Acquisition-
[pdf]
[supp]
[bibtex]@InProceedings{Velasquez_2026_CVPR,
  author    = {Velasquez, Hanz Cuevas and Yiannakidis, Anastasios and Shin, Soyong and Becherini, Giorgio and H{\"o}schle, Markus and Tesch, Joachim and Obersat, Taylor and Alexiadis, Tsvetelina and Halilaj, Eni and Black, Michael J.},
  title     = {{MAMMA}: Markerless Accurate Multi-person Motion Acquisition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7175--7186},
}
Dual Graph Regularized Deep Unfolding Network for Guided Depth Map Super-resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Zhong_2026_CVPR,
  author    = {Zhong, Zhiwei and Chen, Peilin and Shen, Qiangqiang and Li, Bo and Wang, Shiqi},
  title     = {Dual Graph Regularized Deep Unfolding Network for Guided Depth Map Super-resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16322--16332},
}
Towards Robust Vision Transformers: Path Dependency Analysis and a Simple Two-Stage Adversarial Training-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Seongmin and Song, Byung Cheol},
  title     = {Towards Robust Vision Transformers: Path Dependency Analysis and a Simple Two-Stage Adversarial Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15669--15678},
}
First Frame Is the Place to Go for Video Content Customization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jingxi and Li, Zongxia and Liu, Zhichao and Shi, Guangyao and Wu, Xiyang and Liu, Fuxiao and Ferm{\"u}ller, Cornelia and Feng, Brandon Y. and Aloimonos, Yiannis},
  title     = {First Frame Is the Place to Go for Video Content Customization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9243--9252},
}
High-Quality and Efficient Turbulence Mitigation with Events-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xiaoran and Ding, Jian and Duan, Yuxing and Liu, Haoyue and Chen, Gang and Chang, Yi and Yan, Luxin},
  title     = {High-Quality and Efficient Turbulence Mitigation with Events},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29514--29525},
}
HeroGS: Hierarchical Guidance for Robust 3D Gaussian Splatting under Sparse Views-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Jiashu and Han, Xumeng and Wei, Zhaoyang and Wang, Zipeng and Wang, Kuiran and Li, Guorong and Han, Zhenjun and Jiao, Jianbin},
  title     = {{HeroGS}: Hierarchical Guidance for Robust {3D} {Gaussian} Splatting under Sparse Views},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11788--11797},
}
SEA-Vision: A Multilingual Benchmark for Comprehensive Document and Scene Text Understanding in Southeast Asia-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yue_2026_CVPR,
  author    = {Yue, Pengfei and Zhao, Xingran and Chen, Juntao and Hou, Peng and Longchao, Wang and Lin, Jianghang and Zhang, Shengchuan and Zeng, Anxiang and Cao, Liujuan},
  title     = {{SEA-Vision}: A Multilingual Benchmark for Comprehensive Document and Scene Text Understanding in {Southeast Asia}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30895--30905},
}
VITAL: Vision-Encoder-centered Pre-training for LMMs in Visual Quality Assessment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Ziheng and Cao, Linhan and Han, Jinliang and Zhang, Zicheng and Qian, Jiaying and Wang, Jiarui and Chen, Zijian and Zhai, Guangtao and Min, Xiongkuo},
  title     = {{VITAL}: Vision-Encoder-centered Pre-training for {LMMs} in Visual Quality Assessment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41245--41255},
}
Jailbreaking Vision-Language Models via Dissonance-Guided Suffix Optimization and Image-Phrase Injection-
[pdf]
[supp]
[bibtex]@InProceedings{Pi_2026_CVPR,
  author    = {Pi, Jiacheng and Yang, Zhiguo and Huang, Xingxing and Xu, Dongsheng and Zhong, Ruizhi and Ruan, Wenjie},
  title     = {Jailbreaking Vision-Language Models via Dissonance-Guided Suffix Optimization and Image-Phrase Injection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30087--30097},
}
PDD: Manifold-Prior Diverse Distillation for Medical Anomaly Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Xijun and Liu, Hongying and Shang, Fanhua and Hui, Yanming and Wan, Liang},
  title     = {{PDD}: Manifold-Prior Diverse Distillation for Medical Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28534--28544},
}
Bidirectional Query-Driven Generation of Parametric CAD Sketch-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yang and Ren, Daxuan and Ding, Yijie and Zheng, Jianmin and Deng, Fang},
  title     = {Bidirectional Query-Driven Generation of Parametric {CAD} Sketch},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3176--3185},
}
Illustrator's Depth: Monocular Layer Index Prediction for Image Decomposition-
[pdf]
[supp]
[bibtex]@InProceedings{Maruani_2026_CVPR,
  author    = {Maruani, Nissim and Zhang, Peiying and Chaudhuri, Siddhartha and Fisher, Matthew and Zhao, Nanxuan and Kim, Vladimir G. and Alliez, Pierre and Desbrun, Mathieu and Yifan, Wang},
  title     = {Illustrator's Depth: Monocular Layer Index Prediction for Image Decomposition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26824--26834},
}
On the Role of Temporal Granularity in the Robustness of Spiking Neural Networks-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Mengting and Gu, Shi and Lin, Peng and Ma, De and Tang, Huajin and Zheng, Qian and Pan, Gang},
  title     = {On the Role of Temporal Granularity in the Robustness of Spiking Neural Networks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27946--27955},
}
SRGCD: Stability-Driven Region Growth Framework for 3D Change Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yue and Peng, Tao and Yuan, Yongzhe and Feng, Kaiyuan and Li, Hao and Gong, Maoguo and Miao, Qiguang and Ma, Wenping},
  title     = {{SRGCD}: Stability-Driven Region Growth Framework for {3D} Change Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7546--7555},
}
MoCoDiff: A Controllable Autoregressive Diffusion Model for Expressive Motion Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2026_CVPR,
  author    = {Song, Wenfeng and Wang, Xuehan and Li, Shuai and Chen, Yi and Guo, Yuting and Wu, Zhenyu and Jin, Xingliang and Chen, Chenglizhao and Hou, Fei and Wu, Hongyu and Hao, Aimin},
  title     = {{MoCoDiff}: A Controllable Autoregressive Diffusion Model for Expressive Motion Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {23528--23537},
}
SHARP: Short-Window Streaming for Accurate and Robust Prediction in Motion Forecasting-
[pdf]
[supp]
[bibtex]@InProceedings{Prutsch_2026_CVPR,
  author    = {Prutsch, Alexander and Fruhwirth-Reisinger, Christian and Schinagl, David and Possegger, Horst},
  title     = {{SHARP}: Short-Window Streaming for Accurate and Robust Prediction in Motion Forecasting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32103--32112},
}
Dual-Estimator: Decoupling Global and Local Semantic Shift for Drift Compensation in Class-Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Fankang and Jin, Lu and Sun, Yanpeng and Xuan, Shiyu and Li, Zechao},
  title     = {Dual-Estimator: Decoupling Global and Local Semantic Shift for Drift Compensation in Class-Incremental Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {10799--10809},
}
SAM 3D Body: Robust Full-Body Human Mesh Recovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Xitong and Kukreja, Devansh and Pinkus, Don and Fan, Taosha and Park, Jinhyung and Shin, Soyong and Cao, Jinkun and Liu, Jia-Wei and Ugrinovic, Nicol{\'a}s and Sagar, Anushka and Malik, Jitendra and Feiszli, Matt and Doll{\'a}r, Piotr and Kitani, Kris},
  title     = {{SAM 3D Body}: Robust Full-Body Human Mesh Recovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7209--7219},
}
Boosting Quantitive and Spatial Awareness for Zero-Shot Object Counting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Da and Li, Bingyu and Wang, Feiyu and Zhao, Zhiyuan and Gao, Junyu},
  title     = {Boosting Quantitive and Spatial Awareness for Zero-Shot Object Counting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {20356--20366},
}
OpenVoxel: Training-Free Grouping and Captioning Voxels for Open-Vocabulary 3D Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Sheng-Yu and Choe, Jaesung and Wang, Yu-Chiang Frank and Sun, Cheng},
  title     = {{OpenVoxel}: Training-Free Grouping and Captioning Voxels for Open-Vocabulary {3D} Scene Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16734--16745},
}
GuardTrace-VL: Detecting Unsafe Multimodel Reasoning via Iterative Safety Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Yuxiao and Chen, Junchi and Jin, Zhenchao and Miao, Changtao and Yuan, Haojie and Chu, Qi and Gong, Tao and Yu, Nenghai},
  title     = {{GuardTrace-VL}: Detecting Unsafe Multimodel Reasoning via Iterative Safety Supervision},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11912--11922},
}
SPAN: Spatial-Projection Alignment for Monocular 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yifan and Zhao, Yian and Pu, Fanqi and Yang, Xiaochen and Tang, Yang and Chen, Xi and Yang, Wenming},
  title     = {{SPAN}: Spatial-Projection Alignment for Monocular {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {40762--40771},
}
RAGTrack: Language-aware RGBT Tracking with Retrieval-Augmented Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Hao and Wang, Yuhao and Hao, Wenning and Zhang, Pingping and Wang, Dong and Lu, Huchuan},
  title     = {{RAGTrack}: Language-aware {RGBT} Tracking with Retrieval-Augmented Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28179--28189},
}
Is the Modality Gap a Bug or a Feature? A Robustness Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chowers_2026_CVPR,
  author    = {Chowers, Rhea and Naparstek, Oshri and Barzelay, Udi and Weiss, Yair},
  title     = {Is the Modality Gap a Bug or a Feature? {A} Robustness Perspective},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30288--30298},
}
Federated Active Learning Under Extreme Non-IID and Global Class Imbalance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zong_2026_CVPR,
  author    = {Zong, Chen-Chen and Huang, Sheng-Jun},
  title     = {Federated Active Learning Under Extreme Non-{IID} and Global Class Imbalance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {24534--24544},
}
Tackling Alignment Ambiguity in Person Retrieval through Conversational Attribute Mining-
[pdf]
[supp]
[bibtex]@InProceedings{Zou_2026_CVPR,
  author    = {Zou, Hao and Zhang, Runqing and Ding, Jin and Zhou, Xue and Zou, Jianxiao and Cai, Mingzhu},
  title     = {Tackling Alignment Ambiguity in Person Retrieval through Conversational Attribute Mining},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {9689--9698},
}
Transform to Transfer: Boosting Adversarial Attack Transferability on Vision-Language Pre-training Models-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Yang and Yin, Jia-Li and Lin, Luojun and Lin, Wei},
  title     = {Transform to Transfer: Boosting Adversarial Attack Transferability on Vision-Language Pre-training Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30141--30150},
}
Streaming Diffusion Model for Fast Infrared and Visible Video Fusion-
[pdf]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jinyuan and Sun, Ludan and Ma, Tengyu and Yang, Chunyan and Jiang, Zhiying and Ma, Long and Liu, Risheng and Fan, Xin},
  title     = {Streaming Diffusion Model for Fast Infrared and Visible Video Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14305--14314},
}
Explore with Long-term Memory: A Benchmark and Multimodal LLM-based Reinforcement Learning Framework for Embodied Exploration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Sen and Liu, Bangwei and Gao, Zhenkun and Ma, Lizhuang and Wang, Xuhong and Xie, Yuan and Tan, Xin}, title = {Explore with Long-term Memory: A Benchmark and Multimodal LLM-based Reinforcement Learning Framework for Embodied Exploration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37098-37108} }
Multi-Modal Image Fusion via Intervention-Stable Feature Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xue and Guan, Zheng and Qian, Wenhua and Wang, Chengchao and Ma, Runzhuo}, title = {Multi-Modal Image Fusion via Intervention-Stable Feature Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33827-33837} }
Dark3R: Learning Structure from Motion in the Dark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Andrew Y. and Malik, Anagh and Tedla, SaiKiran and Dai, Yutong and Qin, Yiqian and Salehe, Zach and Attal, Benjamin and Nousias, Sotiris and Kutulakos, Kiriakos N. and Lindell, David B.}, title = {Dark3R: Learning Structure from Motion in the Dark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34160-34170} }
Gastric-X: A Multimodal Multi-Phase Benchmark Dataset for Advancing Vision-Language Models in Gastric Cancer Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yuanzhe and Chen, Hao and Yin, Rui and Ba, Juyan and Zhang, Yu and Lu, Sheng}, title = {Gastric-X: A Multimodal Multi-Phase Benchmark Dataset for Advancing Vision-Language Models in Gastric Cancer Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2490-2501} }
AVION: Aerial Vision-Language Instruction from Offline Teacher to Prompt-Tuned Network-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Yu and Gu, Jianyang and Liu, Hao and Cao, Yue and Hamari, Jozsef and Liu, Zheng and Zardadi, Mohsen}, title = {AVION: Aerial Vision-Language Instruction from Offline Teacher to Prompt-Tuned Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10106-10115} }
Efficient and Training-Free Single-Image Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Qiu_2026_CVPR, author = {Qiu, Haojun and Kutulakos, Kiriakos N. and Lindell, David B.}, title = {Efficient and Training-Free Single-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36157-36167} }
OSA: Echocardiography Video Segmentation via Orthogonalized State Update and Anatomical Prior-aware Feature Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Rui and Wu, Huisi and Qin, Jing}, title = {OSA: Echocardiography Video Segmentation via Orthogonalized State Update and Anatomical Prior-aware Feature Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1428-1438} }
Detecting Compressed AI-Generated Images via Phase Spectrum Robustness-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Kai and Ren, Wenqi and Wang, Wei and Cao, Xiaochun}, title = {Detecting Compressed AI-Generated Images via Phase Spectrum Robustness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35427-35436} }
VLM-Guided Group Preference Alignment for Diffusion-based Human Mesh Recovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2026_CVPR, author = {Shen, Wenhao and Wang, Hao and Yin, Wanqi and Liu, Fayao and Yang, Xulei and Liang, Chao and Cai, Zhongang and Lin, Guosheng}, title = {VLM-Guided Group Preference Alignment for Diffusion-based Human Mesh Recovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13918-13929} }
Tunable Soft Equivariance with Guarantees-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rahman_2026_CVPR, author = {Rahman, Md Ashiqur and Hao, Lim Jun and Jiang, Jeremiah and Lim, Teck-Yian and Yeh, Raymond A.}, title = {Tunable Soft Equivariance with Guarantees}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17693-17703} }
PhysIR-Splat: Physically Consistent Thermal Infrared Radiative Transfer in 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Jingyuan and Hu, Yumeng and Gao, Fei and Zhang, Mingjin}, title = {PhysIR-Splat: Physically Consistent Thermal Infrared Radiative Transfer in 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11818-11828} }
Generalized-CVO: Fast and Correspondence-Free Local Point Cloud Registration with Second Order Riemannian Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Ray and Greiff, Marcus and Lew, Thomas and Subosits, John}, title = {Generalized-CVO: Fast and Correspondence-Free Local Point Cloud Registration with Second Order Riemannian Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2948-2958} }
MultiAnimate: Pose-Guided Image Animation Made Extensible-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Yingcheng and Gong, Haowen and Yang, Chuanguang and An, Zhulin and Xu, Yongjun and Liu, Songhua}, title = {MultiAnimate: Pose-Guided Image Animation Made Extensible}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9306-9316} }
Exposing and Evaluating Hallucinations for GUI Grounding-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zicheng and Jing, Hongyi and Lv, Rui and Fang, Shuo and Zhu, Shiai and Wang, Junying and Li, Chunyi and Liu, Xiaohong and Ma, Chenguang and Zhai, Guangtao}, title = {Exposing and Evaluating Hallucinations for GUI Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40208-40223} }
Beyond Sequential Tools: A Unified VLM Agent System for Photographic Post-Processing via Dynamic Multi-Expert Fusion-
[pdf]
[bibtex]@InProceedings{Xiong_2026_CVPR, author = {Xiong, Honglin and Zhu, Chenjie and Ding, Jianbiao and Ni, Zixuan and Li, Wei and Mi, Zhenpeng and Wang, Qian}, title = {Beyond Sequential Tools: A Unified VLM Agent System for Photographic Post-Processing via Dynamic Multi-Expert Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41521-41530} }
ReAttnCLIP: Training-Free Open-Vocabulary Remote Sensing Image Segmentation via Re-defined Attention in CLIP-
[pdf]
[supp]
[bibtex]@InProceedings{Niu_2026_CVPR, author = {Niu, Xin and Zhao, Manqi and Jiang, Dongsheng and Wu, Yingying and Su, Bing}, title = {ReAttnCLIP: Training-Free Open-Vocabulary Remote Sensing Image Segmentation via Re-defined Attention in CLIP}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24980-24989} }
LIBERO-Plus: A Progressive Robustness Benchmark for Visual-Language-Action Models-
[pdf]
[supp]
[bibtex]@InProceedings{Fei_2026_CVPR, author = {Fei, Senyu and Wang, Siyin and Shi, Junhao and Dai, Zihao and Cai, Jikun and Qian, Pengfang and Ji, Li and He, Xinzhe and Zhang, Shiduo and Fei, Zhaoye and Fu, Jinlan and Gong, Jingjing and Qiu, Xipeng}, title = {LIBERO-Plus: A Progressive Robustness Benchmark for Visual-Language-Action Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38574-38583} }
MajutsuCity: Language-driven Aesthetic-adaptive City Generation with Controllable 3D Assets and Layouts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Zilong and He, Jun and Huang, Xiaobin and Xiong, Ziyi and Luo, Yang and Ye, Junyan and Li, Weijia and Chen, Yiping and Han, Ting}, title = {MajutsuCity: Language-driven Aesthetic-adaptive City Generation with Controllable 3D Assets and Layouts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31608-31618} }
AnyLift: Scaling Motion Reconstruction from Internet Videos via 2D Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Hongjie and Yu, Heng and Li, Jiaman and Yu, Hong-Xing and Adeli, Ehsan and Liu, C. Karen and Wu, Jiajun}, title = {AnyLift: Scaling Motion Reconstruction from Internet Videos via 2D Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13876-13886} }
Accelerating Diffusion via Hybrid Data-Pipeline Parallelism Based on Conditional Guidance Scheduling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2026_CVPR, author = {Jung, Euisoo and Kim, Byunghyun and Kim, Hyunjin and Cho, Seonghye and Lee, Jae-Gil}, title = {Accelerating Diffusion via Hybrid Data-Pipeline Parallelism Based on Conditional Guidance Scheduling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9374-9383} }
OpenDance: Multimodal Controllable 3D Dance Generation with Large-scale Internet Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jinlu and Kang, Zixi and Liu, Libin and Chang, Jianlong and Tian, Qi and Gao, Feng and Wang, Yizhou}, title = {OpenDance: Multimodal Controllable 3D Dance Generation with Large-scale Internet Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28860-28870} }
UniVerse: Empower Unified Generation with Reasoning and Knowledge-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Kaiyue and Jin, Weiyang and Duan, Chengqi and Fang, Rongyao and Liu, Xian and Niu, Yuwei and Wang, Chunwei and Li, Aoxue and Liu, Xihui}, title = {UniVerse: Empower Unified Generation with Reasoning and Knowledge}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21997-22006} }
EcoAlign: An Economically Rational Framework for Efficient LVLM Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Ruoxi and Ma, Hao-Xuan and Ma, Teng and Zhang, Hongyi}, title = {EcoAlign: An Economically Rational Framework for Efficient LVLM Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17451-17461} }
Inside-Out: Measuring Generalization in Vision Transformers Through Inner Workings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Yunxiang and Ma, Mengmeng and Yao, Ziyu and Peng, Xi}, title = {Inside-Out: Measuring Generalization in Vision Transformers Through Inner Workings}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38936-38946} }
AniMimic: Imitating 3D Animation from Video Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Tianyi and Chen, Yunuo and Guo, Yaowei and Yang, Yin and Zhou, Bolei and Terzopoulos, Demetri and Jiang, Ying and Jiang, Chenfanfu}, title = {AniMimic: Imitating 3D Animation from Video Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40266-40276} }
Building Robust Vision Encoders for Cross-Dataset Evaluation in Immunofluorescent Microscopy-
[pdf]
[supp]
[bibtex]@InProceedings{Marikkar_2026_CVPR, author = {Marikkar, Umar and Husain, Syed Sameed and Awais, Muhammad and Atito, Sara}, title = {Building Robust Vision Encoders for Cross-Dataset Evaluation in Immunofluorescent Microscopy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28308-28317} }
Gated KalmaNet: A Fading Memory Layer through Test-time Ridge Regression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Liangzu and Chattopadhyay, Aditya and Zancato, Luca and Nunez, Elvis and Xia, Wei and Soatto, Stefano}, title = {Gated KalmaNet: A Fading Memory Layer through Test-time Ridge Regression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20212-20222} }
The Missing Point in Vision Transformers for Universal Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shahabodini_2026_CVPR, author = {Shahabodini, Sajjad and Mansoori, Mobina and Bayatmakou, Farnoush and Abouei, Jamshid and Plataniotis, Konstantinos and Mohammadi, Arash}, title = {The Missing Point in Vision Transformers for Universal Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6314-6324} }
AeroAgent: A Vision-Physics-Decision Framework for Aerodynamic Vehicle Design-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Ye and Liu, Shouyi and Yang, Huiyu and Gu, Jianghang and Fan, Wenhao and Yang, Zhongxin and Wang, Ding and Chen, Simeng and Jiang, Zirun and Bin, Yuanwei and Chen, Shiyi and Chen, Yuntian}, title = {AeroAgent: A Vision-Physics-Decision Framework for Aerodynamic Vehicle Design}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11694-11703} }
Reviving ConvNeXt for Efficient Convolutional Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kwon_2026_CVPR, author = {Kwon, Taesung and Bianchi, Lorenzo and Wittke, Lennart and Watine, Felix and Carrara, Fabio and Ye, Jong Chul and Weber, Romann and Azevedo, Vinicius}, title = {Reviving ConvNeXt for Efficient Convolutional Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43675-43685} }
StreamAvatar: Streaming Diffusion Models for Real-Time Interactive Human Avatars-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Zhiyao and Peng, Ziqiao and Ma, Yifeng and Chen, Yi and Zhou, Zhengguang and Zhou, Zixiang and Zhang, Guozhen and Zhang, Youliang and Zhou, Yuan and Lu, Qinglin and Liu, Yong-Jin}, title = {StreamAvatar: Streaming Diffusion Models for Real-Time Interactive Human Avatars}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10887-10897} }
DetAny4D: Detect Anything 4D Temporally in a Streaming RGB Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hou_2026_CVPR, author = {Hou, Jiawei and Zhang, Shenghao and Wang, Can and Gu, Zheng and Ling, Yonggen and Zeng, Taiping and Xue, Xiangyang and Zhang, Jingbo}, title = {DetAny4D: Detect Anything 4D Temporally in a Streaming RGB Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32798-32807} }
Rethinking Token Reduction for Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yi and Zhang, Haofei and Huang, Qihan and Cao, Anda and Fang, Gongfan and Wang, Wei and Jin, Xuan and Song, Jie and Song, Mingli and Wang, Xinchao}, title = {Rethinking Token Reduction for Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24727-24737} }
CaptionQA: Is Your Caption as Useful as the Image Itself?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Shijia and Liu, Yunong and Zhai, Bohan and Sun, Ximeng and Liu, Zicheng and Barsoum, Emad and Li, Manling and Xu, Chenfeng}, title = {CaptionQA: Is Your Caption as Useful as the Image Itself?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23741-23750} }
Dynamics-Aware Preference Optimization for Vision-Language Models-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jusheng and Cai, Kaitong and Yang, Jing and Wang, Jian and Wang, Keze}, title = {Dynamics-Aware Preference Optimization for Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11610-11620} }
Hierarchical Long Video Understanding with Audiovisual Entity Cohesion and Agentic Search-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Xinlei and Peng, Xiulian and Li, Xiao and Xiong, Zhiwei and Lu, Yan}, title = {Hierarchical Long Video Understanding with Audiovisual Entity Cohesion and Agentic Search}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32882-32891} }
FlowDC: Flow-Based Decoupling-Decay for Complex Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Yilei and Wang, Zhen and Wang, Yanghao and Yu, Jun and Zhuang, Yueting and Xiao, Jun and Chen, Long}, title = {FlowDC: Flow-Based Decoupling-Decay for Complex Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25757-25766} }
PDCR: Perception-Decomposed Confidence Reward for Vision-Language Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yoon_2026_CVPR, author = {Yoon, Hee Suk and Yoon, Eunseop and Hong, Ji Woo and Eom, SooHwan and Koo, Gwanhyeong and Hasegawa-Johnson, Mark and Dai, Qi and Luo, Chong and Yoo, Chang D.}, title = {PDCR: Perception-Decomposed Confidence Reward for Vision-Language Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18881-18891} }
CaptionFormer: Unified Segmentation, Tracking, and Captioning for Spatio-Temporal Objects-
[pdf]
[supp]
[bibtex]@InProceedings{Fiastre_2026_CVPR, author = {Fiastre, Gabriel and Yang, Antoine and Schmid, Cordelia}, title = {CaptionFormer: Unified Segmentation, Tracking, and Captioning for Spatio-Temporal Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39518-39528} }
MeanFlow Transformers with Representation Autoencoders-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Zheyuan and Lai, Chieh-Hsin and Wu, Ge and Mitsufuji, Yuki and Ermon, Stefano}, title = {MeanFlow Transformers with Representation Autoencoders}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25709-25718} }
WeMMU: Enhanced Bridging of Vision-Language Models and Diffusion Models via Noisy Query Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Jian and Yin, Dacheng and He, Xiaoxuan and Li, Yong and Rao, Fengyun and Lyu, Jing and Zhai, Wei and Cao, Yang and Zha, Zheng-Jun}, title = {WeMMU: Enhanced Bridging of Vision-Language Models and Diffusion Models via Noisy Query Tokens}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17609-17618} }
PAM: A Pose-Appearance-Motion Engine for Sim-to-Real HOI Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Mingju and Yang, Kaisen and Gao, Huan-ang and Li, Bohan and Ding, Ao and Li, Wenyi and Yu, Yangcheng and Liu, Jinkun and Xu, Shaocong and Niu, Yike and Chi, Haohan and Chen, Hao and Tang, Hao and Zhang, Yu and Yi, Li and Zhao, Hao}, title = {PAM: A Pose-Appearance-Motion Engine for Sim-to-Real HOI Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15954-15965} }
CLAY: Conditional Visual Similarity Modulation in Vision-Language Embedding Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lim_2026_CVPR, author = {Lim, Sohwi and Lee, Hyoseok and Park, Jungjoon and Oh, Tae-Hyun}, title = {CLAY: Conditional Visual Similarity Modulation in Vision-Language Embedding Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9679-9688} }
The Surprising Effectiveness of Noise Pretraining for Implicit Neural Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vyas_2026_CVPR, author = {Vyas, Kushal and Kayabasi, Alper and Kim, Daniel and Saragadam, Vishwanath and Veeraraghavan, Ashok and Balakrishnan, Guha}, title = {The Surprising Effectiveness of Noise Pretraining for Implicit Neural Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6116-6125} }
Learning from Semantic Dictionaries: Discriminative Codebook Contrastive Learning for Unified Visual Representation and Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Estepa_2026_CVPR, author = {Estepa, Imanol G. and Rodr{\'\i}guez-de-Vera, Jes{\'u}s M. and Nagarajan, Bhalaji and Radeva, Petia}, title = {Learning from Semantic Dictionaries: Discriminative Codebook Contrastive Learning for Unified Visual Representation and Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22456-22466} }
ZipMap: Linear-Time Stateful 3D Reconstruction via Test-Time Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2026_CVPR, author = {Jin, Haian and Wu, Rundi and Zhang, Tianyuan and Gao, Ruiqi and Barron, Jonathan T. and Snavely, Noah and Ho{\l}y{\'n}ski, Aleksander}, title = {ZipMap: Linear-Time Stateful 3D Reconstruction via Test-Time Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21748-21759} }
SGDrive: Scene-to-Goal Hierarchical World Cognition for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jingyu and Wu, Junjie and Hu, Dongnan and Huang, Xiangkai and Sun, Bin and Hao, Zhihui and Lang, Xianpeng and Zhu, Xiatian and Zhang, Li}, title = {SGDrive: Scene-to-Goal Hierarchical World Cognition for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4032-4042} }
OnlinePG: Online Open-Vocabulary Panoptic Mapping with 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhai_2026_CVPR, author = {Zhai, Hongjia and Zhang, Qi and Pan, Xiaokun and Zhang, Xiyu and Dong, Yitong and Zhang, Huaqi and Xu, Dan and Zhang, Guofeng}, title = {OnlinePG: Online Open-Vocabulary Panoptic Mapping with 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33269-33279} }
Are Image-to-Video Models Good Zero-Shot Image Editors?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zechuan and Chen, Zhenyuan and Yang, Zongxin and Yang, Yi}, title = {Are Image-to-Video Models Good Zero-Shot Image Editors?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2090-2103} }
PyraTok: Language-Aligned Pyramidal Tokenizer for Video Understanding and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Susladkar_2026_CVPR, author = {Susladkar, Onkar and Prakash, Tushar and Juvekar, Adheesh and Nguyen, Kiet A. and Jang, Dong-Hwan and Dhillon, Inderjit S. and Lourentzou, Ismini}, title = {PyraTok: Language-Aligned Pyramidal Tokenizer for Video Understanding and Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37906-37917} }
RT-Splatting: Joint Reflection-Transmission Modeling with Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Ji and Ying, Xianghua and Xing, Bowei and Guo, Ruohao and Yue, Wenzhen}, title = {RT-Splatting: Joint Reflection-Transmission Modeling with Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4870-4880} }
From Observation to Action: Latent Action-based Primitive Segmentation for VLA Pre-training in Industrial Settings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jiajie and Schwertfeger, S{\"o}ren and Kleiner, Alexander}, title = {From Observation to Action: Latent Action-based Primitive Segmentation for VLA Pre-training in Industrial Settings}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6750-6759} }
Retrieve and Segment: Are a Few Examples Enough to Bridge the Supervision Gap in Open-Vocabulary Segmentation?-
[pdf]
[supp]
[bibtex]@InProceedings{Aravanis_2026_CVPR, author = {Aravanis, Tilemachos and Stojni{\'c}, Vladan and Psomas, Bill and Komodakis, Nikos and Tolias, Giorgos}, title = {Retrieve and Segment: Are a Few Examples Enough to Bridge the Supervision Gap in Open-Vocabulary Segmentation?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27621-27632} }
Shoe Style-Invariant and Ground-Aware Learning for Dense Foot Contact Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2026_CVPR, author = {Jung, Daniel Sungho and Lee, Kyoung Mu}, title = {Shoe Style-Invariant and Ground-Aware Learning for Dense Foot Contact Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7058-7067} }
UniGeoSeg: Towards Unified Open-World Segmentation for Geospatial Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ni_2026_CVPR, author = {Ni, Shuo and Wang, Di and Chen, He and Guo, Haonan and Zhang, Ning and Zhang, Jing}, title = {UniGeoSeg: Towards Unified Open-World Segmentation for Geospatial Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34865-34876} }
ShiftLUT: Spatial Shift Enhanced Look-Up Tables for Efficient Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Xiaolong and Yu, Yitong and Xiong, Shiyao and Hao, Jinhua and Sun, Ming and Zhou, Chao and Wang, Bin}, title = {ShiftLUT: Spatial Shift Enhanced Look-Up Tables for Efficient Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29959-29968} }
NaTex: Seamless Texture Generation as Latent Color Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lai_2026_CVPR, author = {Lai, Zeqiang and Zhao, Yunfei and Zhao, Zibo and Yang, Xin and Huang, Xin and Huang, Jingwei and Yue, Xiangyu and Guo, Chunchao}, title = {NaTex: Seamless Texture Generation as Latent Color Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18619-18629} }
HoneyBee: Data Recipes for Vision-Language Reasoners-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bansal_2026_CVPR, author = {Bansal, Hritik and Sachan, Devendra Singh and Chang, Kai-Wei and Grover, Aditya and Ghosh, Gargi and Yih, Wen-tau and Pasunuru, Ramakanth}, title = {HoneyBee: Data Recipes for Vision-Language Reasoners}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26262-26273} }
One-Shot Flow, Any-Time Frame: A Bidirectional Warping Framework for Event-Based Video Frame Interpolation-
[pdf]
[supp]
[bibtex]@InProceedings{Fu_2026_CVPR, author = {Fu, Linghui and Liu, Yuhan and Chen, Hao and Yang, Zhen and Deng, Yongjian}, title = {One-Shot Flow, Any-Time Frame: A Bidirectional Warping Framework for Event-Based Video Frame Interpolation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2832-2842} }
Condensed Test-Time Adaptation of VLMs for Action Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Ge_2026_CVPR, author = {Ge, Wenxuan and Qu, Hongyu and Yan, Rui and Xie, Guo-Sen and Yao, Yazhou and Shu, Xiangbo and Tang, Jinhui}, title = {Condensed Test-Time Adaptation of VLMs for Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16977-16987} }
RI-Mamba: Rotation-Invariant Mamba for Robust Text-to-Shape Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Khanh and de Silva Edirimuni, Dasith and Hassan, Ghulam Mubashar and Mian, Ajmal}, title = {RI-Mamba: Rotation-Invariant Mamba for Robust Text-to-Shape Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16834-16844} }
PointThinker: Point-Incentivized Parallel Thinking for Multimodal Large Language Model-
[pdf]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Zhengdong and Wang, Chao and Rao, Fengyun and Lyu, Jing and Fan, Hehe and Yang, Yi}, title = {PointThinker: Point-Incentivized Parallel Thinking for Multimodal Large Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26240-26250} }
Balanced Hierarchical Contrastive Learning with Decoupled Queries for Fine-grained Object Detection in Remote Sensing Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Jingzhou and Chen, Dexin and Xiong, Fengchao and Qian, Yuntao and Xiao, Liang}, title = {Balanced Hierarchical Contrastive Learning with Decoupled Queries for Fine-grained Object Detection in Remote Sensing Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20619-20628} }
Stand-In: A Lightweight and Plug-and-Play Identity Control for Video Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xue_2026_CVPR, author = {Xue, Bowen and Duan, Zheng-Peng and Yan, Qixin and Wang, Wenjing and Liu, Hao and Guo, Chun-Le and Li, Chongyi and Li, Chen and Lyu, Jing}, title = {Stand-In: A Lightweight and Plug-and-Play Identity Control for Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23314-23324} }
SEASON: Mitigating Temporal Hallucination in Video Large Language Models via Self-Diagnostic Contrastive Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Chang-Hsun and Chang, Kai-Po and Sheng, Yu-Yang and Chung, Hung-Kai and Wang, Kuei-Chun and Wang, Yu-Chiang Frank}, title = {SEASON: Mitigating Temporal Hallucination in Video Large Language Models via Self-Diagnostic Contrastive Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11096-11105} }
VecGlypher: Unified Vector Glyph Generation with Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Xiaoke and Gauri, Bhavul and Ng, Kam Woh and Ng, Tony and Xu, Mengmeng and Liu, Zhiheng and Ren, Weiming and An, Zhaochong and Zhou, Zijian and Qiu, Haonan and Zhou, Yuyin and He, Sen and Wang, Ziheng and Xiang, Tao and Han, Xiao}, title = {VecGlypher: Unified Vector Glyph Generation with Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24373-24383} }
MMSD3.0: A Multi-Image Benchmark for Real-World Multimodal Sarcasm Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Haochen and Kong, Yuyao and Xu, Yongxiu and Gou, Gaopeng and Xu, Hongbo and Wang, Yubin and Zhang, Haoliang}, title = {MMSD3.0: A Multi-Image Benchmark for Real-World Multimodal Sarcasm Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37885-37895} }
Fusion in Your Way: Aligning Image Fusion with Heterogeneous Demands via Direct Preference Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2026_CVPR, author = {Su, Weijian and Zhang, Songqian and Han, Yuqi and Zhuang, Jian and Huang, Yongdong and Zhang, Qiang}, title = {Fusion in Your Way: Aligning Image Fusion with Heterogeneous Demands via Direct Preference Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41499-41509} }
A Training-Free Style-Personalization via SVD-Based Feature Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Kyoungmin and Park, Jihun and Gim, Jongmin and Choi, Wonhyeok and Hwang, Kyumin and Kim, Jaeyeul and Im, Sunghoon}, title = {A Training-Free Style-Personalization via SVD-Based Feature Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {506-516} }
Beyond the Ground Truth: Enhanced Supervision for Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ryou_2026_CVPR, author = {Ryou, Donghun and Ha, Inju and Chu, Sanghyeok and Han, Bohyung}, title = {Beyond the Ground Truth: Enhanced Supervision for Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29949-29958} }
SineProject: Machine Unlearning for Stable Vision-Language Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Garg_2026_CVPR, author = {Garg, Arpit and Saratchandran, Hemanth and Lucey, Simon}, title = {SineProject: Machine Unlearning for Stable Vision-Language Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31735-31745} }
Unlocking Token Rewards via Training-Free Reward Attribution-
[pdf]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Sitong and Tan, Haoru and Xia, Bin and Zhang, Xichen and Li, Jingyao and Zhang, Shaofeng and Qi, Xiaojuan and Yu, Bei and Jia, Jiaya}, title = {Unlocking Token Rewards via Training-Free Reward Attribution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5082-5091} }
Delving Aleatoric Uncertainty in Medical Image Segmentation via Vision Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Ruiyang and Liu, Fang and Jiao, Licheng and Xie, Xinglin and Hao, Jiayao and Li, Shuo and Liu, Xu and Yang, Jingyi and Li, Lingling and Chen, Puhua and Ma, Wenping}, title = {Delving Aleatoric Uncertainty in Medical Image Segmentation via Vision Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30011-30020} }
Language Models Can Explain Visual Features via Steering-
[pdf]
[supp]
[bibtex]@InProceedings{Ferrando_2026_CVPR, author = {Ferrando, Javier and Lopez-Cuena, Enrique and Martin-Torres, Pablo Agustin and Hinjos, Daniel and Arias-Duart, Anna and Garcia-Gasulla, Dario}, title = {Language Models Can Explain Visual Features via Steering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38947-38958} }
PhyOceanCast: Global Ocean Forecasting with Physics-Informed Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Qixiu and Zhu, Xiang and Li, Xiaoyong and Xu, Xiaolong}, title = {PhyOceanCast: Global Ocean Forecasting with Physics-Informed Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23652-23662} }
PanDA: Unsupervised Domain Adaptation for Multimodal 3D Panoptic Segmentation in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Yining and Li, Shijie and Wu, Yuchen and Yang, Xulei and Zhao, Na}, title = {PanDA: Unsupervised Domain Adaptation for Multimodal 3D Panoptic Segmentation in Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33057-33067} }
FreeArtGS: Articulated Gaussian Splatting Under Free-moving Scenario-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dai_2026_CVPR, author = {Dai, Hang and Fan, Hongwei and Zhang, Han and Wu, Duojin and Zhang, Jiyao and Dong, Hao}, title = {FreeArtGS: Articulated Gaussian Splatting Under Free-moving Scenario}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11777-11787} }
ReCoFuse: Ultra-Robust Image Fusion via Restorative Multi-Modal Diffusion Reciprocal Coupling-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Hao and Yang, Shuhan and Tang, Linfeng and Yi, Xunpeng and Ma, Jiayi}, title = {ReCoFuse: Ultra-Robust Image Fusion via Restorative Multi-Modal Diffusion Reciprocal Coupling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33838-33847} }
GraPHFormer: A Multimodal Graph Persistent Homology Transformer for the Analysis of Neuroscience Morphologies-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shah_2026_CVPR, author = {Shah, Uzair and Agus, Marco and Gamal, Mahmoud and Alzubaidi, Mahmood and Cali, Corrado and Magistretti, Pierre J. and Bouzerdoum, Abdesselam and Househ, Mowafa}, title = {GraPHFormer: A Multimodal Graph Persistent Homology Transformer for the Analysis of Neuroscience Morphologies}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28224-28233} }
The Drift Kernel: Why Diffusion Models Change Even When Told Not To-
[pdf]
[supp]
[bibtex]@InProceedings{Ram_2026_CVPR, author = {Ram, Gokul Srinath Seetha and Elavazhagan, Rashmi}, title = {The Drift Kernel: Why Diffusion Models Change Even When Told Not To}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43281-43289} }
Accelerating Autoregressive Video Diffusion via History-Guided Cache and Residual Correction-
[pdf]
[supp]
[bibtex]@InProceedings{Nan_2026_CVPR, author = {Nan, Kepan and Zhao, Wangbo and Zhou, Penghao and Li, Jun and Yang, Zhenheng and Yang, Jian and Tai, Ying}, title = {Accelerating Autoregressive Video Diffusion via History-Guided Cache and Residual Correction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43740-43750} }
HyperNAS: Enhancing Architecture Representation for NAS Predictor via Hypernetwork-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lv_2026_CVPR, author = {Lv, Jindi and Zhou, Yuhao and Tian, Yuxin and Ye, Qing and Feng, Wentao and Lv, Jiancheng}, title = {HyperNAS: Enhancing Architecture Representation for NAS Predictor via Hypernetwork}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12955-12965} }
Locate-Then-Examine: Grounded Region Reasoning Improves Detection of AI-Generated Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2026_CVPR, author = {Ji, Yikun and Hong, Yan and Deng, Bowen and Lan, Jun and Zhu, Huijia and Wang, Weiqiang and Zhang, Liqing and Zhang, Jianfu}, title = {Locate-Then-Examine: Grounded Region Reasoning Improves Detection of AI-Generated Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19165-19175} }
Act Like a Pathologist: Tissue-Aware Whole Slide Image Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Wentao and Lyu, Weimin and Lou, Peiliang and Hu, Qingqiao and Hu, Xiaoling and Abousamra, Shahira and Han, Wenchao and Guo, Ruifeng and Zhou, Jiawei and Chen, Chao and Wang, Chen}, title = {Act Like a Pathologist: Tissue-Aware Whole Slide Image Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6972-6981} }
Dehallu3D: Hallucination-Mitigated 3D Generation from a Single Image via Cyclic View Consistency Refinement-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xiwen and Zhang, Shichao and Wang, Ruowei and Li, Mao and Zhou, Chenyu and Zhou, Ji-Zhe and Zhao, Qijun and Zhang, Hailun}, title = {Dehallu3D: Hallucination-Mitigated 3D Generation from a Single Image via Cyclic View Consistency Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19993-20002} }
Does YOLO Really Need to See Every Training Image in Every Epoch?-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Xingxing and Dong, Jiahua and Han, Junwei and Cheng, Gong}, title = {Does YOLO Really Need to See Every Training Image in Every Epoch?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {135-144} }
Personalized Federated Training of Diffusion Models with Privacy Guarantees-
[pdf]
[supp]
[bibtex]@InProceedings{Patel_2026_CVPR, author = {Patel, Kumar Kshitij and Jiang, Bingqing and Kabir, A F M Mahfuzul and Zhang, Weitong and Zou, Difan and Wang, Lingxiao}, title = {Personalized Federated Training of Diffusion Models with Privacy Guarantees}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31790-31801} }
KaLOS finds Consensus: A Meta-Algorithm for Evaluating Inter-Annotator Agreement in Complex Vision Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Tschirschwitz_2026_CVPR, author = {Tschirschwitz, David and Rodehorst, Volker}, title = {KaLOS finds Consensus: A Meta-Algorithm for Evaluating Inter-Annotator Agreement in Complex Vision Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38554-38563} }
Rank-Guided Pseudo-Bias Learning for Robust Black-Box Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Dwivedi_2026_CVPR, author = {Dwivedi, Rajeev Ranjan and Dangwal, Anshuman and Kurmi, Vinod K}, title = {Rank-Guided Pseudo-Bias Learning for Robust Black-Box Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31683-31692} }
Learning What Helps: Task-Aligned Context Selection for Vision Tasks-
[pdf]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Jingyu and Konuk, Emir and Strand, Fredrik and Matsoukas, Christos and Smith, Kevin}, title = {Learning What Helps: Task-Aligned Context Selection for Vision Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11632-11642} }
TextPecker: Rewarding Structural Anomaly Quantification for Enhancing Visual Text Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Hanshen and Liu, Yuliang and Wu, Xuecheng and Wang, An-Lan and Feng, Hao and Yang, Dingkang and Feng, Chao and Huang, Can and Tang, Jingqun and Bai, Xiang}, title = {TextPecker: Rewarding Structural Anomaly Quantification for Enhancing Visual Text Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22059-22069} }
Breaking the Regional Perception Bottleneck of Multimodal Large Language Models via External Reasoning Framework-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jinrong and Xu, Zhaoyang and He, Xusheng and Li, Xinrui and Zheng, Na and Wu, Jianlong}, title = {Breaking the Regional Perception Bottleneck of Multimodal Large Language Models via External Reasoning Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33531-33541} }
Scaling Spatial Intelligence with Multimodal Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Zhongang and Wang, Ruisi and Gu, Chenyang and Pu, Fanyi and Xu, Junxiang and Wang, Yubo and Yin, Wanqi and Yang, Zhitao and Wei, Chen and Zhou, Tongxi and Sun, Qingping and Pang, Hui En and Li, Jiaqi and Qian, Oscar and Lin, Zhiqian and Shi, Xuanke and Deng, Kewang and Han, Xiaoyang and Chen, Zukai and Fan, Xiangyu and Deng, Hanming and Lu, Lewei and Pan, Liang and Li, Bo and Liu, Ziwei and Wang, Quan and Lin, Dahua and Yang, Lei}, title = {Scaling Spatial Intelligence with Multimodal Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7879-7890} }
Semantic Audio-Visual Navigation in Continuous Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Yichen and Wang, Hebaixu and Liu, Meng and Zhou, Yu and Gao, Chen and Chen, Kehan and Huang, Gongping}, title = {Semantic Audio-Visual Navigation in Continuous Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22369-22379} }
Cross-Modal Attention Calibration for LVLM Hallucination Mitigation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jiaming and Zhang, Jiacheng and Jie, Zequn and Ma, Lin and Li, Ming and Luo, Xiaonan and Li, Guanbin}, title = {Cross-Modal Attention Calibration for LVLM Hallucination Mitigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40186-40196} }
MonoSAOD: Monocular 3D Object Detection with Sparsely Annotated Label-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2026_CVPR, author = {Jung, Junyoung and Kim, Seokwon and Kim, Jung Uk}, title = {MonoSAOD: Monocular 3D Object Detection with Sparsely Annotated Label}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4718-4727} }
Human Geometry Distribution for 3D Animation Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Xiangjun and Zhang, Biao and Wonka, Peter}, title = {Human Geometry Distribution for 3D Animation Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38313-38323} }
Training High-Level Schedulers with Execution-Feedback Reinforcement Learning for Long-Horizon GUI Automation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Zehao and Ju, Tianjie and Wu, Zheng and Zhang, Zhuosheng and Liu, Gongshen}, title = {Training High-Level Schedulers with Execution-Feedback Reinforcement Learning for Long-Horizon GUI Automation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27525-27535} }
Direct Segmentation without Logits Optimization for Training-Free Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jiahao and Lu, Yang and Zhang, Yachao and Wang, Fangyong and Xie, Yuan and Qu, Yanyun}, title = {Direct Segmentation without Logits Optimization for Training-Free Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13168-13178} }
Moving Border Ownership for Event-based Motion Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Hua_2026_CVPR, author = {Hua, Zhiyuan and Ferm{\"u}ller, Cornelia and Aloimonos, Yiannis}, title = {Moving Border Ownership for Event-based Motion Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37043-37052} }
TGTrack: Temporal Generative Learning for Unified Single Object Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Geng_2026_CVPR, author = {Geng, Wanting and Chen, Xin and Sun, Chuanyu and Zhao, Jie and Kang, Ben and Wang, Dong and Lu, Huchuan}, title = {TGTrack: Temporal Generative Learning for Unified Single Object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28134-28144} }
Towards Visual Query Localization in the 3D World-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Liang and Tan, Bohan and Zhang, Zhipeng and Li, Haobo and Jiao, Yifan and Dong, Xingping and Zhang, Libo}, title = {Towards Visual Query Localization in the 3D World}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41406-41415} }
OrienPose: Orientation-Guided Novel View Synthesis for Single-Image Unseen Object Pose Estimation-
[pdf]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yating and Qi, Zhaoshuai and Zou, Yang and Yang, Yongnan and Zhang, Shizhou and Zhang, Yanning}, title = {OrienPose: Orientation-Guided Novel View Synthesis for Single-Image Unseen Object Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26813-26823} }
What Is It Like to Be a Noise? An Entropy-based Gaussian Noise Regularization for Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Chang_2026_CVPR, author = {Chang, Pascal and Lascheit, Kai and Tang, Jingwei and Gross, Markus and Azevedo, Vinicius C.}, title = {What Is It Like to Be a Noise? An Entropy-based Gaussian Noise Regularization for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43471-43481} }
PhysVid: Physics Aware Local Conditioning for Generative Video Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pathak_2026_CVPR, author = {Pathak, Saurabh and Arani, Elahe and Pechenizkiy, Mykola and Zonooz, Bahram}, title = {PhysVid: Physics Aware Local Conditioning for Generative Video Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41847-41858} }
Towards Motion Turing Test: Evaluating Human-Likeness in Humanoid Robots-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Mingzhe and Liu, Mengyin and Wu, Zekai and Lin, Xincheng and Zhang, Junsheng and Yan, Ming and Xie, Zengye and Zhang, Changwang and Wen, Chenglu and Xu, Lan and Shen, Siqi and Wang, Cheng}, title = {Towards Motion Turing Test: Evaluating Human-Likeness in Humanoid Robots}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16486-16498} }
Harnessing Chain-of-Thought Reasoning in Multimodal Large Language Models for Face Anti-Spoofing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Honglu and Fang, Zhiqin and Zhao, Ningning and Hou, Saihui and Ma, Long and Pei, Renwang and He, Zhaofeng}, title = {Harnessing Chain-of-Thought Reasoning in Multimodal Large Language Models for Face Anti-Spoofing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33566-33576} }
CrossHOI: Learning Cross-View Representations for Monocular 3D Human-Object Interaction Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Geng_2026_CVPR, author = {Geng, Pei and Zhang, Shanshan and Yang, Jian}, title = {CrossHOI: Learning Cross-View Representations for Monocular 3D Human-Object Interaction Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7121-7130} }
VULCAN: Tool-Augmented Multi Agents for Iterative 3D Object Arrangement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kuang_2026_CVPR, author = {Kuang, Zhengfei and Lin, Rui and Zhao, Long and Wetzstein, Gordon and Xie, Saining and Woo, Sanghyun}, title = {VULCAN: Tool-Augmented Multi Agents for Iterative 3D Object Arrangement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23763-23773} }
ArtLLM: Generating Articulated Assets via 3D LLM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Penghao and Xie, Siyuan and Yan, Hongyu and Yang, Xianghui and Huang, Jingwei and Guo, Chunchao and Gu, Jiayuan}, title = {ArtLLM: Generating Articulated Assets via 3D LLM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34281-34291} }
DDT: Decoupled Diffusion Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Shuai and Tian, Zhi and Huang, Weilin and Wang, Limin}, title = {DDT: Decoupled Diffusion Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40633-40642} }
DiT360: High-Fidelity Panoramic Image Generation via Hybrid Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Haoran and Zhang, Dizhe and Li, Xiangtai and Du, Bo and Qi, Lu}, title = {DiT360: High-Fidelity Panoramic Image Generation via Hybrid Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23367-23377} }
Pose-guided Enriched Feature Learning for Federated-by-camera Person Re-identification-
[pdf]
[supp]
[bibtex]@InProceedings{Oh_2026_CVPR, author = {Oh, JooHyung and Oh, Minyoung and Yoon, Sung Whan and Sim, Jae-Young}, title = {Pose-guided Enriched Feature Learning for Federated-by-camera Person Re-identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40458-40467} }
ANTS: Adaptive Negative Textual Space Shaping for OOD Detection via Test-Time MLLM Understanding and Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Wenjie and Zhang, Yabin and Jin, Xin and Zeng, Wenjun and Zhang, Lei}, title = {ANTS: Adaptive Negative Textual Space Shaping for OOD Detection via Test-Time MLLM Understanding and Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20-30} }
HTTM: Head-wise Temporal Token Merging for Faster VGGT-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Weitian and Meiner, Lukas and Shubham, Rai and De La Parra, Cecilia and Kumar, Akash}, title = {HTTM: Head-wise Temporal Token Merging for Faster VGGT}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26379-26388} }
SenCache: Accelerating Diffusion Model Inference via Sensitivity-Aware Caching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Haghighi_2026_CVPR, author = {Haghighi, Yasaman and Alahi, Alexandre}, title = {SenCache: Accelerating Diffusion Model Inference via Sensitivity-Aware Caching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14295-14304} }
PAI-Bench: A Comprehensive Benchmark For Physical AI-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Fengzhe and Huang, Jiannan and Li, Jialuo and Ramanan, Deva and Shi, Humphrey}, title = {PAI-Bench: A Comprehensive Benchmark For Physical AI}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21522-21536} }
Basis-Oriented Low-rank Transfer for Few-Shot and Test-Time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Junghwan and Cho, Woojin and Heo, Junhyuk and Kwon, Darongsae and Lee, Kookjin}, title = {Basis-Oriented Low-rank Transfer for Few-Shot and Test-Time Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {860-870} }
GauMVC: Generative Decoupled Gaussian Representation for Human-centric Multi-view Video Compression-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Ruoke and Yang, Mingjia and Zhang, Xinfeng and Tang, Haocheng and Yin, Qian and Deng, Zhipin and Zhang, Kai and Zhang, Li and Ma, Siwei}, title = {GauMVC: Generative Decoupled Gaussian Representation for Human-centric Multi-view Video Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4963-4972} }
Intrinsic Geometry-Appearance Consistency Optimization for Sparse-View Gaussian Splatting-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xiong_2026_CVPR, author = {Xiong, Kaiqiang and Peng, Rui and Wu, Jiahao and Wang, Zhanke and Liang, Jie and Zheng, Xiaoyun and Gao, Feng and Wang, Ronggang}, title = {Intrinsic Geometry-Appearance Consistency Optimization for Sparse-View Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40918-40928} }
ArtHOI: Taming Foundation Models for Monocular 4D Reconstruction of Hand-Articulated-Object Interactions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zikai and Zhang, Zhilu and Wang, Yiqing and Li, Hui and Zuo, Wangmeng}, title = {ArtHOI: Taming Foundation Models for Monocular 4D Reconstruction of Hand-Articulated-Object Interactions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15998-16009} }
SpatialStack: Layered Geometry-Language Fusion for 3D VLM Spatial Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jian and Zhou, Shijie and Liu, Bangya and Kadambi, Achuta and Fan, Zhiwen}, title = {SpatialStack: Layered Geometry-Language Fusion for 3D VLM Spatial Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38678-38688} }
Hint2Gen: Bridging Understanding and Generation via Code-structured Hints-
[pdf]
[bibtex]@InProceedings{Tu_2026_CVPR, author = {Tu, Yuanpeng and Chen, Yunpeng and Chen, Xi and Li, Liang and Zhao, Hengshuang}, title = {Hint2Gen: Bridging Understanding and Generation via Code-structured Hints}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36593-36603} }
Goldilocks Test Sets for Face Verification-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Haiyu and Tian, Sicong and Bhatta, Aman and Gutierrez, Jacob and Bezold, Grace and Argueta, Genesis and Ricanek, Karl and King, Michael C. and Bowyer, Kevin}, title = {Goldilocks Test Sets for Face Verification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35504-35513} }
Rethinking Occlusion Modeling for UAV Tracking-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jian and Yu, Xincheng and Lin, Yi}, title = {Rethinking Occlusion Modeling for UAV Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13563-13573} }
LRDUN: A Low-Rank Deep Unfolding Network for Efficient Spectral Compressive Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, He and Guo, Yujun and He, Wei}, title = {LRDUN: A Low-Rank Deep Unfolding Network for Efficient Spectral Compressive Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10556-10566} }
Exploring Spatial Intelligence from a Generative Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Muzhi and Jiang, Shunyao and Zheng, Huanyi and Luo, Zekai and Zhong, Hao and Li, Anzhou and Wang, Kaijun and Rong, Jintao and Liu, Yang and Chen, Hao and Lin, Tao and Shen, Chunhua}, title = {Exploring Spatial Intelligence from a Generative Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2582-2592} }
The Blind Spot of Adaptation: Quantifying and Mitigating Forgetting in Fine-tuned Driving Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Runhao and Wang, Hanshi and Yang, Yixiang and Ma, Qianli and Zhou, Jingmeng and Zhang, Zhipeng}, title = {The Blind Spot of Adaptation: Quantifying and Mitigating Forgetting in Fine-tuned Driving Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10621-10631} }
Abstract 3D Perception for Spatial Intelligence in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yifan and Zhan, Fangneng and Zhou, Kaichen and Du, Yilun and Liang, Paul Pu and Pfister, Hanspeter}, title = {Abstract 3D Perception for Spatial Intelligence in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38647-38656} }
Learning to Infer Parameterized Representations of Plants from 3D Scans-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ghrer_2026_CVPR, author = {Ghrer, Samara and Godin, Christophe and Wuhrer, Stefanie}, title = {Learning to Infer Parameterized Representations of Plants from 3D Scans}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42236-42245} }
Investigating Self-Supervised Representations for Audio-Visual Deepfake Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Boldisor_2026_CVPR, author = {Boldisor, Dragos-Alexandru and Smeu, Stefan and Oneata, Dan and Oneata, Elisabeta}, title = {Investigating Self-Supervised Representations for Audio-Visual Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43018-43029} }
CLP: A Real-World Dataset of Contaminated Lens Protectors for Robust Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Sungyong and Choi, Sooyoung and Koh, Hyunsuh and Choi, Youngjae and Kim, Heewon}, title = {CLP: A Real-World Dataset of Contaminated Lens Protectors for Robust Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3794-3804} }
TF-SSD: A Strong Pipeline via Synergic Mask Filter for Training-free Co-salient Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Zhijin and Jin, Shuo and Yu, Siyue and Wu, Shuwei and Zhang, Bingfeng and Yu, Li and Xiao, Jimin}, title = {TF-SSD: A Strong Pipeline via Synergic Mask Filter for Training-free Co-salient Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32216-32225} }
QueryOcc: Query-based Self-Supervision for 3D Semantic Occupancy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lilja_2026_CVPR, author = {Lilja, Adam and Lan, Ji and Fu, Junsheng and Hammarstrand, Lars}, title = {QueryOcc: Query-based Self-Supervision for 3D Semantic Occupancy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21399-21408} }
Structure-to-Intensity Diffusion for Adverse-Weather LiDAR Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Ni_2026_CVPR, author = {Ni, Peiyang and Yang, Longyu and Zhang, Lu and Saito, Kuniaki and Tan, Yap-Peng and Shen, Fumin and Shen, Heng Tao and Zhu, Xiaofeng and Hu, Ping}, title = {Structure-to-Intensity Diffusion for Adverse-Weather LiDAR Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35904-35914} }
InstAP: Instance-Aware Vision-Language Pre-Train for Spatial-Temporal Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kumar_2026_CVPR, author = {Kumar, Ashutosh and Saini, Rajat and Pan, Jingjing and Erdogan, Mustafa and Zhang, Mingfang and Le Dem, Betty and Kobori, Norimasa and Kong, Quan}, title = {InstAP: Instance-Aware Vision-Language Pre-Train for Spatial-Temporal Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3079-3090} }
AV-Reasoner: Improving and Benchmarking Clue-Grounded Audio-Visual Counting for MLLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Lidong and Chen, Guo and Wei, Zhu and Li, Zhiqi and Liu, Yicheng and Lu, Tong}, title = {AV-Reasoner: Improving and Benchmarking Clue-Grounded Audio-Visual Counting for MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33477-33487} }
HDW-SR: High-Frequency Guided Diffusion Model based on Wavelet Decomposition for Image Super-Resolution-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Chao and Zhang, Boqian and Xu, Jinghao and Jiang, Guang}, title = {HDW-SR: High-Frequency Guided Diffusion Model based on Wavelet Decomposition for Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23462-23472} }
WRIVINDER: Towards Spatial Intelligence for Geo-locating Ground Images onto Satellite Imagery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gudavalli_2026_CVPR, author = {Gudavalli, Chandrakanth and Mohammed, Tajuddin Manhar and Yadav, Abhay and Bhaskar, Ananth Vishnu and Prajapati, Hardik and Peng, Cheng and Chellappa, Rama and Chandrasekaran, Shivkumar and Manjunath, B. S.}, title = {WRIVINDER: Towards Spatial Intelligence for Geo-locating Ground Images onto Satellite Imagery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33703-33713} }
WaDi: Weight Direction-aware Distillation for One-step Image Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Lei and Cheng, Yang and Li, Senmao and Wu, Ge and Wang, Yaxing and Yang, Jian}, title = {WaDi: Weight Direction-aware Distillation for One-step Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5574-5584} }
Polarization State Tracing for Reflection Removal and Color-Consistent Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Dongyue and Lu, Yang and Tian, Jiandong}, title = {Polarization State Tracing for Reflection Removal and Color-Consistent Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5680-5689} }
VIAFormer: Voxel-Image Alignment Transformer for High-Fidelity Voxel Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Tiancheng and Pan, Bowen and Chen, Lingxi and Lyu, Jiangjing and Lv, Chengfei and Niu, Chaoyue and Wu, Fan}, title = {VIAFormer: Voxel-Image Alignment Transformer for High-Fidelity Voxel Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29060-29070} }
MV-TAP: Tracking Any Point in Multi-View Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Koo_2026_CVPR, author = {Koo, Jahyeok and Kim, In{\`e}s Hyeonsu and Kim, Mungyeom and Park, Junghyun and Park, Seohyeon and Kim, Jaeyeong and Yi, Jung and Cho, Seokju and Kim, Seungryong}, title = {MV-TAP: Tracking Any Point in Multi-View Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20932-20941} }
MHopReg: Efficient Hierarchical Multi-Hop Graph Search for Point Cloud Registration-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Yue and Xiao, Feng and Yuan, Yongzhe and Li, Hao and Feng, Kaiyuan and Gong, Maoguo and Miao, Qiguang and Ma, Wenping}, title = {MHopReg: Efficient Hierarchical Multi-Hop Graph Search for Point Cloud Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24217-24226} }
PPISP: Physically-Plausible Compensation and Control of Photometric Variations in Radiance Field Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Deutsch_2026_CVPR, author = {Deutsch, Isaac and Mo{\"e}nne-Loccoz, Nicolas and State, Gavriel and Gojcic, Zan}, title = {PPISP: Physically-Plausible Compensation and Control of Photometric Variations in Radiance Field Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7289-7298} }
MiniCPM-V 4.5: Cooking Efficient MLLMs via Architecture, Data, and Training Recipe-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Tianyu and Wang, Zefan and Wang, Chongyi and Huang, Fuwei and Ma, Wenshuo and He, Zhihui and Cai, Tianchi and Chen, Weize and Huang, Yuxiang and Zhao, Ranchi and Xu, Bokai and Cui, Junbo and Xu, Yingjing and Ruan, Liqing and Zhang, Luoyuan and Liu, Hanyu and Tang, Jingkun and Liu, Hongyuan and Guo, Qining and Hu, Wenhao and He, Bingxiang and Zhou, Jie and Cai, Jie and Qi, Ji and Guo, Zonghao and Chen, Chi and Zeng, Guoyang and Li, Yuxuan and Cui, Ganqu and Ding, Ning and Han, Xu and Yao, Yuan and Liu, Zhiyuan and Sun, Maosong}, title = {MiniCPM-V 4.5: Cooking Efficient MLLMs via Architecture, Data, and Training Recipe}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11704-11715} }
EVATok: Adaptive Length Video Tokenization for Efficient Visual Autoregressive Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiong_2026_CVPR, author = {Xiong, Tianwei and Liew, Jun Hao and Huang, Zilong and Lin, Zhijie and Feng, Jiashi and Liu, Xihui}, title = {EVATok: Adaptive Length Video Tokenization for Efficient Visual Autoregressive Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23249-23259} }
End-to-End Language-Action Model for Humanoid Whole Body Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yuxuan and Jiang, Haobin and Yao, Shiqing and Ding, Ziluo and Lu, Zongqing}, title = {End-to-End Language-Action Model for Humanoid Whole Body Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38399-38409} }
Self-Diffusion Driven Blind Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yanlong and Luo, Guanxiong}, title = {Self-Diffusion Driven Blind Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26711-26720} }
Delta Rectified Flow Sampling for Text-to-Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Beaudouin_2026_CVPR, author = {Beaudouin, Gaspard and Li, Minghan and Kim, Jaeyeon and Yoon, Sung-Hoon and Wang, Mengyu}, title = {Delta Rectified Flow Sampling for Text-to-Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18662-18672} }
Multi-level Causal LLM-based Text-to-Motion Generation with Human Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xiaodong and Bao, Qian and Liu, Xudong and Fang, Jianping and Fang, Jintao and Zhang, Yongdong and Mei, Tao and Liu, Wu}, title = {Multi-level Causal LLM-based Text-to-Motion Generation with Human Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9342-9351} }
EgoProx: Evaluating MLLMs on Egocentric 3D Proximity Reasoning Across a Cognitive Hierarchy-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jinzhao and Chen, Yinuo and Piao, Dongxu and Pan, Panwang and Yu, Yifan and Wang, Dong and Yan, Honglei and Yue, Liang and Wang, Shaofei and Chen, Yixin and Huang, Siyuan and Liu, Miao}, title = {EgoProx: Evaluating MLLMs on Egocentric 3D Proximity Reasoning Across a Cognitive Hierarchy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23751-23762} }
IDESplat: Iterative Depth Probability Estimation for Generalizable 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Long_2026_CVPR, author = {Long, Wei and Wu, Haifeng and Jiang, Shiyin and Zhang, Jinhua and Ji, Xinchun and Gu, Shuhang}, title = {IDESplat: Iterative Depth Probability Estimation for Generalizable 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33248-33258} }
Reliable Policy Transfer for Safety-Aware End-to-End Driving with Deep Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Borhan_2026_CVPR, author = {Borhan, Uddin Md. and Raza, Arif and Lin, Zhiliang and Wang, Lu and Li, Jianqiang and Chen, Jie}, title = {Reliable Policy Transfer for Safety-Aware End-to-End Driving with Deep Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32134-32143} }
LacTokGen: Latent Consistency Tokenizer for 1024-pixel Image Generation by 256 Tokens-
[pdf]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Qingsong and Zhang, Luyuan and Zhang, Zhao and Li, Siyuan and Huang, Zhe and Yang, Zhenyu and Lu, Haonan}, title = {LacTokGen: Latent Consistency Tokenizer for 1024-pixel Image Generation by 256 Tokens}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30368-30380} }
ForeHOI: Feed-forward 3D Object Reconstruction from Daily Hand-Object Interaction Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yuantao and Chang, Jiahao and Ye, Chongjie and Zhang, Chaoran and Fang, Zhaojie and Li, Chenghong and Han, Xiaoguang}, title = {ForeHOI: Feed-forward 3D Object Reconstruction from Daily Hand-Object Interaction Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8868-8879} }
BEV-SLD: Self-Supervised Scene Landmark Detection for Global Localization with LiDAR Bird's-Eye View Images-
[pdf]
[supp]
[bibtex]@InProceedings{Skuddis_2026_CVPR, author = {Skuddis, David and Ress, Vincent and Zhang, Wei and Nyako, Vincent Ofosu and Haala, Norbert}, title = {BEV-SLD: Self-Supervised Scene Landmark Detection for Global Localization with LiDAR Bird's-Eye View Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31400-31409} }
Learning Generalizable 3D Medical Image Representations from Mask-Guided Self-Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Yunhe and Zhang, Yabin and Wang, Chong and Liu, Jiaming and Varma, Maya and Delbrouck, Jean-Benoit and Chaudhari, Akshay and Langlotz, Curtis}, title = {Learning Generalizable 3D Medical Image Representations from Mask-Guided Self-Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13744-13754} }
ST4R-Splat: Spatio-Temporal Referring Segmentation in 4D Gaussian Splatting-
[pdf]
[bibtex]@InProceedings{Meng_2026_CVPR, author = {Meng, Yuming and Wu, Dong and Zha, Hongbin}, title = {ST4R-Splat: Spatio-Temporal Referring Segmentation in 4D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17598-17608} }
Dejavu: Towards Experience Feedback Learning for Embodied Intelligence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Shaokai and Ji, Yanbiao and Li, Qiuchang and Zhang, Zhiyi and He, Qichen and Xie, Wenyuan and Zhang, Guodong and Bayramli, Bayram and Ding, Yue and Lu, Hongtao}, title = {Dejavu: Towards Experience Feedback Learning for Embodied Intelligence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29578-29587} }
Toward Diffusible High-Dimensional Latent Spaces: A Frequency Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lai_2026_CVPR, author = {Lai, Bolin and Wang, XuDong and Rambhatla, Saketh and Rehg, James M. and Kira, Zsolt and Girdhar, Rohit and Misra, Ishan}, title = {Toward Diffusible High-Dimensional Latent Spaces: A Frequency Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43450-43460} }
Archon: A Unified Multimodal Model for Holistic Digital Human Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Bao_2026_CVPR, author = {Bao, Chong and Liu, Shichen and Yu, Lijun and Futschik, David and Moschoglou, Stylianos and Srivastava, Shefali and Bai, Ziqian and Tan, Feitong and Zhang, Guofeng and Cui, Zhaopeng and Fanello, Sean and Zhang, Yinda}, title = {Archon: A Unified Multimodal Model for Holistic Digital Human Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16464-16474} }
Suppressing Non-Semantic Noise in Masked Image Modeling Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Hjelkrem-Tan_2026_CVPR, author = {Hjelkrem-Tan, Martine and Aasan, Marius and Chakraborty, Rwiddhi and Arteaga, Gabriel Y. and Choi, Changkyu and Rivera, Ad{\'\i}n Ram{\'\i}rez}, title = {Suppressing Non-Semantic Noise in Masked Image Modeling Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19319-19329} }
CORE: Compact Object-centric REpresentations as a New Paradigm for Token Merging in LVLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2026_CVPR, author = {Lei, Jingyu and Wang, Gaoang and Lee, Der-Horng}, title = {CORE: Compact Object-centric REpresentations as a New Paradigm for Token Merging in LVLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39593-39605} }
Harmonized Feature Conditioning and Frequency-Prompt Personalization for Multi-Rater Medical Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Karimijafarbigloo_2026_CVPR, author = {Karimijafarbigloo, Sanaz and Khosravi, Armin and Kheyrkhah, Alireza and Azad, Reza and Reyes, Mauricio and Merhof, Dorit}, title = {Harmonized Feature Conditioning and Frequency-Prompt Personalization for Multi-Rater Medical Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22794-22803} }
Velox: Learning Representations of 4D Geometry and Appearance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Malik_2026_CVPR, author = {Malik, Anagh and Chan, Dorian and Zhao, Xiaoming and Lindell, David B. and Tuzel, Oncel and Chang, Jen-Hao Rick}, title = {Velox: Learning Representations of 4D Geometry and Appearance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19894-19906} }
Beyond Matching to Tiles: Bridging Unaligned Aerial and Satellite Views for Vision-Only UAV Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Kejia and Zhou, Haoyang and Xu, Ruoyu and Wang, Peicheng and Song, Mingli and Zhang, Haofei}, title = {Beyond Matching to Tiles: Bridging Unaligned Aerial and Satellite Views for Vision-Only UAV Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5359-5368} }
TRM-VLA: Temporal-Aware Chain-of-Thought Reasoning and Memorization for Vision-Language-Action Models-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xiang and Li, Ya-Li and Wang, Yuan and Wang, Shengjin}, title = {TRM-VLA: Temporal-Aware Chain-of-Thought Reasoning and Memorization for Vision-Language-Action Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10943-10953} }
Make it SING: Analyzing Semantic Invariants in Classifiers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yadid_2026_CVPR, author = {Yadid, Harel and Levi, Meir Yossef and Betser, Roy and Gilboa, Guy}, title = {Make it SING: Analyzing Semantic Invariants in Classifiers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9911-9920} }
PRISM: Prototype-based Reasoning with Inter-modal Semantic Mining for Interpretable Image Recognition-
[pdf]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Anni and Yang, Yu-Bin}, title = {PRISM: Prototype-based Reasoning with Inter-modal Semantic Mining for Interpretable Image Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2853-2863} }
Aligning Text, Images and 3D Structure Token-by-Token-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sahoo_2026_CVPR, author = {Sahoo, Aadarsh and Tibrewal, Vansh and Gkioxari, Georgia}, title = {Aligning Text, Images and 3D Structure Token-by-Token}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14905-14914} }
Event Structural Valley: A Unified Theoretical and Practical Framework for Event Camera Autofocus-
[pdf]
[supp]
[bibtex]@InProceedings{Xiang_2026_CVPR, author = {Xiang, Xijie and Zhu, Lin and Zhang, Wei and Tian, Yonghong}, title = {Event Structural Valley: A Unified Theoretical and Practical Framework for Event Camera Autofocus}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {936-945} }
Linking Perception, Confidence and Accuracy in MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2026_CVPR, author = {Du, Yuetian and Wang, Yucheng and Zhang, Rongyu and Xu, Zhijie and Yang, Boyu and Kong, Ming and Liu, Jie and Zhu, Qiang}, title = {Linking Perception, Confidence and Accuracy in MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25914-25924} }
Diagram2Structure: Unlocking LLMs' Diagram Comprehension through DiagramDiff, a Framework for Structuring Offline Diagrams-
[pdf]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Haoxiang and Li, Yaokun and Huang, Zeyuan and Gao, Cangjun and He, Qiang and Li, Qingkun and Deng, Xiaoming and Ma, Cuixia and Lai, Yu-Kun and Liu, Yong-Jin and Wang, Hongan}, title = {Diagram2Structure: Unlocking LLMs' Diagram Comprehension through DiagramDiff, a Framework for Structuring Offline Diagrams}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24395-24404} }
BiMotion: B-spline Motion for Text-guided Dynamic 3D Character Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Miaowei and Yan, Qingxuan and Cao, Zhi and Li, Yayuan and Mac Aodha, Oisin and Corso, Jason J. and Vaxman, Amir}, title = {BiMotion: B-spline Motion for Text-guided Dynamic 3D Character Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10152-10164} }
AVA-Bench: Atomic Visual Ability Benchmark for Vision Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mai_2026_CVPR, author = {Mai, Zheda and Chowdhury, Arpita and Wang, Zihe and Jeon, Sooyoung and Wang, Lemeng and Hou, Jiacheng and Kil, Jihyung and Chao, Wei-Lun}, title = {AVA-Bench: Atomic Visual Ability Benchmark for Vision Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25925-25937} }
MacTok: Robust Continuous Tokenization for Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Hengyu and Gao, Xin and Li, Guanghao and Yan, Yuxiang and Ruan, Jiaoyang and Ma, Junpeng and Wang, Haoyu Albert and Pu, Jian}, title = {MacTok: Robust Continuous Tokenization for Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43407-43417} }
JRM: Joint Reconstruction Model for Multiple Objects without Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Qirui and Siddiqui, Yawar and Frost, Duncan and Aroudj, Samir and Avetisyan, Armen and Newcombe, Richard and Chang, Angel X. and Engel, Jakob and Howard-Jenkins, Henry}, title = {JRM: Joint Reconstruction Model for Multiple Objects without Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {307-316} }
CAPT: Confusion-Aware Prompt Tuning for Reducing Vision-Language Misalignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2026_CVPR, author = {Shao, Maoyuan and Gao, Yutong and Huang, Xinyang and Sun, Lijuan and Nan, Guoshun and Zhu, Chuang}, title = {CAPT: Confusion-Aware Prompt Tuning for Reducing Vision-Language Misalignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3154-3164} }
MarkushGrapher-2: End-to-end Multimodal Recognition of Chemical Structures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Strohmeyer_2026_CVPR, author = {Strohmeyer, Tim and Morin, Lucas and Meijer, Gerhard Ingmar and Weber, Valery and Nassar, Ahmed and Staar, Peter}, title = {MarkushGrapher-2: End-to-end Multimodal Recognition of Chemical Structures}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31176-31185} }
MOMO: Mars Orbital MOdel Foundation Model for Mars Orbital Applications-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Purohit_2026_CVPR, author = {Purohit, Mirali and Gajera, Bimal and Mehta, Irish and Tokas, Bhanu and Adler, Jacob and Lu, Steven and Dickenshied, Scott and Diniega, Serina and Bue, Brian and Rebbapragada, Umaa and Kerner, Hannah}, title = {MOMO: Mars Orbital MOdel Foundation Model for Mars Orbital Applications}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27772-27782} }
EG-3DVG: Expression and Geometry Aware Grounding Decoder for 3D Visual Grounding-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, GwangWook and Lee, Hyo-Jun and Baek, Jong-Hyeon and Kim, Hanul and Koh, Yeong Jun}, title = {EG-3DVG: Expression and Geometry Aware Grounding Decoder for 3D Visual Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2625-2634} }
LLaDA-V: Large Language Diffusion Models with Visual Instruction Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{You_2026_CVPR, author = {You, Zebin and Nie, Shen and Zhang, Xiaolu and Zhou, Jun and Lu, Zhiwu and Wen, Ji-Rong and Li, Chongxuan}, title = {LLaDA-V: Large Language Diffusion Models with Visual Instruction Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10093-10105} }
C-GenReg: Training-Free 3D Point Cloud Registration by Multi-View-Consistent Geometry-to-Image Generation with Probabilistic Modalities Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Haitman_2026_CVPR, author = {Haitman, Yuval and Efraim, Amit and Francos, Joseph M.}, title = {C-GenReg: Training-Free 3D Point Cloud Registration by Multi-View-Consistent Geometry-to-Image Generation with Probabilistic Modalities Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3004-3013} }
Scene-VLM: Multimodal Video Scene Segmentation via Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Berman_2026_CVPR, author = {Berman, Nimrod and Botach, Adam and Ben-Baruch, Emanuel and Hakimi, Shunit Haviv and Gendler, Asaf and Naiman, Ilan and Yosef, Erez and Kviatkovsky, Igor}, title = {Scene-VLM: Multimodal Video Scene Segmentation via Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39848-39857} }
Synergistic Bleeding Region and Point Detection in Laparoscopic Surgical Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pei_2026_CVPR, author = {Pei, Jialun and Zhou, Zhangjun and Guo, Diandian and Li, Zhixi and Qin, Jing and Du, Bo and Heng, Pheng-Ann}, title = {Synergistic Bleeding Region and Point Detection in Laparoscopic Surgical Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1396-1405} }
CICA: Coupling Confidence-Aware Pretraining with Confidence-Informed Attention for Robust Multimodal Sentiment Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Haoyu and Chen, Xiaoliang and Miao, Duoqian and Qin, Xiaolin and Li, Xianyong and Du, Yajun}, title = {CICA: Coupling Confidence-Aware Pretraining with Confidence-Informed Attention for Robust Multimodal Sentiment Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37842-37851} }
LiDAR Prompted Spatio-Temporal Multi-View Stereo for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Qihao and Liu, Jiarun and Ni, Ziqian and Xu, Jianyun and Yang, Sheng and Xie, Tao and Zhao, Lijun and Li, Ruifeng}, title = {LiDAR Prompted Spatio-Temporal Multi-View Stereo for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14567-14577} }
LongVideo-R1: Smart Navigation for Low-cost Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2026_CVPR, author = {Qiu, Jihao and Xie, Lingxi and Huo, Xinyue and Tian, Qi and Ye, Qixiang}, title = {LongVideo-R1: Smart Navigation for Low-cost Long Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40505-40515} }
Zero-Shot Image Denoising via Hybrid Prior-Guided Pseudo Sample Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Xiaole and Pang, Qingsong and Zhang, Xiaobo and Xu, Xun and Gong, Xun and Yang, Yan and Li, Tianrui}, title = {Zero-Shot Image Denoising via Hybrid Prior-Guided Pseudo Sample Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {22648--22657} }
CoIn: Coverage and Informativeness-Guided Token Reduction for Efficient Large Multimodal Models-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2026_CVPR, author = {Du, Chenxi and Deng, Yongheng and Liu, Jiani and Zhang, Yujia and Chen, Xi and Ren, Ju}, title = {{CoIn}: Coverage and Informativeness-Guided Token Reduction for Efficient Large Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {10492--10501} }
EVA: Efficient Reinforcement Learning for End-to-End Video Agent-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yaolun and Wang, Ruohui and Wang, Jiahao and Tang, Yepeng and Zheng, Xuanyu and Duan, Haonan and Lu, Hao and Deng, Hanming and Lu, Lewei}, title = {{EVA}: Efficient Reinforcement Learning for End-to-End Video Agent}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12289--12299} }
HyCal: A Training-Free Prototype Calibration Method for Cross-Discipline Few-Shot Class-Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Eunju and Kim, MiHyeon and Kwon, JuneHyoung and Lee, Yoonji and Kim, JiHyun and Jang, Soojin and Kim, YoungBin}, title = {{HyCal}: A Training-Free Prototype Calibration Method for Cross-Discipline Few-Shot Class-Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29462--29471} }
ADSeeker: A Knowledge-Grounded Reasoning Framework for Industry Anomaly Detection and Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Kai and Zhang, Zekai and Sun, Xihe and Wang, Anpeng and Nie, Jingmeng and Chen, Qinghui and Hao, Han and Guo, Jianyuan and Zhang, Jinglin}, title = {{ADSeeker}: A Knowledge-Grounded Reasoning Framework for Industry Anomaly Detection and Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {21379--21388} }
When Local Rules Create Global Order: Self-Organized Representation Learning for Latent Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Lian_2026_CVPR, author = {Lian, Junrong and Deng, Weijian and Wei, Pengxu and Chen, Yaqin and Ye, Qixiang and Lin, Liang}, title = {When Local Rules Create Global Order: Self-Organized Representation Learning for Latent Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {9445--9454} }
HDR-VLM: HDR-Domain Adaptation of VLMs and Preference-Aligned Quality Assessment for HDR Video Color Grading-
[pdf]
[supp]
[bibtex]@InProceedings{Yuan_2026_CVPR, author = {Yuan, Hao and Zhang, Jiabin and Wu, Yajing and Pang, Ruixuan and Li, Jing}, title = {{HDR-VLM}: {HDR}-Domain Adaptation of {VLMs} and Preference-Aligned Quality Assessment for {HDR} Video Color Grading}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40792--40801} }
MeshFlow: Efficient Artistic Mesh Generation via MeshVAE and Flow-based Diffusion Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Weiyu and Toisoul, Antoine and Monnier, Tom and Shapovalov, Roman and Ranjan, Rakesh and Tan, Ping and Vedaldi, Andrea}, title = {{MeshFlow}: Efficient Artistic Mesh Generation via {MeshVAE} and Flow-based Diffusion Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5849--5858} }
ReMoRa: Multimodal Large Language Model based on Refined Motion Representation for Long-Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yashima_2026_CVPR, author = {Yashima, Daichi and Kurita, Shuhei and Oda, Yusuke and Sugiura, Komei}, title = {{ReMoRa}: Multimodal Large Language Model based on Refined Motion Representation for Long-Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {31845--31855} }
Seeing without Pixels: Perception from Camera Trajectories-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2026_CVPR, author = {Xue, Zihui and Grauman, Kristen and Damen, Dima and Zisserman, Andrew and Han, Tengda}, title = {Seeing without Pixels: Perception from Camera Trajectories}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38836--38847} }
UniLS: End-to-End Audio-Driven Avatars for Unified Listening and Speaking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chu_2026_CVPR, author = {Chu, Xuangeng and Liu, Ruicong and Huang, Yifei and Liu, Yun and Peng, Yichen and Zheng, Bo}, title = {{UniLS}: End-to-End Audio-Driven Avatars for Unified Listening and Speaking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {25142--25152} }
CineScene: Implicit 3D as Effective Scene Representation for Cinematic Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Kaiyi and Huang, Yukun and Li, Yu and Bai, Jianhong and Wang, Xintao and Lin, Zinan and Ning, Xuefei and Yu, Jiwen and Wang, Yu and Liu, Xihui}, title = {{CineScene}: Implicit {3D} as Effective Scene Representation for Cinematic Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {25381--25392} }
NIL: No-data Imitation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Albaba_2026_CVPR, author = {Albaba, Mert and Li, Chenhao and Diomataris, Markos and Taheri, Omid and Krause, Andreas and Black, Michael J.}, title = {{NIL}: No-data Imitation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {20823--20833} }
CrackSSM: Reviving SSMs for Crack Segmentation via Dynamic Scanning-
[pdf]
[bibtex]@InProceedings{Gu_2026_CVPR, author = {Gu, Yubin and Hou, Boyang and Meng, Yuan and Luo, Wenting and Ji, Jiayi and Sun, Xiaoshuai}, title = {{CrackSSM}: Reviving {SSMs} for Crack Segmentation via Dynamic Scanning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {10721--10730} }
A Unified Framework for Knowledge Transfer in Bidirectional Model Scaling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2026_CVPR, author = {Shen, Jianlu and Feng, Fu and Xu, Jiaze and Xie, Yucheng and Lv, Jiaqi and Geng, Xin}, title = {A Unified Framework for Knowledge Transfer in Bidirectional Model Scaling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {34535--34545} }
Humanoid Generative Pre-Training for Zero-Shot Motion Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Qi_2026_CVPR, author = {Qi, Zekun and Chen, Xuchuan and Wang, Jilong and Lin, Chenghuai and Lian, Yunrui and Zhang, Wenyao and Yu, Xinqiang and Wang, He and Yi, Li}, title = {Humanoid Generative Pre-Training for Zero-Shot Motion Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {20834--20844} }
MoECLIP: Patch-Specialized Experts for Zero-shot Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Jun Yeong and Seo, JunYoung and Kang, Minji and Park, Yu Rang}, title = {{MoECLIP}: Patch-Specialized Experts for Zero-shot Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35534--35544} }
SAMIX: Reinforcing SAM2 with Semantic Adapter and Reference Selecting Policy for Mix-Supervised Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Qiang and Wei, Jiajie and Yi, Zhenyu and Yan, Zhifen and Guo, Yingjie and Shi, Hongkuan and Ji, Ge-Peng and Li, Qiang and Wang, Zhiwei}, title = {{SAMIX}: Reinforcing {SAM2} with Semantic Adapter and Reference Selecting Policy for Mix-Supervised Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {17948--17958} }
Rectifying Latent Space for Generative Single-Image Reflection Removal-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Mingjia and Hu, Jin and Wang, Hainuo and Hu, Qiming and Wang, Jiarui and Guo, Xiaojie}, title = {Rectifying Latent Space for Generative Single-Image Reflection Removal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8397--8407} }
Unstitching the Chimera: Frame-Level Risk and Train-Free Mitigation for Video Hallucination-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Songyuan and Tang, Guijian and Hu, Kun and Wang, Haotian and Liu, Shixuan and Yang, Wenjing and Lan, Long and Tan, Huibin}, title = {Unstitching the Chimera: Frame-Level Risk and Train-Free Mitigation for Video Hallucination}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4188--4198} }
Refracting Reality: Generating Images with Realistic Transparent Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Yue and Tao, Enze and Campbell, Dylan}, title = {Refracting Reality: Generating Images with Realistic Transparent Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4312--4321} }
SLARM: Streaming and Language-Aligned Reconstruction Model for Dynamic Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2026_CVPR, author = {Qiu, Zhicheng and Meng, Jiarui and Luo, Tong-an and Huang, Yican and Feng, Xuan and Li, Xuanfu and Xu, Zhan}, title = {{SLARM}: Streaming and Language-Aligned Reconstruction Model for Dynamic Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29023--29034} }
Stochastic Ray Tracing for the Reconstruction of 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Peiyu and Zhao, Shuang and Sun, Xin and Mullia, Krishna and Fei, Raymond and Georgiev, Iliyan}, title = {Stochastic Ray Tracing for the Reconstruction of {3D} {Gaussian} Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {19001--19010} }
Uni-Encoder Meets Multi-Encoders: Representation Before Fusion for Brain Tumor Segmentation with Missing Modalities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Peibo and Xue, Xiaotian and Zhang, Jinshuo and Wang, Zihao and Liu, Jinhua and Fu, Shujun and Bao, Fangxun and Yeo, Si Yong}, title = {Uni-Encoder Meets Multi-Encoders: Representation Before Fusion for Brain Tumor Segmentation with Missing Modalities}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {15627--15638} }
Training-free Mixed-Resolution Latent Upsampling for Spatially Accelerated Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2026_CVPR, author = {Jeong, Wongi and Lee, Kyungryeol and Seo, Hoigi and Chun, Se Young}, title = {Training-free Mixed-Resolution Latent Upsampling for Spatially Accelerated Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {18673--18682} }
PersonaLive! Expressive Portrait Image Animation for Live Streaming-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zhiyuan and Pun, Chi-Man and Fang, Chen and Wang, Jue and Cun, Xiaodong}, title = {{PersonaLive}! Expressive Portrait Image Animation for Live Streaming}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {18118--18128} }
ProM3E: Probabilistic Masked MultiModal Embedding Model for Ecology-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sastry_2026_CVPR, author = {Sastry, Srikumar and Khanal, Subash and Dhakal, Aayush and Lin, Jiayu and Cher, Dan and Jarosz, Phoenix and Jacobs, Nathan}, title = {{ProM3E}: Probabilistic Masked MultiModal Embedding Model for Ecology}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {20564--20574} }
Unified Camera Positional Encoding for Controlled Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Cheng and Li, Boying and Wei, Meng and Cao, Yan-Pei and Gambardella, Camilo and Phung, Dinh and Cai, Jianfei}, title = {Unified Camera Positional Encoding for Controlled Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38027--38037} }
Toward Low-Cost yet Effective Temporal Learning for UAV Tracking-
[pdf]
[bibtex]@InProceedings{Xue_2026_CVPR, author = {Xue, Chaocan and Liang, Qihua and Zhong, Bineng and Zu, Yanting and Xue, Yuanliang and Xia, Haiying and Song, Shuxiang}, title = {Toward Low-Cost yet Effective Temporal Learning for {UAV} Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42538--42548} }
Refining Few-Step Text-to-Multiview Diffusion via Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Ziyi and Shen, Li and Ye, Deheng and Luo, Yong and Zhao, Huangxuan and Liu, Meng and Yu, Wei and Zhang, Lefei}, title = {Refining Few-Step Text-to-Multiview Diffusion via Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2401--2411} }
Rethinking Intermediate Representation for VLM-based Robot Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Weiliang and Gao, Jialin and Pan, Jia-Hui and Wang, Gang and Li, Li Erran and Liu, Yun-Hui and Ding, Mingyu and Heng, Pheng-Ann and Fu, Chi-Wing}, title = {Rethinking Intermediate Representation for {VLM}-based Robot Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29652--29662} }
Learning Diffeomorphism for Medical Image Registration with Time-Embedded Architectures Using Semigroup Regularization-
[pdf]
[supp]
[bibtex]@InProceedings{Matinkia_2026_CVPR, author = {Matinkia, Mohammadjavad and Ray, Nilanjan}, title = {Learning Diffeomorphism for Medical Image Registration with Time-Embedded Architectures Using Semigroup Regularization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {28775--28785} }
FUN REC * Reconstructing Functional 3D Scenes from Egocentric Interaction Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Delitzas_2026_CVPR, author = {Delitzas, Alexandros and Zhang, Chenyangguang and Gavryushin, Alexey and Di Mario, Tommaso and Sun, Boyang and Dabral, Rishabh and Guibas, Leonidas and Theobalt, Christian and Pollefeys, Marc and Engelmann, Francis and Barath, Daniel}, title = {{FUN REC} * Reconstructing Functional {3D} Scenes from Egocentric Interaction Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {28446--28457} }
Tri-Modal Fusion Transformers for UAV-based Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Iaboni_2026_CVPR, author = {Iaboni, Craig and Abichandani, Pramod}, title = {Tri-Modal Fusion Transformers for {UAV}-based Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4373--4382} }
RetFormer: Multimodal Retrieval for Enhancing Image Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Tianrui and Liang, Xiubo and Wang, Hongzhi}, title = {{RetFormer}: Multimodal Retrieval for Enhancing Image Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2704--2714} }
Revisiting Unknowns: Towards Effective and Efficient Open-Set Active Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zong_2026_CVPR, author = {Zong, Chen-Chen and Chi, Yu-Qi and Wang, Xie-Yang and Cui, Yan and Huang, Sheng-Jun}, title = {Revisiting Unknowns: Towards Effective and Efficient Open-Set Active Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {17756--17765} }
Driving on Registers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kirby_2026_CVPR, author = {Kirby, Ellington and Boulch, Alexandre and Xu, Yihong and Yin, Yuan and Puy, Gilles and Zablocki, {\'E}loi and Bursuc, Andrei and Gidaris, Spyros and Marlet, Renaud and Bartoccioni, Florent and Cao, Anh-Quan and Samet, Nermin and VU, Tuan-Hung and Cord, Matthieu}, title = {Driving on Registers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {32058--32069} }
MoE-GRPO: Optimizing Mixture-of-Experts via Reinforcement Learning in Vision-Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ko_2026_CVPR, author = {Ko, Dohwan and Park, Jinyoung and Choi, Seoung and Lee, Sanghyeok and Lee, Seohyun and Kim, Hyunwoo J.}, title = {{MoE-GRPO}: Optimizing Mixture-of-Experts via Reinforcement Learning in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {14957--14967} }
Sparse Task Vector Mixup with Hypernetworks for Efficient Knowledge Transfer in Whole-Slide Image Prognosis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Pei and Zeng, Xiangxiang and Ma, Tengfei and Xing, Yucheng and Ren, Xuanbai and Liu, Yiping}, title = {Sparse Task Vector Mixup with Hypernetworks for Efficient Knowledge Transfer in Whole-Slide Image Prognosis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35238--35247} }
TokenTrace: Multi-Concept Attribution through Watermarked Token Recovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Li and Agarwal, Shruti and Collomosse, John and Xie, Pengtao and Asnani, Vishal}, title = {{TokenTrace}: Multi-Concept Attribution through Watermarked Token Recovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42942--42952} }
Twin-T & TwintVQA: A Reliable Structure-Detail Separating VLM and a Comprehensive Benchmark for Chart and Table Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Bao_2026_CVPR, author = {Bao, Jiahua and Cheng, Siyao and Du, Jiaxing and Xia, Qingtao and He, Changjiang and Lang, Zeming and Liu, Jie}, title = {{Twin-T} \& {TwintVQA}: A Reliable Structure-Detail Separating {VLM} and a Comprehensive Benchmark for Chart and Table Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4850--4859} }
HTNav: A Hybrid Navigation Framework with Tiered Structure for Urban Aerial Vision-and-Language Navigation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Chengjie and Pan, Cong and Liu, Zijian and Liu, Ningzhong and Qin, Jie}, title = {{HTNav}: A Hybrid Navigation Framework with Tiered Structure for Urban Aerial Vision-and-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {10976--10985} }
NanoSD: Edge Efficient Foundation Model for Real Time Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sanyal_2026_CVPR, author = {Sanyal, Subhajit and Miriyala, Srinivas Soumitri and Bankar, Akshay Janardan and Arveti, Manjunath and Vajrala, Sowmya and Pandith, Shreyas and Kodavanti, Sravanth and Ameta, Abhishek and Harshit, Harshit and Unde, Amit Satish}, title = {{NanoSD}: Edge Efficient Foundation Model for Real Time Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8449--8459} }
MAD: Modality-Adaptive Decoding for Mitigating Cross-Modal Hallucinations in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chung_2026_CVPR, author = {Chung, Sangyun and Kim, Se Yeon and Chee, Youngchae and Ro, Yong Man}, title = {{MAD}: Modality-Adaptive Decoding for Mitigating Cross-Modal Hallucinations in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40175--40185} }
MMR-AD: A Large-Scale Multimodal Dataset for Benchmarking General Anomaly Detection with Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2026_CVPR, author = {Yao, Xincheng and Qian, Zefeng and Shi, Chao and Song, Jiayang and Zhang, Chongyang}, title = {{MMR-AD}: A Large-Scale Multimodal Dataset for Benchmarking General Anomaly Detection with Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {43072--43082} }
Solving Minimal Problems Without Matrix Inversion Using FFT-Based Interpolation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Haidong and Bhayani, Snehal and Heikkila, Janne}, title = {Solving Minimal Problems Without Matrix Inversion Using {FFT}-Based Interpolation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {19771--19780} }
Teacher-Guided Routing for Sparse Vision Mixture-of-Experts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kada_2026_CVPR, author = {Kada, Masahiro and Yoshihashi, Ryota and Ikehata, Satoshi and Kawakami, Rei and Sato, Ikuro}, title = {Teacher-Guided Routing for Sparse Vision Mixture-of-Experts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6198--6208} }
DeltaQuant: 4-bit Video Diffusion Models with Spatiotemporal Delta Smoothing-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xingyang and Tesfai, Samuel and Zhang, Zhekai and Xi, Haocheng and Yang, Shuo and Zhang, Lvmin and Sun, Yufei and Peng, Kelly and Agrawala, Maneesh and Stoica, Ion and Keutzer, Kurt and Zhu, Jun-Yan and Han, Song and Lin, Yujun and Li, Muyang}, title = {{DeltaQuant}: 4-bit Video Diffusion Models with Spatiotemporal Delta Smoothing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {43578--43588} }
AstraNav-Memory: Contexts Compression for Long Memory-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Junjun and Xue, Xinda and Ren, Botao and Luo, Minghua and Chen, Jintao and Bai, Haochen and You, Liangliang and Xu, Mu}, title = {{AstraNav-Memory}: Contexts Compression for Long Memory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8097--8109} }
Improving Motion in Image-to-Video Models via Adaptive Low-Pass Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, June Suk and Lee, Kyungmin and Yu, Sihyun and Choi, Yisol and Shin, Jinwoo and Lee, Kimin}, title = {Improving Motion in Image-to-Video Models via Adaptive Low-Pass Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40364--40375} }
Parameter-Efficient Semantic Augmentation for Enhancing Open-Vocabulary Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Weihao and Wang, Runqi and Duan, Xiaoyue and Zhang, Jinchao and Yang, Ang and Jing, Liping}, title = {Parameter-Efficient Semantic Augmentation for Enhancing Open-Vocabulary Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {20367--20376} }
Globally Optimal Pose from Orthographic Silhouettes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sengupta_2026_CVPR, author = {Sengupta, Agniva and Kus, Dilara and Li, Jianning and Zachow, Stefan}, title = {Globally Optimal Pose from Orthographic Silhouettes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {11029--11038} }
Otil: Accelerating Diffusion Model Inference via Communication-Efficient Multi-GPU Parallelism-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xin and Tian, Shujun and Lu, Tao and Bao, Han and Wang, Zonghui and Chen, Wenzhi}, title = {Otil: Accelerating Diffusion Model Inference via Communication-Efficient Multi-{GPU} Parallelism}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38487--38497} }
Computation and Communication Efficient Federated Unlearning via On-server Gradient Conflict Mitigation and Expression-
[pdf]
[supp]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Minh-Duong and Wanasekara, Senura and Nguyen, Le-Tuan and Pham, Quoc-Viet and Yong, Ken-Tye and Tran, Nguyen H. and Le, Dung D.}, title = {Computation and Communication Efficient Federated Unlearning via On-server Gradient Conflict Mitigation and Expression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3347--3357} }
RegionFuse: Region-Adaptive Pixel Distribution Learning for Infrared and Visible Image Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Xia_2026_CVPR, author = {Xia, Jianghan and Song, Hong and Li, Jinfu and Lin, Yucong and Ma, Shihan and Fan, Jingfan and Ai, Danni and Fu, Tianyu and Xiao, Deqiang and Yang, Jian}, title = {{RegionFuse}: Region-Adaptive Pixel Distribution Learning for Infrared and Visible Image Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {19539--19548} }
AnomalyVFM -- Transforming Vision Foundation Models into Zero-Shot Anomaly Detectors-
[pdf]
[supp]
[bibtex]@InProceedings{Fucka_2026_CVPR, author = {Fu{\v{c}}ka, Matic and Zavrtanik, Vitjan and Sko{\v{c}}aj, Danijel}, title = {{AnomalyVFM} -- Transforming Vision Foundation Models into Zero-Shot Anomaly Detectors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35555--35566} }
VDE: Training-Free Accelerating Rectified Flow Model via Velocity Decomposition and Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Junwen and Liang, Jinglin and Chen, Hongyuan and Huang, Shuangping}, title = {{VDE}: Training-Free Accelerating Rectified Flow Model via Velocity Decomposition and Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {37918--37928} }
TreeTeaming: Autonomous Red-Teaming of Vision-Language Models via Hierarchical Strategy Exploration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Chunxiao and Li, Lijun and Shao, Jing}, title = {{TreeTeaming}: Autonomous Red-Teaming of Vision-Language Models via Hierarchical Strategy Exploration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {37684--37693} }
Task-Driven Implicit Representations for Automated Design of LiDAR Systems-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Behari_2026_CVPR, author = {Behari, Nikhil and Young, Aaron and Klinghoffer, Tzofi and Dave, Akshat and Raskar, Ramesh}, title = {Task-Driven Implicit Representations for Automated Design of {LiDAR} Systems}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {24248--24257} }
Mind the Gap: Transferring Labels to Align Object Detection Datasets-
[pdf]
[supp]
[bibtex]@InProceedings{Kennerley_2026_CVPR, author = {Kennerley, Mikhail and Aviles-Rivero, Angelica I. and Sch{\"o}nlieb, Carola-Bibiane and Tan, Robby T.}, title = {Mind the Gap: Transferring Labels to Align Object Detection Datasets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4353--4362} }
Decoupled Generative Modeling for Human-Object Interaction Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2026_CVPR, author = {Jung, Hwanhee and Lee, Seunggwan and Yoon, Jeongyoon and Kim, SeungHyeon and Nam, Giljoo and Huang, Qixing and Kim, Sangpil}, title = {Decoupled Generative Modeling for Human-Object Interaction Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2253--2263} }
Learning Latent Concepts for Detecting Out-of-Distribution Objects-
[pdf]
[supp]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Ting and Dong, Junhao and Ong, Yew-Soon}, title = {Learning Latent Concepts for Detecting Out-of-Distribution Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {28723--28733} }
Proxy3D: Efficient 3D Representations for Vision-Language Models via Semantic Clustering and Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Jerry and Sun, Haowen and Gudovskiy, Denis and Nakata, Yohei and Okuno, Tomoyuki and Keutzer, Kurt and Zheng, Wenzhao}, title = {{Proxy3D}: Efficient {3D} Representations for Vision-Language Models via Semantic Clustering and Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {23816--23825} }
HiCoGen: Hierarchical Compositional Text-to-Image Generation in Diffusion Models via Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Hongji and Zhou, Yucheng and Han, Wencheng and Tao, Runzhou and Qiu, Zhongying and Yang, Jianfei and Shen, Jianbing}, title = {HiCoGen: Hierarchical Compositional Text-to-Image Generation in Diffusion Models via Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36793-36802} }
SpatialReward: Verifiable Spatial Reward Modeling for Fine-Grained Spatial Consistency in Text-to-Image Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Sashuai and Zhou, Qiang and Ma, Junpeng and Cao, Yue and Hu, Ruofan and Zhang, Ziang and Yang, Xiaoda and Wang, Zhibin and Song, Jun and Yu, Cheng and Zheng, Bo and Zhao, Zhou}, title = {SpatialReward: Verifiable Spatial Reward Modeling for Fine-Grained Spatial Consistency in Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {647-658} }
Feed-Forward One-Shot Animatable Textured Mesh Avatar Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Yisheng}, title = {Feed-Forward One-Shot Animatable Textured Mesh Avatar Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4145-4156} }
QUANTIPHY: A Quantitative Benchmark Evaluating Physical Reasoning Abilities of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Puyin_2026_CVPR, author = {Puyin, Li and Xiang, Tiange and Mao, Ella and Wei, Shirley and Chen, Xinye and Masood, Adnan and Fei-Fei, Li and Adeli, Ehsan}, title = {QUANTIPHY: A Quantitative Benchmark Evaluating Physical Reasoning Abilities of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33174-33184} }
From None to All: Self-Supervised 3D Reconstruction via Novel View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Ranran and Luo, Weixun and Mao, Ye and Mikolajczyk, Krystian}, title = {From None to All: Self-Supervised 3D Reconstruction via Novel View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37358-37369} }
Tokenization Allows Multimodal Large Language Models to Understand, Generate and Edit Architectural Floor Plans-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qin_2026_CVPR, author = {Qin, Sizhong and Weber, Ramon Elias and Lu, Xinzheng}, title = {Tokenization Allows Multimodal Large Language Models to Understand, Generate and Edit Architectural Floor Plans}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10430-10440} }
RetimeGS: Continuous-Time Reconstruction of 4D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xuezhen and Ma, Li and Shen, Yulin and Wang, Zeyu and Sander, Pedro V.}, title = {RetimeGS: Continuous-Time Reconstruction of 4D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7340-7350} }
MV3DIS: Multi-View Mask Matching via 3D Guides for Zero-Shot 3D Instance Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yibo and Zhang, Yigong and Xie, Jin}, title = {MV3DIS: Multi-View Mask Matching via 3D Guides for Zero-Shot 3D Instance Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17916-17926} }
InvAD: Inversion-based Reconstruction-Free Anomaly Detection with Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sakai_2026_CVPR, author = {Sakai, Shunsuke and He, Xiangteng and Gu, Chunzhi and Sigal, Leonid and Hasegawa, Tatsuhito}, title = {InvAD: Inversion-based Reconstruction-Free Anomaly Detection with Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21389-21398} }
PHASE-Net: Physics-Grounded Harmonic Attention System for Efficient Remote Photoplethysmography Measurement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Bo and Guo, Dan and Cao, Junzhe and Xu, Yong and Zou, Bochao and Tan, Tao and Sun, Yue and Yu, Zitong}, title = {PHASE-Net: Physics-Grounded Harmonic Attention System for Efficient Remote Photoplethysmography Measurement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21198-21207} }
Trainable Log-linear Sparse Attention for Efficient Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Yifan and Xiao, Zeqi and Wei, Tianyi and Yang, Shuai and Pan, Xingang}, title = {Trainable Log-linear Sparse Attention for Efficient Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9424-9433} }
PLACID: Identity-Preserving Multi-Object Compositing via Video Diffusion with Synthetic Trajectories-
[pdf]
[supp]
[bibtex]@InProceedings{Tarres_2026_CVPR, author = {Tarr\'es, Gemma Canet and Baradad, Manel and Moreno-Noguer, Francesc and Li, Yumeng}, title = {PLACID: Identity-Preserving Multi-Object Compositing via Video Diffusion with Synthetic Trajectories}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38060-38070} }
OmniZip: Audio-Guided Dynamic Token Compression for Fast Omnimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tao_2026_CVPR, author = {Tao, Keda and Shao, Kele and Yu, Bohan and Wang, Weiqiang and Liu, Jian and Wang, Huan}, title = {OmniZip: Audio-Guided Dynamic Token Compression for Fast Omnimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17682-17692} }
Think with 3D: Geometric Imagination Grounded Spatial Reasoning from Limited Views-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zhangquan and Zhang, Manyuan and Yu, Xinlei and Luo, Xufang and Sun, Mingze and Pan, Zihao and An, Xiang and Feng, Yan and Pei, Peng and Cai, Xunliang and Huang, Ruqi}, title = {Think with 3D: Geometric Imagination Grounded Spatial Reasoning from Limited Views}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2613-2624} }
Neural Field-Based 3D Surface Reconstruction of Microstructures from Multi-Detector Signals in Scanning Electron Microscopy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Shuo and Li, Yijin and Zheng, Xi and Zhang, Guofeng}, title = {Neural Field-Based 3D Surface Reconstruction of Microstructures from Multi-Detector Signals in Scanning Electron Microscopy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7266-7277} }
TerraScope: Pixel-Grounded Visual Reasoning for Earth Observation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shu_2026_CVPR, author = {Shu, Yan and Ren, Bin and Xiong, Zhitong and Zhu, Xiao Xiang and Demir, Beg\"um and Sebe, Nicu and Rota, Paolo}, title = {TerraScope: Pixel-Grounded Visual Reasoning for Earth Observation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16712-16722} }
RAVEN: Erasing Invisible Watermarks via Novel View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shamshad_2026_CVPR, author = {Shamshad, Fahad and Lukas, Nils and Nandakumar, Karthik}, title = {RAVEN: Erasing Invisible Watermarks via Novel View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {82-91} }
FedMPT: Federated Multi-Label Prompt Tuning of Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xucong and Wang, Pengkun and Zhao, Zhe and Yu, Liheng and Wang, Shuang and Wang, Yang}, title = {FedMPT: Federated Multi-Label Prompt Tuning of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17226-17236} }
Prune Wisely, Reconstruct Sharply: Compact 3D Gaussian Splatting via Adaptive Pruning and Difference-of-Gaussian Primitives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Haoran and Huang, Guoxi and Zhang, Fan and Bull, David and Anantrasirichai, Nantheera}, title = {Prune Wisely, Reconstruct Sharply: Compact 3D Gaussian Splatting via Adaptive Pruning and Difference-of-Gaussian Primitives}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11716-11725} }
Deconstructing the Failure of Ideal Noise Correction: A Three-Pillar Diagnosis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Chen and Zhi, Zhuo and Huang, Zhao and Ge, Jiawei and Xiao, Ling and Sebe, Nicu and Tzimiropoulos, Georgios and Patras, Ioannis}, title = {Deconstructing the Failure of Ideal Noise Correction: A Three-Pillar Diagnosis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34512-34523} }
Temporal Interaction in Spiking Transformers with Multi-Delay Mixer-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Kexin and Liu, Hanwen and Song, Zeyang and Liu, Yang and Zhang, Jieyuan and Wang, Shuai and Wu, Jibin and Zhang, Malu and Yang, Yang}, title = {Temporal Interaction in Spiking Transformers with Multi-Delay Mixer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34555-34565} }
Fine-Grained GRPO for Precise Preference Alignment in Flow Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Yujie and Ling, Pengyang and Bu, Jiazi and Wang, Yibin and Zang, Yuhang and Wang, Jiaqi and Niu, Li and Zhai, Guangtao}, title = {Fine-Grained GRPO for Precise Preference Alignment in Flow Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20045-20054} }
Paparazzo: Active Mapping of Moving 3D Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Allegro_2026_CVPR, author = {Allegro, Davide and Li, Shiyao and Ghidoni, Stefano and Lepetit, Vincent}, title = {Paparazzo: Active Mapping of Moving 3D Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12586-12594} }
Attribute-Preserving Pseudo-Labeling for Diffusion-Based Face Swapping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2026_CVPR, author = {Kang, Jiwon and Choi, Yeji and Lee, JoungBin and Jang, Wooseok and Choi, Jinhyeok and Kang, Taekeun and Park, Yongjae and Kim, Myungin and Kim, Seungryong}, title = {Attribute-Preserving Pseudo-Labeling for Diffusion-Based Face Swapping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18651-18661} }
Sketch2Colab: Sketch-Conditioned Multi-Human Animation via Controllable Flow Distillation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Daiya_2026_CVPR, author = {Daiya, Divyanshu and Bera, Aniket}, title = {Sketch2Colab: Sketch-Conditioned Multi-Human Animation via Controllable Flow Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30675-30685} }
Efficient Equivariant Transformer for Self-Driving Agent Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Scott and Chen, Dian and Wong, Kelvin and Zhang, Chris and Fallah, Kion and Urtasun, Raquel}, title = {Efficient Equivariant Transformer for Self-Driving Agent Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32164-32172} }
Temporal Representation Enhancement (TRE): Learning to Forget Dominant Patterns for Enhanced Temporal Spiking Features-
[pdf]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Wei and Yang, Li and Wang, Yufei and Xiao, Han and Cai, Boyu and Hu, Weiming}, title = {Temporal Representation Enhancement (TRE): Learning to Forget Dominant Patterns for Enhanced Temporal Spiking Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34480-34490} }
Dynamic Token Reweighting for Robust Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Tanqiu and Liang, Jiacheng and Zhu, Rongyi and Zhou, Jiawei and Ma, Fenglong and Wang, Ting}, title = {Dynamic Token Reweighting for Robust Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24481-24491} }
Paper2Figure: A Multi-Agent Collaborative System for Figure Generation Towards Academic Research Paper-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Siwei and Ji, Haonian and Xin, Siyang and Shi, Juanquan and Qiu, Shi and Ye, Xinyu and Xia, Peng and Liu, Jiaqi and Chen, Zhaorun and Zhou, Yiyang and Li, Linjie and Wang, Lijuan and Yao, Huaxiu}, title = {Paper2Figure: A Multi-Agent Collaborative System for Figure Generation Towards Academic Research Paper}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29157-29166} }
OctoT2I: A Self-Evolving Agentic Text-to-Image Router-
[pdf]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Xu and Chen, Bin and Li, Gehui and Duan, Yule and Wang, Ronggang and Zhang, Jian}, title = {OctoT2I: A Self-Evolving Agentic Text-to-Image Router}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31628-31638} }
Chorus: Multi-Teacher Pretraining for Holistic 3D Gaussian Scene Encoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yue and Ma, Qi and Yang, Runyi and Ma, Mengjiao and Ren, Bin and Popovic, Nikola and Sebe, Nicu and Gevers, Theo and Van Gool, Luc and Paudel, Danda Pani and Oswald, Martin R.}, title = {Chorus: Multi-Teacher Pretraining for Holistic 3D Gaussian Scene Encoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21431-21442} }
FlowFM: Advancing Dark Optical Flow Estimation with Flow Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Zuo_2026_CVPR, author = {Zuo, Fengyuan and Jin, Haiyan and Zhang, Yuanlin and Xiao, Zhaolin and Wang, Bin and Mu, Yuerong}, title = {FlowFM: Advancing Dark Optical Flow Estimation with Flow Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6837-6846} }
Residual Diffusion Bridge Model for Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Hebaixu and Zhang, Jing and Chen, Haoyang and Guo, Haonan and Wang, Di and Ma, Jiayi and Du, Bo}, title = {Residual Diffusion Bridge Model for Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8375-8386} }
Rewis3d: Reconstruction Improves Weakly-Supervised Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ernst_2026_CVPR, author = {Ernst, Jonas and Boettcher, Wolfgang and Hoyer, Lukas and Lenssen, Jan Eric and Schiele, Bernt}, title = {Rewis3d: Reconstruction Improves Weakly-Supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13091-13101} }
Keep it SymPL: Symbolic Projective Layout for Allocentric Spatial Reasoning in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2026_CVPR, author = {Jang, Jaeyun and Shin, Seunghui and Park, Taeho and Hwang, Hyoseok}, title = {Keep it SymPL: Symbolic Projective Layout for Allocentric Spatial Reasoning in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9604-9614} }
QueryMe: Query-Driven Open-Vocabulary 3D Object Affordances Grounding from Multimodal Evidence-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Weiyu and Li, Ru and Liu, Jiaqi and Zhao, Sizhe and Liu, Qinglin and Zhang, Shengping}, title = {QueryMe: Query-Driven Open-Vocabulary 3D Object Affordances Grounding from Multimodal Evidence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2603-2612} }
Portable Active Learning for Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Sharma_2026_CVPR, author = {Sharma, Rashi and Bersamin, Justin Timothy C. and Subramanian, Karthikk}, title = {Portable Active Learning for Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25514-25523} }
Dynamic Label Noise Suppression with Optimal Teacher Pool for Facial Expression Recognition-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yuzhuang and Tian, Xiaolin and Sun, Qigong}, title = {Dynamic Label Noise Suppression with Optimal Teacher Pool for Facial Expression Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32389-32398} }
LASAR: Towards Spatio-temporal Reasoning with Latent Cognitive Map-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Jinzhou and Liu, Sidi and Xiu, Waikit and Chen, Weixing and Wang, Keze}, title = {LASAR: Towards Spatio-temporal Reasoning with Latent Cognitive Map}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23880-23890} }
DREAM: Document Recognition with Explicit Adaptive Memory-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Tianqi and Wu, Di and Peng, Liangrui and Huang, Yifan and Zhao, Kemeng and Li, Shuo and Li, Zhiyu and Wang, Yizhu and Jiang, Borui and Li, Yuyang}, title = {DREAM: Document Recognition with Explicit Adaptive Memory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2715-2724} }
YOSE: You Only Select Essential Tokens for Efficient DiT-based Video Object Removal-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Chenyang and Lei, Lina and Li, Fan and Guo, Chunle and Kong, Dehong and Qin, Xinran and Wang, Zhixin and Cheng, Mingming and Li, Chongyi}, title = {YOSE: You Only Select Essential Tokens for Efficient DiT-based Video Object Removal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32926-32935} }
RHINO: Reconstructing Human Interactions with Novel Objects from Monocular Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2026_CVPR, author = {Xue, Lixin and Zheng, Chengwei and Paschalidis, Georgios and Guo, Chen and Kaufmann, Manuel and Zarate, Juan and Tzionas, Dimitrios}, title = {RHINO: Reconstructing Human Interactions with Novel Objects from Monocular Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13835-13845} }
Geoint-R1: Formalizing Multimodal Geometric Reasoning with Dynamic Auxiliary Constructions-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Jingxuan and Jia, Caijun and Chen, Qi and He, Honghao and Sun, Linzhuang and He, Conghui and Wu, Lijun and Yu, Bihui and Tan, Cheng}, title = {Geoint-R1: Formalizing Multimodal Geometric Reasoning with Dynamic Auxiliary Constructions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2547-2556} }
Geometric Neural Distance Fields for Learning Human Motion Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Zhengdi and Foti, Simone and Zhang, Linguang and Zhao, Amy and Keskin, Cem and Zafeiriou, Stefanos and Birdal, Tolga}, title = {Geometric Neural Distance Fields for Learning Human Motion Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2232-2242} }
FARMER: Flow AutoRegressive Transformer over Pixels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Guangting and Zhao, Qinyu and Yang, Tao and Xiao, Fei and Lin, Zhijie and Wu, Jie and Deng, Jiajun and Zhang, Yanyong and Zhu, Rui}, title = {FARMER: Flow AutoRegressive Transformer over Pixels}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25730-25741} }
IBISAgent: Reinforcing Pixel-Level Visual Reasoning in MLLMs for Universal Biomedical Object Referring and Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Yankai and Li, Qiaoru and Xu, Binlu and Sun, Haoran and Ding, Chao and Dong, Junting and Cai, Yuxiang and Zhang, Xuhong and Yin, Jianwei}, title = {IBISAgent: Reinforcing Pixel-Level Visual Reasoning in MLLMs for Universal Biomedical Object Referring and Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20996-21005} }
CCCaption: Dual-Reward Reinforcement Learning for Complete and Correct Image Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Zhijiang and Wang, Linhua and Qi, Jiaxin and Jiang, Weihao and Hou, Peng and Zeng, Anxiang and Huang, Jianqiang}, title = {CCCaption: Dual-Reward Reinforcement Learning for Complete and Correct Image Captioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22153-22163} }
Virtual Nodes Guided Dynamic Graph Neural Network for Brain Tumor Segmentation with Missing Modalities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tao_2026_CVPR, author = {Tao, Sha and Pan, Jiao and Guo, Yu and Yao, Chao}, title = {Virtual Nodes Guided Dynamic Graph Neural Network for Brain Tumor Segmentation with Missing Modalities}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37528-37537} }
LinVideo: A Post-Training Framework towards O(n) Attention in Efficient Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Yushi and Ge, Xingtong and Gong, Ruihao and Lv, Chengtao and Zhang, Jun}, title = {LinVideo: A Post-Training Framework towards O(n) Attention in Efficient Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23398-23408} }
Tell2Adapt: A Unified Framework for Source Free Unsupervised Domain Adaptation via Vision Foundation Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Yulong and Li, Shijie and Li, Ziyi and Qi, Lin}, title = {Tell2Adapt: A Unified Framework for Source Free Unsupervised Domain Adaptation via Vision Foundation Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6941-6950} }
Probing and Bridging Geometry-Interaction Cues for Affordance Reasoning in Vision Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Qing and Li, Xuesong and Zhang, Jing}, title = {Probing and Bridging Geometry-Interaction Cues for Affordance Reasoning in Vision Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2526-2536} }
Unifying Perception and Action: A Hybrid-Modality Pipeline with Implicit Visual Chain-of-Thought for Robotic Action Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Xiangkai and Xing, Lekai and Zhang, Han and Li, Wenzhong and Lu, Sanglu}, title = {Unifying Perception and Action: A Hybrid-Modality Pipeline with Implicit Visual Chain-of-Thought for Robotic Action Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22380-22390} }
The Devil Is in Gradient Entanglement: Energy-Aware Gradient Coordinator for Robust Generalized Category Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Haiyang and Pu, Nan and Cai, Yaqi and Long, Teng and Li, Wenjing and Sebe, Nicu and Zhong, Zhun}, title = {The Devil Is in Gradient Entanglement: Energy-Aware Gradient Coordinator for Robust Generalized Category Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3563-3573} }
MatAnyone 2: Scaling Video Matting via a Learned Quality Evaluator-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Peiqing and Zhou, Shangchen and Hao, Kai and Tao, Qingyi}, title = {MatAnyone 2: Scaling Video Matting via a Learned Quality Evaluator}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37476-37485} }
IF-Bench: Benchmarking and Enhancing MLLMs for Infrared Images with Generative Visual Prompting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Tao and Hong, Yuyang and Xia, Yang and Ding, Kun and Zhang, Zeyu and Wang, Ying and Xiang, Shiming and Pan, Chunhong}, title = {IF-Bench: Benchmarking and Enhancing MLLMs for Infrared Images with Generative Visual Prompting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8205-8215} }
DENALI: A Dataset Enabling Non-Line-of-Sight Spatial Reasoning with Low-Cost LiDARs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Behari_2026_CVPR, author = {Behari, Nikhil and Rivero, Diego and Apostolides, Luke and Ghosh, Suman and Liang, Paul Pu and Raskar, Ramesh}, title = {DENALI: A Dataset Enabling Non-Line-of-Sight Spatial Reasoning with Low-Cost LiDARs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3046-3055} }
VOLD: Reasoning Transfer from LLMs to Vision-Language Models via On-Policy Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bousselham_2026_CVPR, author = {Bousselham, Walid and Kuehne, Hilde and Schmid, Cordelia}, title = {VOLD: Reasoning Transfer from LLMs to Vision-Language Models via On-Policy Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26209-26218} }
FoV-Net: Rotation-Invariant CAD B-rep Learning via Field-of-View Ray Casting-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ballegeer_2026_CVPR, author = {Ballegeer, Matteo and Benoit, Dries F.}, title = {FoV-Net: Rotation-Invariant CAD B-rep Learning via Field-of-View Ray Casting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3024-3034} }
Harmonic Canvas: Inversion-Free Editing for Visually-Guided Music Style Transfer-
[pdf]
[supp]
[bibtex]@InProceedings{Lei_2026_CVPR, author = {Lei, Yue and Yang, Siqi and Zhong, Ting and Zhou, Fan}, title = {Harmonic Canvas: Inversion-Free Editing for Visually-Guided Music Style Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29727-29737} }
VideoCoF: Unified Video Editing with Temporal Reasoner-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Xiangpeng and Xie, Ji and Yang, Yiyuan and Ma, Yue and Huang, Yan and Xu, Min and Wu, Qiang}, title = {VideoCoF: Unified Video Editing with Temporal Reasoner}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37940-37949} }
RE-VLM: Event-Augmented Vision-Language Model for Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Hanqing and Liu, Mingjie and Cui, Luoping and Lin, Endian and Jiang, Donghong and Zhu, Chuang}, title = {RE-VLM: Event-Augmented Vision-Language Model for Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10377-10386} }
Scaling-Aware Data Selection for End-to-End Autonomous Driving Systems-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dimlioglu_2026_CVPR, author = {Dimlioglu, Tolga and Chang, Nadine and Shen, Maying and Mahmood, Rafid and Alvarez, Jose M.}, title = {Scaling-Aware Data Selection for End-to-End Autonomous Driving Systems}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17798-17808} }
Neural Dynamic GI: Random-Access Neural Compression for Temporal Lightmaps in Dynamic Lighting Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Jianhui and Zhou, Jian and Zhou, Zhi and Huang, Zhangjin and Li, Chao}, title = {Neural Dynamic GI: Random-Access Neural Compression for Temporal Lightmaps in Dynamic Lighting Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5316-5325} }
A Faster Path to Continual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Wei and Yuan, Hangjie and Zhao, Zixiang and Kang, Borui and Liu, Ziwei and Feng, Tao}, title = {A Faster Path to Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25088-25098} }
HAD: Heterogeneity-Aware Distillation for Lifelong Heterogeneous Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xuerui and Wang, Xuehao and Zhuang, Zhan and Zhao, Linglan and Li, Ziyue and Zhang, Xinmin and Song, Zhihuan and Zhang, Yu},
  title     = {HAD: Heterogeneity-Aware Distillation for Lifelong Heterogeneous Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10863--10873}
}
PaQ-DETR: Learning Pattern and Quality-Aware Dynamic Queries for Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Zhengjian and Zhuang, Jun and Mo, Kangtong and Chen, Qi and Liu, Rui and Zhang, Ye},
  title     = {PaQ-DETR: Learning Pattern and Quality-Aware Dynamic Queries for Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25504--25513}
}
VCU-Bridge: Hierarchical Visual Connotation Understanding via Semantic Bridging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2026_CVPR,
  author    = {Zhong, Ming and Wang, Yuanlei and Zhang, Liuzhou and An, Ruichuan and Zhang, Renrui and Liang, Hao and Lu, Ming and Shen, Ying and Zhang, Wentao},
  title     = {VCU-Bridge: Hierarchical Visual Connotation Understanding via Semantic Bridging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26187--26197}
}
Cross-Modal Emotion Transfer for Emotion Editing in Talking Face Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Chanhyuk and Kim, Taesoo and Lee, Donggyu and Jung, Siyeol and Kim, Taehwan},
  title     = {Cross-Modal Emotion Transfer for Emotion Editing in Talking Face Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1759--1770}
}
Seeing What Matters: Visual Preference Policy Optimization for Visual Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ni_2026_CVPR,
  author    = {Ni, Ziqi and Liang, Yuanzhi and Li, Rui and Zhou, Yi and Huang, Haibin and Zhang, Chi and Li, Xuelong},
  title     = {Seeing What Matters: Visual Preference Policy Optimization for Visual Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27260--27269}
}
Harnessing the Power of Foundation Models for Accurate Material Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Qingran and Yang, Fengwei and Zhu, Chaolun},
  title     = {Harnessing the Power of Foundation Models for Accurate Material Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3636--3645}
}
When Transformers Meet Mamba: A Hybrid Transformer-Mamba Network for Video Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Qi_2026_CVPR,
  author    = {Qi, Qiang and Wang, Xiao and Du, Zongyuan and Zhang, Yu},
  title     = {When Transformers Meet Mamba: A Hybrid Transformer-Mamba Network for Video Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18492--18502}
}
Motion 3-to-4: 3D Motion Reconstruction for 4D Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Hongyuan and Chen, Xingyu and Xu, Zexiang and Chen, Anpei},
  title     = {Motion 3-to-4: 3D Motion Reconstruction for 4D Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28947--28958}
}
Generative Modeling of Weights: Generalization or Memorization?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Boya and Yin, Yida and Xu, Zhiqiu and Liu, Zhuang},
  title     = {Generative Modeling of Weights: Generalization or Memorization?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41974--41984}
}
HorizonForge: Driving Scene Editing with Any Trajectories and Any Vehicles-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yifan and Pittaluga, Francesco and Tasneem, Zaid and You, Chenyu and Chandraker, Manmohan and Jiang, Ziyu},
  title     = {HorizonForge: Driving Scene Editing with Any Trajectories and Any Vehicles},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24895--24905}
}
EMAD: Evidence-Centric Grounded Multimodal Diagnosis for Alzheimer's Disease-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Qiuhui and Yao, Xuancheng and Zhou, Zhenglei and Hu, Xinyue and Hong, Yi},
  title     = {EMAD: Evidence-Centric Grounded Multimodal Diagnosis for Alzheimer's Disease},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23031--23040}
}
OnlineHMR: Video-based Online World-Grounded Human Mesh Recovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yiwen and Zheng, Ce and Wang, Yufu and Yang, Hsueh-Han Daniel and Wen, Liting and Jeni, L{\'a}szl{\'o} A.},
  title     = {OnlineHMR: Video-based Online World-Grounded Human Mesh Recovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13951--13961}
}
I'm a Map! Interpretable Motion-Attentive Maps: Spatio-Temporally Localizing Concepts in Video Diffusion Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Jun_2026_CVPR,
  author    = {Jun, Youngjun and Kang, Seil and Han, Woojung and Hwang, Seong Jae},
  title     = {I'm a Map! Interpretable Motion-Attentive Maps: Spatio-Temporally Localizing Concepts in Video Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11525--11535}
}
Parallel Jacobi Decoding for Fast Autoregressive Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Boya and Li, Ying and Jian, Siyong and Wang, Huan},
  title     = {Parallel Jacobi Decoding for Fast Autoregressive Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9008--9018}
}
Addressing Exacerbated Attention Sink for Source-Free Cross-Domain Few-Shot Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Yi_2026_CVPR,
  author    = {Yi, Shuai and Zou, Yixiong and Li, Yuhua and Li, Ruixuan},
  title     = {Addressing Exacerbated Attention Sink for Source-Free Cross-Domain Few-Shot Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29494--29503}
}
MODIX: A Training-Free Multimodal Information-Driven Positional Index Scaling for Vision-Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Ruoxiang and Yuan, Zhen},
  title     = {MODIX: A Training-Free Multimodal Information-Driven Positional Index Scaling for Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31534--31543}
}
HamiPose: Hamiltonian Optimization for Unsupervised Domain Adaptive Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Jiawen and Jiang, Fei and Zhu, Dandan and Zhou, Aimin},
  title     = {HamiPose: Hamiltonian Optimization for Unsupervised Domain Adaptive Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13856--13865}
}
CGU-Bayes: Causal Graph Uncertainty-Guided Bayesian Inference for Domain Generalization-
[pdf]
[supp]
[bibtex]@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Naiyu and Wang, Hanjing and Yu, Yue and Gao, Tian and Dhurandhar, Amit and Lee, Chung-Hao and Ji, Qiang},
  title     = {CGU-Bayes: Causal Graph Uncertainty-Guided Bayesian Inference for Domain Generalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10522--10532}
}
MSCD-GS: Motion-Separated Cooperative Deblurring Dynamic Reconstruction via Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Yongjian and Zou, Xu and Chen, Wenjun and Li, Huixuan and Xie, Xiaoen and Li, Chunxi and Huang, Shixiang and Zhang, Gang and Zhou, Jiahuan and Zhong, Sheng and Yan, Luxin},
  title     = {MSCD-GS: Motion-Separated Cooperative Deblurring Dynamic Reconstruction via Gaussian Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11726--11735}
}
Activation Matters: Test-time Activated Negative Labels for OOD Detection with Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yabin and Varma, Maya and Gao, Yunhe and Delbrouck, Jean-Benoit and Liu, Jiaming and Wang, Chong and Langlotz, Curtis},
  title     = {Activation Matters: Test-time Activated Negative Labels for OOD Detection with Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17462--17473}
}
3D-Aware Multi-Task Learning with Cross-View Correlations for Dense Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xiaoye and Tang, Chen and Yue, Xiangyu and Li, Wei-Hong},
  title     = {3D-Aware Multi-Task Learning with Cross-View Correlations for Dense Scene Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5793--5803}
}
CoIn3D: Revisiting Configuration-Invariant Multi-Camera 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kuang_2026_CVPR,
  author    = {Kuang, Zhaonian and Ding, Rui and Wang, Haotian and Zheng, Xinhu and Yang, Meng and Hua, Gang},
  title     = {CoIn3D: Revisiting Configuration-Invariant Multi-Camera 3D Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40707--40716}
}
FisherPoser: Human Motion Estimation from Sparse Observations with Hierarchical Region-Wise Fisher-Matrix Uncertainty Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Songpengcheng and Zhang, Qingyu and Su, Zhuo and Yang, Jiarui and Lai, Zengyuan and Wu, Qi and Pei, Ling},
  title     = {FisherPoser: Human Motion Estimation from Sparse Observations with Hierarchical Region-Wise Fisher-Matrix Uncertainty Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28413--28423}
}
Multigrain-aware Semantic Prototype Scanning and Tri-Token Prompt Learning Embraced High-Order RWKV for Pan-Sharpening-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Junfeng and Zhou, Wenyang and Li, Xueheng and He, Xuanhua and Gan, Jianhou and Ren, Wenqi},
  title     = {Multigrain-aware Semantic Prototype Scanning and Tri-Token Prompt Learning Embraced High-Order RWKV for Pan-Sharpening},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13234--13243}
}
Action Motifs: Self-Supervised Hierarchical Representation of Human Body Movements-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kinoshita_2026_CVPR,
  author    = {Kinoshita, Genki and Nakamura, Shu and Kawahara, Ryo and Nobuhara, Shohei and Kawanishi, Yasutomo and Nishino, Ko},
  title     = {Action Motifs: Self-Supervised Hierarchical Representation of Human Body Movements},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20139--20148}
}
StreamRAG: Enhancing Real-Time Video Understanding with Retrieval Augmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Junlin and Zheng, Quanlong and Zhang, Ruifei and Wang, Kuo and Zhang, Yanhao and Luo, Jinguo and Lu, Haonan and Wan, Xiang and Li, Guanbin},
  title     = {StreamRAG: Enhancing Real-Time Video Understanding with Retrieval Augmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38870--38879}
}
Improving Vision-language Models with Perception-centric Process Reward Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Min_2026_CVPR,
  author    = {Min, Yingqian and Zhou, Kun and Li, Yifan and Wu, Yuhuan and Peng, Han and Du, Yifan and Zhao, Wayne Xin and Yang, Min and Wen, Ji-Rong},
  title     = {Improving Vision-language Models with Perception-centric Process Reward Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33099--33109}
}
Repurposing 3D Generative Model for Autoregressive Layout Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Haoran and Niu, Yifan and Huang, Zehuan and Sun, Yang-Tian and Guo, Chunchao and Peng, Yuxin and Sheng, Lu},
  title     = {Repurposing 3D Generative Model for Autoregressive Layout Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3231--3243}
}
RnG: A Unified Transformer for Complete 3D Modeling from Partial Observations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2026_CVPR,
  author    = {Xiang, Mochu and Shen, Zhelun and Li, Xuesong and Ren, Jiahui and Zhang, Jing and Zhao, Chen and Liu, Shanshan and Feng, Haocheng and Wang, Jingdong and Dai, Yuchao},
  title     = {RnG: A Unified Transformer for Complete 3D Modeling from Partial Observations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {406--416}
}
S2FT: Parameter-Efficient Fine-Tuning in Sparse Spectrum Domain-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Baoquan and Yu, Zhehao and Zhang, Lisai and Lin, Kenghong and Chen, Tianran and Sun, Yuxi and Ye, Yunming and He, Yao},
  title     = {S2FT: Parameter-Efficient Fine-Tuning in Sparse Spectrum Domain},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20191--20201}
}
DarkAct: A RGB-Thermal Dataset and Fusion Framework for Multimodal Low-Light Action Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Yuanjun and Xiao, Aoran and Deng, Liqian and Tu, Zhigang},
  title     = {DarkAct: A RGB-Thermal Dataset and Fusion Framework for Multimodal Low-Light Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27345--27356}
}
PAF: Perturbation-Aware Filtering for Open-Set Semi-Supervised Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR,
  author    = {Han, Yinan and Jiang, Qing-Yuan},
  title     = {PAF: Perturbation-Aware Filtering for Open-Set Semi-Supervised Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24803--24812}
}
AKCMamba-YOLO: Selective State Space Models For Real-Time Object Detection-
[pdf]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Long and Wang, Hui and Xu, Man and Li, Zexuan and Fan, Zizhu},
  title     = {AKCMamba-YOLO: Selective State Space Models For Real-Time Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4438--4447}
}
Global Information Thresholding for Sufficient and Necessary Circuits-
[pdf]
[bibtex]@InProceedings{Cho_2026_CVPR,
  author    = {Cho, Jegyeong},
  title     = {Global Information Thresholding for Sufficient and Necessary Circuits},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3264--3273}
}
M3Grounder: Mask-Based Multi-Span and Multi-Granular Grounding for Document QA-
[pdf]
[supp]
[bibtex]@InProceedings{Venna_2026_CVPR,
  author    = {Venna, Venkata Kesav and Gunda, Sai Madhusudan and Jinka, Jyothi Swaroopa and Rachakonda, Hrithik Sagar and Srinivasan, Anirudh and Sarvadevabhatla, Ravi Kiran},
  title     = {M3Grounder: Mask-Based Multi-Span and Multi-Granular Grounding for Document QA},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23685--23695}
}
GeoRK2: Geometry-Guided Runge-Kutta Integration for Diffusion Transformer Acceleration-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Chaoqun and Fu, Zongjing and Chang, Powei and Zhang, Jinpeng and Xiang, Jianxiang and Gao, Yukang and Wang, Chenyu},
  title     = {GeoRK2: Geometry-Guided Runge-Kutta Integration for Diffusion Transformer Acceleration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9404--9413}
}
Towards Persistence: Learning Topological Constraints for Event-based Small Object Detection-
[pdf]
[bibtex]@InProceedings{He_2026_CVPR,
  author    = {He, Shiman and Chen, Nuo and Ying, Xinyi and Luo, Yihang and Shi, Yangsi and Lin, Zaiping and Li, Miao},
  title     = {Towards Persistence: Learning Topological Constraints for Event-based Small Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22291--22300}
}
Spatial Matters: Position-Guided 3D Referring Expression Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yabing and Tian, Zhuotao and Wang, Le and Qin, Zheng and Zhou, Sanping},
  title     = {Spatial Matters: Position-Guided 3D Referring Expression Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39486--39496}
}
WonderZoom: Multi-Scale 3D World Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Jin and Yu, Hong-Xing and Wu, Jiajun},
  title     = {WonderZoom: Multi-Scale 3D World Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5859--5869}
}
RF4D:Neural Radar Fields for Novel View Synthesis in Outdoor Dynamic Scenes-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jiarui and Li, Zhihao and Wang, Chong and Wen, Bihan},
  title     = {RF4D:Neural Radar Fields for Novel View Synthesis in Outdoor Dynamic Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15387--15397}
}
Concept-Aware Batch Sampling Improves Language-Image Pretraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ghosh_2026_CVPR,
  author    = {Ghosh, Adhiraj and Udandarao, Vishaal and Nguyen, Thao and Farina, Matteo and Cherti, Mehdi and Jitsev, Jenia and Oh, Sewoong and Ricci, Elisa and Schmidt, Ludwig and Bethge, Matthias},
  title     = {Concept-Aware Batch Sampling Improves Language-Image Pretraining},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3056--3068}
}
GeoFree-CoSeg: Unsupervised Point Cloud-Image Cross-Modal Co-Segmentation Without Geometric Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Duan_2026_CVPR,
  author    = {Duan, Xin and Liu, Xiabi and Pan, Liyuan},
  title     = {GeoFree-CoSeg: Unsupervised Point Cloud-Image Cross-Modal Co-Segmentation Without Geometric Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10778--10788}
}
Negative Binomial Variational Autoencoders for Overdispersed Latent Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yixuan and Sheng, Jinhao and Zhang, Wenxin and Kong, Quyu and Zhou, Feng},
  title     = {Negative Binomial Variational Autoencoders for Overdispersed Latent Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16289--16298}
}
Any Resolution Any Geometry: From Multi-View To Multi-Patch-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Wenqing and Li, Zhenyu and Lavreniuk, Mykola and Shi, Jian and Idoughi, Ramzi and Tang, Xiangjun and Wonka, Peter},
  title     = {Any Resolution Any Geometry: From Multi-View To Multi-Patch},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12576--12585}
}
Ultrasound-CLIP: Semantic-Aware Contrastive Pre-training for Ultrasound Image-Text Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2026_CVPR,
  author    = {Jin, Jiayun and Chai, Haolong and Huang, Xueying and Guo, Xiaoqing and Zheng, Zengwei and Zhou, Zhan and Wang, Junmei and Wang, Xinyu and Liu, Jie and Zhou, Binbin},
  title     = {Ultrasound-CLIP: Semantic-Aware Contrastive Pre-training for Ultrasound Image-Text Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6962--6971}
}
ReHyAt: Recurrent Hybrid Attention for Video Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ghafoorian_2026_CVPR,
  author    = {Ghafoorian, Mohsen and Habibian, Amirhossein},
  title     = {ReHyAt: Recurrent Hybrid Attention for Video Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40674--40684}
}
MultiCrafter: High-Fidelity Multi-Subject Generation via Disentangled Attention and Identity-Aware Preference Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Tao and Jiang, Yibo and Lu, Yehao and Wang, Zhizhong and Huang, Zeyi and Qin, Zequn and Li, Xi},
  title     = {MultiCrafter: High-Fidelity Multi-Subject Generation via Disentangled Attention and Identity-Aware Preference Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36691--36702}
}
Camouflage-aware Image-Text Retrieval via Expert Collaboration-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Yao and Mao, Zhongkuan and Wu, Xuan and Fu, Keren and Zhao, Qijun},
  title     = {Camouflage-aware Image-Text Retrieval via Expert Collaboration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23933--23943}
}
Benchmarking PhD-Level Coding in 3D Geometric Computer Vision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Wenyi and Luo, Renkai and Yu, Yue and Gao, Huan-ang and Gao, Mingju and Yuan, Li and Fu, Chaoyou and Zhao, Hao},
  title     = {Benchmarking PhD-Level Coding in 3D Geometric Computer Vision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30974--30985}
}
Mitigating Error Amplification in Fast Adversarial Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Mengnan and Zhang, Lihe and Wang, Bo and Zheng, Tianhang and Zhong, Hong and Min, Geyong},
  title     = {Mitigating Error Amplification in Fast Adversarial Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13346--13355}
}
AirSim360: A Panoramic Simulation Platform within Drone View-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ge_2026_CVPR,
  author    = {Ge, Xian and Pan, Yuling and Zhang, Yuhang and Li, Xiang and Zhang, Weijun and Zhang, Dizhe and Wan, Zhaoliang and Lin, Xin and Zhang, Xiangkai and Liang, Juntao and Li, Xiangtai and Jiang, WenJie and Du, Bo and Yang, Ming-Hsuan and Qi, Lu},
  title     = {AirSim360: A Panoramic Simulation Platform within Drone View},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26931--26940}
}
HP-Edit: A Human-Preference Post-Training Framework for Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Fan and Wang, Chonghuinan and Lei, Lina and Qiu, Yuping and Xu, Jiaqi and Jiang, Jiaxiu and Qin, Xinran and Chen, Zhikai and Song, Fenglong and Wang, Zhixin and Pei, Renjing and Zuo, Wangmeng},
  title     = {HP-Edit: A Human-Preference Post-Training Framework for Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43113--43123}
}
SocialNav: Training Human-Inspired Foundation Model for Socially-Aware Embodied Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Ziyi and Guo, Yingnan and Chu, Zedong and Luo, Minghua and Shen, Yanfen and Sun, Mingchao and Hu, Junjun and Xie, Shichao and Kuan, Yang and Shi, Pei and Gu, Zhining and Liu, Lu and Han, Honglin and Wu, Xiaolong and Xu, Mu and Zhang, Yu},
  title     = {SocialNav: Training Human-Inspired Foundation Model for Socially-Aware Embodied Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28796--28806}
}
Rethinking Position Embedding as a Context Controller for Multi-Reference and Multi-Shot Video Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Binyuan and Lu, Yuning and Jia, Weinan and Wang, Hualiang and Liu, Mu and Yang, Daiqing},
  title     = {Rethinking Position Embedding as a Context Controller for Multi-Reference and Multi-Shot Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23304--23313}
}
Complet4R: Geometric Complete 4D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Weibang and Li, Kenan and Chen, Zhuoguang and Yuan, Yijun and Zhao, Hang},
  title     = {Complet4R: Geometric Complete 4D Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {341--351}
}
MSRL: Scaling Generative Multimodal Reward Modeling via Multi-Stage Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Chenglong and Huo, Yifu and Gan, Yang and He, Qiaozhi and Meng, Qi and Li, Bei and Wang, Yan and Liu, Junfu and Zhou, Tianhua and Zhu, Jingbo and Xiao, Tong},
  title     = {MSRL: Scaling Generative Multimodal Reward Modeling via Multi-Stage Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29410--29420}
}
PixelRush: Ultra-Fast, Training-Free High-Resolution Image Generation via One-step Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Hong-Phuc and Nguyen, Phong and Tran, Anh},
  title     = {PixelRush: Ultra-Fast, Training-Free High-Resolution Image Generation via One-step Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35946--35955}
}
ShadowDraw: From Any Object to Shadow-Drawing Compositional Art-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Rundong and Snavely, Noah and Ma, Wei-Chiu},
  title     = {ShadowDraw: From Any Object to Shadow-Drawing Compositional Art},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24428--24437}
}
Cross-Scale Pansharpening via ScaleFormer and the PanScale Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Ke and He, Xuanhua and Li, Xueheng and Zhu, Lingting and Wang, Yingying and Ma, Ao and Zhang, Zhanjie and Zhou, Man and Xie, Chengjun and Zhang, Jie},
  title     = {Cross-Scale Pansharpening via ScaleFormer and the PanScale Benchmark},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13211--13221}
}
Camera Control for Text-to-Image Generation via Learning Viewpoint Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Xinxuan and Fowlkes, Charless and Berg, Alexander C.},
  title     = {Camera Control for Text-to-Image Generation via Learning Viewpoint Tokens},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29222--29232}
}
MatLat: Material Latent Space for PBR Texture Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yeo_2026_CVPR,
  author    = {Yeo, Kyeongmin and Min, Yunhong and Kim, Jaihoon and Sung, Minhyuk},
  title     = {MatLat: Material Latent Space for PBR Texture Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4602--4612}
}
LoG3D: Ultra-High-Resolution 3D Shape Modeling via Local-to-Global Partitioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Xinran and Lai, Shuichang and Lyu, Jiangjing and Li, Hongjie and Pan, Bowen and Li, Yuanqi and Guo, Jie and Zhou, Zhengkang and Guo, Yanwen},
  title     = {LoG3D: Ultra-High-Resolution 3D Shape Modeling via Local-to-Global Partitioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5945--5955}
}
It's Never Too Late: Noise Optimization for Collapse Recovery in Trained Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Harrington_2026_CVPR,
  author    = {Harrington, Anne and Koepke, A. Sophia and Karthik, Shyamgopal and Darrell, Trevor and Efros, Alexei A.},
  title     = {It's Never Too Late: Noise Optimization for Collapse Recovery in Trained Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43124--43134}
}
StaR-KVQA: Structured Reasoning Traces for Implicit-Knowledge Visual Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2026_CVPR,
  author    = {Wen, Zhihao and Wei, Wenkang and Fang, Yuan and Yu, Xingtong and Zhang, Hui and Zhu, Weicheng and Zhang, Xin},
  title     = {StaR-KVQA: Structured Reasoning Traces for Implicit-Knowledge Visual Question Answering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5114--5124}
}
A Causal Marriage between VLM and IRM from Understanding to Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Ziliang and Xiao, Tianang and Zhang, Jusheng and Zheng, Yongsen and Liu, Yang and Lai, Zhao-rong and Lin, Liang},
  title     = {A Causal Marriage between {VLM} and {IRM} from Understanding to Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4749--4760},
}
CamDirector: Towards Long-Term Coherent Video Trajectory Editing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Kejia and Shi, Zhihao and Wan, Weilin and Zhou, Yuhongze and Yu, Yuanhao and Zuo, Xinxin and Sun, Qiang and Lu, Juwei},
  title     = {{CamDirector}: Towards Long-Term Coherent Video Trajectory Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32683--32692},
}
SpiderCam: Low-Power Snapshot Depth from Differential Defocus-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ferreira_2026_CVPR,
  author    = {Ferreira, Marcos A. and Li, Tianao and Mamish, John and Hester, Josiah and Sangar, Yaman and Guo, Qi and Alexander, Emma},
  title     = {{SpiderCam}: Low-Power Snapshot Depth from Differential Defocus},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41699--41709},
}
Denoising, Fast and Slow: Difficulty-Aware Adaptive Sampling for Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Schusterbauer_2026_CVPR,
  author    = {Schusterbauer, Johannes and Gui, Ming and Li, Yusong and Ma, Pingchuan and Krause, Felix and Ommer, Bj{\"o}rn},
  title     = {Denoising, Fast and Slow: Difficulty-Aware Adaptive Sampling for Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43260--43270},
}
Revisiting the Necessity of Lengthy Chain-of-Thought in Vision-centric Reasoning Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2026_CVPR,
  author    = {Du, Yifan and Zhou, Kun and Min, Yingqian and Ling, Yue and Zhao, Wayne Xin and Wu, Youbin and Wen, Ji-Rong},
  title     = {Revisiting the Necessity of Lengthy Chain-of-Thought in Vision-centric Reasoning Generalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12019--12029},
}
Leveraging Multispectral Sensors for Color Correction in Mobile Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cogo_2026_CVPR,
  author    = {Cogo, Luca and Buzzelli, Marco and Bianco, Simone and Vazquez-Corral, Javier and Schettini, Raimondo},
  title     = {Leveraging Multispectral Sensors for Color Correction in Mobile Cameras},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12438--12447},
}
Uni-DAD: Unified Distillation and Adaptation of Diffusion Models for Few-step Few-shot Image Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Bahram_2026_CVPR,
  author    = {Bahram, Yara and Desbos, M{\'e}lodie and Shateri, Mohammadhadi and Granger, Eric},
  title     = {{Uni-DAD}: Unified Distillation and Adaptation of Diffusion Models for Few-step Few-shot Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26635--26645},
}
Real-Time Generation of Streamable Talking Portrait Video with Reference-Guided Deep Compression VAEs-
[pdf]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Sicheng and Deng, Yu and Hu, Shoukang and Wang, Yichuan and Zhang, Yizhong and Chen, Zhan and Yang, Jiaolong and Guo, Baining},
  title     = {Real-Time Generation of Streamable Talking Portrait Video with Reference-Guided Deep Compression {VAEs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9284--9295},
}
Towards Human-Like Robot Handwriting via Contour-Aware Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Qin_2026_CVPR,
  author    = {Qin, Yutao and Dai, Gang and Zhang, Yifan and Han, Youwei and He, Qisheng and Huang, Shuangping},
  title     = {Towards Human-Like Robot Handwriting via Contour-Aware Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31597--31607},
}
AE2VID: Event-based Video Reconstruction via Aperture Modulation-
[pdf]
[supp]
[bibtex]@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Chenxu and Li, Boyu and Duan, Peiqi and Zhou, Xinyu and Lou, Hanyue and Shi, Boxin},
  title     = {{AE2VID}: Event-based Video Reconstruction via Aperture Modulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15115--15124},
}
Anchoring and Rescaling Attention for Semantically Coherent Inbetweening-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Tae Eun and Shim, Sumin and Kim, Junhyeok and Hwang, Seong Jae},
  title     = {Anchoring and Rescaling Attention for Semantically Coherent Inbetweening},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8976--8985},
}
UniT: Unified Multimodal Chain-of-Thought Test-time Scaling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Leon Liangyu and Ma, Haoyu and Fan, Zhipeng and Huang, Ziqi and Sinha, Animesh and Dai, Xiaoliang and Wang, Jialiang and He, Zecheng and Yang, Jianwei and Li, Chunyuan and Sun, Junzhe and Wang, Chu and Yeung-Levy, Serena and Juefei-Xu, Felix},
  title     = {{UniT}: Unified Multimodal Chain-of-Thought Test-time Scaling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30257--30267},
}
Verifying Neural Network Robustness with Dual Perturbations-
[pdf]
[supp]
[bibtex]@InProceedings{Duong_2026_CVPR,
  author    = {Duong, Hai and Nguyen, Lam and Le, Thanh and Nguyen, ThanhVu},
  title     = {Verifying Neural Network Robustness with Dual Perturbations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27916--27925},
}
VoxTell: Free-Text Promptable Universal 3D Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rokuss_2026_CVPR,
  author    = {Rokuss, Maximilian and Langenberg, Moritz and Kirchhoff, Yannick and Isensee, Fabian and Hamm, Benjamin and Ulrich, Constantin and Regnery, Sebastian and Bauer, Lukas and Katsigiannopulos, Efthimios and Norajitra, Tobias and Maier-Hein, Klaus},
  title     = {{VoxTell}: Free-Text Promptable Universal {3D} Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37538--37557},
}
Beyond Graph Model: Reliable VLM Fine-Tuning via Random Graph Adapter-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Bo and Ze, Xueyang and Wang, Beibei and Wang, Xixi and Wan, Xixi and Luo, Bin},
  title     = {Beyond Graph Model: Reliable {VLM} Fine-Tuning via Random Graph Adapter},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11664--11673},
}
Event-Illumination Collaborative Low-light Image Enhancement with a High-resolution Real-world Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Senyan and Sun, Zhijing and Liu, Kean and Lu, Xin and Jiang, Ruixuan and Fu, Xueyang and Zha, Zheng-Jun},
  title     = {Event-Illumination Collaborative Low-light Image Enhancement with a High-resolution Real-world Dataset},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22270--22280},
}
VGent: Visual Grounding via Modular Design for Disentangling Reasoning and Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Weitai and Kuen, Jason and Ren, Mengwei and Wei, Zijun and Yan, Yan and Liu, Kangning},
  title     = {{VGent}: Visual Grounding via Modular Design for Disentangling Reasoning and Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41160--41170},
}
HulluEdit: Single-Pass Evidence-Consistent Subspace Editing for Mitigating Hallucinations in Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Yangguang and Fang, Quan and Li, Yufei and Sun, Jiachen and Gao, Junyu and Sang, Jitao},
  title     = {{HulluEdit}: Single-Pass Evidence-Consistent Subspace Editing for Mitigating Hallucinations in Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11086--11095},
}
Probabilistic Prompt Adaptation for Unified Image Aesthetics and Quality Assessment-
[pdf]
[supp]
[bibtex]@InProceedings{Hara_2026_CVPR,
  author    = {Hara, Takayuki and Otsuka, Yuya},
  title     = {Probabilistic Prompt Adaptation for Unified Image Aesthetics and Quality Assessment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37236--37246},
}
LiveGesture: Streamable Co-Speech Gesture Generation Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saleem_2026_CVPR,
  author    = {Saleem, Muhammad Usama and Patel, Mayur Jagdishbhai and Pinyoanuntapong, Ekkasit and Qin, Zhongxing and Yang, Li and Xue, Hongfei and Helmy, Ahmed and Chen, Chen and Wang, Pu},
  title     = {{LiveGesture}: Streamable Co-Speech Gesture Generation Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2264--2273},
}
PercHead: Perceptual Head Model for Single-Image 3D Head Reconstruction & Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Oroz_2026_CVPR,
  author    = {Oroz, Antonio and Nie{\ss}ner, Matthias and Kirschstein, Tobias},
  title     = {{PercHead}: Perceptual Head Model for Single-Image {3D} Head Reconstruction \& Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4097--4108},
}
Defending Unauthorized Model Merging via Dual-Stage Weight Protection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Wei-Jia and Tsai, Min-Yan and Lee, Cheng-Yi and Yu, Chia-Mu},
  title     = {Defending Unauthorized Model Merging via Dual-Stage Weight Protection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27926--27935},
}
SMAP: Semantic Route Planning with Map-Grounded Multimodal Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Wenjie and Yang, Chen and Lu, Xin and Wang, Zhen and Liu, Yue and Xi, Bobo and Zhang, Pengbo},
  title     = {{SMAP}: Semantic Route Planning with Map-Grounded Multimodal Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40108--40118},
}
ActivityForensics: A Comprehensive Benchmark for Localizing Manipulated Activity in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bao_2026_CVPR,
  author    = {Bao, Peijun and Luo, Anwei and Pan, Gang and Kot, Alex C. and Jiang, Xudong},
  title     = {{ActivityForensics}: A Comprehensive Benchmark for Localizing Manipulated Activity in Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42987--42996},
}
Affine Perspective-Three-Point Problem-
[pdf]
[supp]
[bibtex]@InProceedings{Nakano_2026_CVPR,
  author    = {Nakano, Gaku},
  title     = {Affine Perspective-Three-Point Problem},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12217--12226},
}
Refacade: Editing Object with Given Reference Texture-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Youze and Ruan, Penghui and Zi, Bojia and Qi, Xianbiao and Wang, Jianan and Xiao, Rong},
  title     = {Refacade: Editing Object with Given Reference Texture},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1961--1972},
}
Fine-grained Image Aesthetic Assessment: Learning Discriminative Scores from Relative Ranks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zhichao and Wang, Jianjie and Zhang, Zhixianhe and Xie, Pangu and Sheng, Xiangfei and Chen, Pengfei and Li, Leida},
  title     = {Fine-grained Image Aesthetic Assessment: Learning Discriminative Scores from Relative Ranks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {145--155},
}
ThinkGen: Generalized Thinking for Visual Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiao_2026_CVPR,
  author    = {Jiao, Siyu and Lin, Yiheng and Zhong, Yujie and She, Qi and Zhou, Wei and Lan, Xiaohan and Huang, Zilong and Yu, Fei and Yu, Yingchen and Zhao, Yunqing and Zhao, Yao and Wei, Yunchao},
  title     = {{ThinkGen}: Generalized Thinking for Visual Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14713--14723},
}
Mitigating The Distribution Shift of Diffusion-based Dataset Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Yue and Hu, Chenyu and An, Pengyu and Li, Yong-Lu},
  title     = {Mitigating The Distribution Shift of Diffusion-based Dataset Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33943--33952},
}
LiteSense: Lifting Lightweight ToF with RGB for High-Resolution Metric Depth Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Yusheng and Lou, Lizhi and Tang, Yan and Miao, Zekai and Zhang, Shaoming and Wang, Jianmei},
  title     = {{LiteSense}: Lifting Lightweight {ToF} with {RGB} for High-Resolution Metric Depth Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5783--5792},
}
MoEActok: A MoE-based Action Tokenizer for Vision-Language-Action Models-
[pdf]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Chunpu and Liang, Zhixuan and Yang, Tianshuo and Chan, Chi-Min and Xiao, Yang and Wang, Jessie and Yang, Xiaokang and Mu, Yao},
  title     = {{MoEActok}: A {MoE}-based Action Tokenizer for Vision-Language-Action Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28042--28051},
}
COG: Confidence-aware Optimal Geometric Correspondence for Unsupervised Single-reference Novel Object Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Che_2026_CVPR,
  author    = {Che, Yuchen and Wu, Jingtu and Zheng, Hao and Kanezaki, Asako},
  title     = {{COG}: Confidence-aware Optimal Geometric Correspondence for Unsupervised Single-reference Novel Object Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11567--11578},
}
Generative Video Compression with One-Dimensional Latent Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Zihan and Jia, Zhaoyang and Xue, Naifu and Li, Jiahao and Li, Bin and Guo, Zongyu and Zhang, Xiaoyi and Chen, Zhenghao and Li, Houqiang and Lu, Yan},
  title     = {Generative Video Compression with One-Dimensional Latent Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41256--41265},
}
ExPose: Reinforcing Video Generation Models for Extreme Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Yoon_2026_CVPR,
  author    = {Yoon, Youngho and Cho, Wonjune and Ha, Hyunho and Kim, Sujung and Yoon, Kuk-Jin},
  title     = {{ExPose}: Reinforcing Video Generation Models for Extreme Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32636--32646},
}
ORCA: Orchestrated Reasoning with Collaborative Agents for Document Visual Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lassoued_2026_CVPR,
  author    = {Lassoued, Aymen and Souibgui, Mohamed Ali and Kessentini, Yousri},
  title     = {{ORCA}: Orchestrated Reasoning with Collaborative Agents for Document Visual Question Answering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19475--19486},
}
Improving Calibration in Test-Time Prompt Tuning for Vision-Language Models via Data-Free Flatness-Aware Prompt Pretraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2026_CVPR,
  author    = {Jang, Hyeonseo and Jeon, Jaebyeong and Hwang, Joong-Won and Lee, Kibok},
  title     = {Improving Calibration in Test-Time Prompt Tuning for Vision-Language Models via Data-Free Flatness-Aware Prompt Pretraining},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24300--24309},
}
Multi-SpatialMLLM: Multi-Frame Spatial Understanding with Multi-Modal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Runsen and Wang, Weiyao and Tang, Hao and Chen, Xingyu and Wang, Xiaodong and Chu, Fu-Jen and Feiszli, Matt and Liang, Kevin J.},
  title     = {{Multi-SpatialMLLM}: Multi-Frame Spatial Understanding with Multi-Modal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31078--31088},
}
Cross-Domain Demo-to-Code via Neurosymbolic Counterfactual Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jooyoung and Choi, Wonje and Song, Younguk and Woo, Honguk},
  title     = {Cross-Domain Demo-to-Code via Neurosymbolic Counterfactual Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18848--18858},
}
Aligning What Vision-Language Models See and Perceive with Adaptive Information Flow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Chengxin and Choi, Wonseok and Zhang, Chenshuang and Oh, Tae-Hyun},
  title     = {Aligning What Vision-Language Models See and Perceive with Adaptive Information Flow},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24706--24715},
}
Long-Tail Internet Photo Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Yuan and Xiangli, Yuanbo and Averbuch-Elor, Hadar and Snavely, Noah and Cai, Ruojin},
  title     = {Long-Tail Internet Photo Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {417--426},
}
DocSeeker: Structured Visual Reasoning with Evidence Grounding for Long Document Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Hao and Liu, Yuliang and Liu, Xingchen and Zhang, Yuyi and Liao, Minghui and Wu, Jihao and Chen, Wei and Bai, Xiang},
  title     = {{DocSeeker}: Structured Visual Reasoning with Evidence Grounding for Long Document Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41140--41149},
}
DriverGaze360: OmniDirectional Driver Attention with Object-Level Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Govil_2026_CVPR,
  author    = {Govil, Shreedhar and Stricker, Didier and Rambach, Jason},
  title     = {{DriverGaze360}: {OmniDirectional} Driver Attention with Object-Level Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39786--39795},
}
FlashIn: Fast and Accurate Image Inversion for Real-time Image Editing-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Guangzhi},
  title     = {{FlashIn}: Fast and Accurate Image Inversion for Real-time Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30425--30434},
}
Write Where It Matters: Policy-Guided Watermarks for 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Nan and Zeng, Yike and Zhang, Qian and Zhang, Qi and Pan, Zhiyi and Feng, Wei and Wan, Liang},
  title     = {Write Where It Matters: Policy-Guided Watermarks for {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6580--6590},
}
Symphony: A Cognitively-Inspired Multi-Agent System for Long-Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Haiyang and Zhou, Hongyun and Xu, Peng and Feng, Xiaoxue and Liu, Mengyi},
  title     = {Symphony: A Cognitively-Inspired Multi-Agent System for Long-Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24031--24041},
}
WikiCLIP: An Efficient Contrastive Baseline for Open-domain Visual Entity Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ning_2026_CVPR,
  author    = {Ning, Shan and Qiu, Longtian and Sun, Jiaxuan and He, Xuming},
  title     = {{WikiCLIP}: An Efficient Contrastive Baseline for Open-domain Visual Entity Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1596--1605},
}
Gloria: Consistent Character Video Generation via Content Anchors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yuhang and Zhang, Fan and Pi, Huaijin and Zeng, Ailing and Guo, Shuai and Xu, Guowei and Zhai, Wei and Cao, Yang and Zha, Zheng-Jun},
  title     = {Gloria: Consistent Character Video Generation via Content Anchors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36724--36735},
}
MoCha: End-to-End Video Character Replacement without Structural Guidance-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zhengbo and Ma, Jie and Wang, Ziheng and Peng, Zhan and Liang, Jun and Li, Jing},
  title     = {{MoCha}: End-to-End Video Character Replacement without Structural Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16279--16288},
}
UniVBench: Towards Unified Evaluation for Video Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Jianhui and Zhang, Xiaotian and Li, Yichen and Wang, Yuan and Zhang, Yan and Chen, Ziyi and Tang, Zhihang and Xu, Wei and Liu, Zuozhu},
  title     = {{UniVBench}: Towards Unified Evaluation for Video Foundation Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25654--25666},
}
CoVFT: Context-aware Visual Fine-tuning for Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Nan and Wang, Huiqun and Zheng, Yaoyan and Huang, Di},
  title     = {{CoVFT}: Context-aware Visual Fine-tuning for Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24341--24351},
}
RaPA: Enhancing Transferable Targeted Attacks via Random Parameter Pruning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2026_CVPR,
  author    = {Su, Tongrui and Li, Qingbin and Zhu, Shengyu and Chen, Wei and Cheng, Xueqi},
  title     = {{RaPA}: Enhancing Transferable Targeted Attacks via Random Parameter Pruning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6538--6548},
}
Faster-GS: Analyzing and Improving Gaussian Splatting Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hahlbohm_2026_CVPR,
  author    = {Hahlbohm, Florian and Franke, Linus and Eisemann, Martin and Magnor, Marcus},
  title     = {{Faster-GS}: Analyzing and Improving {Gaussian} Splatting Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18946--18957},
}
X-band Radar Non-Line-of-Sight Imaging-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2026_CVPR,
  author    = {Du, Dongyu and Zhao, Mingkun and Yang, Yutong and Scheuble, Dominik and Huang, Xiaolong and Shao, Zijian and Bijelic, Mario and Sengupta, Kaushik and Heide, Felix},
  title     = {X-band Radar Non-Line-of-Sight Imaging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5647--5658},
}
DiverseGRPO: Mitigating Mode Collapse in Image Generation via Diversity-Aware GRPO-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Henglin and Huang, Huijuan and Wang, Jing and Liu, Chang and Li, Xiu and Ji, Xiangyang},
  title     = {{DiverseGRPO}: Mitigating Mode Collapse in Image Generation via Diversity-Aware {GRPO}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1864--1873},
}
Breaking Smooth-Motion Assumptions: A UAV Benchmark for Multi-Object Tracking in Complex and Adverse Conditions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Jingtao and Zhang, Kexin and Ma, Xunchi and Li, Yuechan and Zhu, Guangming and Shen, Peiyi and Jiang, Linhua and Zhang, Xiangdong and Zhang, Liang},
  title     = {Breaking Smooth-Motion Assumptions: A {UAV} Benchmark for Multi-Object Tracking in Complex and Adverse Conditions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13594--13603},
}
PROMO: Promptable Outfitting for Efficient High-Fidelity Virtual Try-On-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Haohua and Zhou, Tianze and Zhu, Wei and Wang, Runqi and Guan, Yandong and Song, Dejia and Chen, Yibo and Tang, Xu and Hu, Yao and Sheng, Lu and Wu, Zhiyong},
  title     = {{PROMO}: Promptable Outfitting for Efficient High-Fidelity Virtual Try-On},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16074--16084},
}
Action-Geometry Prediction with 3D Geometric Prior for Bimanual Manipulation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Chongyang and Li, Haipeng and Cheng, Shen and Fan, Haoqiang and Feng, Ziliang and Liu, Shuaicheng},
  title     = {Action-Geometry Prediction with {3D} Geometric Prior for Bimanual Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35036--35046},
}
Unleashing the Intrinsic Visual Representation Capability of Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Hengzhuang and Zhang, Xinsong and Peng, Qiming and Luo, Bin and Hu, Han and Jiang, Dengyang and Ye, Han-Jia and Zhang, Teng and Jin, Hai},
  title     = {Unleashing the Intrinsic Visual Representation Capability of Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1771--1786},
}
Few-shot Acoustic Synthesis with Multimodal Flow Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Brunetto_2026_CVPR,
  author    = {Brunetto, Amandine},
  title     = {Few-shot Acoustic Synthesis with Multimodal Flow Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15773--15783},
}
MotionEdit: Benchmarking and Learning Motion-Centric Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wan_2026_CVPR,
  author    = {Wan, Yixin and Ke, Lei and Yu, Wenhao and Chang, Kai-Wei and Yu, Dong},
  title     = {{MotionEdit}: Benchmarking and Learning Motion-Centric Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9263--9272},
}
Revisiting 2D Foundation Models for Scalable 3D Medical Image Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Han and Georgescu, Bogdan and Zhang, Yanbo and Yoo, Youngjin and Baumgartner, Michael and Gao, Riqiang and Wang, Jianing and Zhao, Gengyan and Gibson, Eli and Comaniciu, Dorin and Grbic, Sasa},
  title     = {Revisiting {2D} Foundation Models for Scalable {3D} Medical Image Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30021--30031},
}
OMG-Avatar: One-shot Multi-LOD Gaussian Head Avatar-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Jianqiang and Liu, Lin and Hoi, Steven},
  title     = {{OMG-Avatar}: One-shot Multi-{LOD} {Gaussian} Head Avatar},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11017--11028},
}
UniGen-1.5: Enhancing Image Generation and Editing through Reward Unification in RL-
[pdf]
[supp]
[bibtex]@InProceedings{Tian_2026_CVPR,
  author    = {Tian, Rui and Gao, Mingfei and Gang, Haiming and Lu, Jiasen and Gan, Zhe and Yang, Yinfei and Wu, Zuxuan and Dehghan, Afshin},
  title     = {{UniGen-1.5}: Enhancing Image Generation and Editing through Reward Unification in {RL}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29367--29378},
}
Reflection Separation from a Single Image via Joint Latent Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zheng-Hui and Wang, Zhixiang and Liu, Yu-Lun and Chuang, Yung-Yu},
  title     = {Reflection Separation from a Single Image via Joint Latent Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4569--4579},
}
Anatomica: Localized Control over Geometric and Topological Properties for Anatomical Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kadry_2026_CVPR,
  author    = {Kadry, Karim and Abdelwahed, Abdalla and Manicka, Ajay and Chutisilp, Naravich and Nezami, Farhad R. and Edelman, Elazer R.},
  title     = {Anatomica: Localized Control over Geometric and Topological Properties for Anatomical Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15595--15605},
}
Leveraging Verifier-Based Reinforcement Learning in Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Hanzhong and Wu, Jie and Liu, Jie and Gao, Yu and Ye, Zilyu and Yuan, Linxiao and Wang, Xionghui and Yu, Yizhou and Huang, Weilin},
  title     = {Leveraging Verifier-Based Reinforcement Learning in Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34343--34352},
}
UniCompress: Token Compression for Unified Vision-Language Understanding and Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ziyao and Chen, Chen and Li, Jingtao and Zhuang, Weiming and Huang, Jiabo and Li, Ang and Lyu, Lingjuan},
  title     = {{UniCompress}: Token Compression for Unified Vision-Language Understanding and Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24663--24674},
}
Imbalanced View Contribution Evaluation and Refinement for Deep Incomplete Multi-View Clustering-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Taichun and Dong, Zhibin and Tan, Hao and Wang, Siwei and Liu, Xinwang and Zhu, En and Hu, Di and Liu, Tianrui and Li, Chuankun and He, Kunlun},
  title     = {Imbalanced View Contribution Evaluation and Refinement for Deep Incomplete Multi-View Clustering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39606--39616},
}
MooCap: A Multi-View Benchmark for Cow-Object-Human Interaction and Behavior Dynamics-
[pdf]
[supp]
[bibtex]@InProceedings{Noronha_2026_CVPR,
  author    = {Noronha, Ian and Neave, Heather and Kaur, Upinder},
  title     = {{MooCap}: A Multi-View Benchmark for Cow-Object-Human Interaction and Behavior Dynamics},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27324--27333},
}
InnoAds-Composer: Efficient Condition Composition for E-Commerce Poster Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qin_2026_CVPR,
  author    = {Qin, Yuxin and Cao, Ke and Liu, Haowei and Ma, Ao and Li, Fengheng and Zhu, Honghe and Zhang, Zheng and Ling, Run and Feng, Wei and He, Xuanhua and Zhang, Zhanjie and Guo, Zhen and Bian, Haoyi and Lv, Jingjing and Shen, Junjie and Law, Ching},
  title     = {{InnoAds-Composer}: Efficient Condition Composition for E-Commerce Poster Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32988--32999},
}
Envisioning the Future, One Step at a Time-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Baumann_2026_CVPR,
  author    = {Baumann, Stefan Andreas and Wiese, Jannik and Martorella, Tommaso and Kalayeh, Mahdi M. and Ommer, Bj{\"o}rn},
  title     = {Envisioning the Future, One Step at a Time},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6823--6836},
}
MeToM: Metadata-Guided Token Merging for Efficient Video LLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Zhuojie and Wang, Shijie and Yu, Xin},
  title     = {{MeToM}: Metadata-Guided Token Merging for Efficient Video {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10441--10450},
}
Breaking Spurious Correlations: Uncertainty-Driven Causal Transformers for AU Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yuru and Zhou, Yue},
  title     = {Breaking Spurious Correlations: Uncertainty-Driven Causal Transformers for {AU} Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7165--7174},
}
Progressive Multi-cue Alignment for Unaligned RGBT Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Jin_2026_CVPR,
  author    = {Jin, Jiandong and Li, Chenglong and Feng, Hao and Lu, Andong and Huang, Lili and Tang, Jin},
  title     = {Progressive Multi-cue Alignment for Unaligned {RGBT} Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35207--35216},
}
DynamicVGGT: Learning Dynamic Point Maps for 4D Scene Reconstruction in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR,
  author    = {He, Zhuolin and Li, Jing and Li, Guanghao and Chen, Xiaolei and Tang, Jiacheng and Zhang, Siyang and Jin, Zhounan and Cai, Feipeng and Li, Bin and Pu, Jian and Cai, Jia and Xue, Xiangyang},
  title     = {{DynamicVGGT}: Learning Dynamic Point Maps for {4D} Scene Reconstruction in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35670--35679},
}
Self-Evaluation Unlocks Any-Step Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Xin and Qi, Xiaojuan and Li, Zhengqi and Zhang, Kai and Zhang, Richard and Lin, Zhe and Shechtman, Eli and Wang, Tianyu and Nitzan, Yotam},
  title     = {Self-Evaluation Unlocks Any-Step Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7816--7826},
}
AURA: Multi-modal Shared Autonomy for Urban Navigation-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Yukai and He, Honglin and Song, Selina and Wu, Wayne and Zhou, Bolei},
  title     = {{AURA}: Multi-modal Shared Autonomy for Urban Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18171--18181},
}
LuxRemix: Lighting Decomposition and Remixing for Indoor Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Ruofan and M{\"u}ller, Norman and Weber, Ethan and Zauss, Duncan and Vijaykumar, Nandita and Kontschieder, Peter and Richardt, Christian},
  title     = {{LuxRemix}: Lighting Decomposition and Remixing for Indoor Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1100--1111},
}
AVATAR: Reinforcement Learning to See, Hear, and Reason Over Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kulkarni_2026_CVPR,
  author    = {Kulkarni, Yogesh and Fazli, Pooyan},
  title     = {{AVATAR}: Reinforcement Learning to See, Hear, and Reason Over Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7912--7922},
}
Semantics Lead the Way: Harmonizing Semantic and Texture Modeling with Asynchronous Latent Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Yueming and Feng, Ruoyu and Dai, Qi and Wang, Yuqi and Lin, Wenfeng and Guo, Mingyu and Luo, Chong and Zheng, Nanning},
  title     = {Semantics Lead the Way: Harmonizing Semantic and Texture Modeling with Asynchronous Latent Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43664--43674},
}
Geometry-Aligned and Anomaly-Aware Reconstruction for 3D Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Linchun and Zou, Qin and Yue, Yuanhao and Wang, Zhongyuan},
  title     = {Geometry-Aligned and Anomaly-Aware Reconstruction for {3D} Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14648--14657},
}
MPL: Match-guided Prototype Learning for Few-shot Action Recognition-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Feng and Zhao, Jie and Luo, Fulin and Qin, Anyong and Song, Tiecheng and Zhao, Yue and Gao, Chenqiang and Han, Junwei},
  title     = {{MPL}: Match-guided Prototype Learning for Few-shot Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34439--34448},
}
CogDriver: Integrating Cognitive Inertia for Temporally Coherent Planning in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Pei and Ning, Qingtian and Lu, Xinyan and Liu, Haipeng and Ma, Weiliang and She, Dangen and Lang, Xianpeng and Ma, Jun},
  title     = {{CogDriver}: Integrating Cognitive Inertia for Temporally Coherent Planning in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18150--18160},
}
LUMINA: A Multi-Vendor Mammography Benchmark with Energy Harmonization Protocol-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Hongyi and Durak, Gorkem and Aktas, Halil Ertugrul and Bejar, Andrea M. and Tutun, Baver and Uysal, Emre and Bulbul, Ezgi and Dogan, Mehmet Fatih and Erok, Berrin and Yildirim, Berna Akkus and Erturk, Sukru Mehmet and Bagci, Ulas},
  title     = {{LUMINA}: A Multi-Vendor Mammography Benchmark with Energy Harmonization Protocol},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35301--35310},
}
VerseCrafter: Dynamic Realistic Video World Model with 4D Geometric Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Sixiao and Yin, Minghao and Hu, Wenbo and Li, Xiaoyu and Shan, Ying and Fu, Yanwei},
  title     = {{VerseCrafter}: Dynamic Realistic Video World Model with {4D} Geometric Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40277--40290},
}
PhysSkin: Real-Time and Generalizable Physics-Based Animation via Self-Supervised Neural Skinning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2026_CVPR,
  author    = {Lei, Yuanhang and Cheng, Tao and Li, Xingxuan and Zhao, Boming and Huang, Siyuan and Hu, Ruizhen and Chen, Peter Yichen and Bao, Hujun and Cui, Zhaopeng},
  title     = {{PhysSkin}: Real-Time and Generalizable Physics-Based Animation via Self-Supervised Neural Skinning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32357--32366},
}
Learning Where to Look and How to Judge: Resolution-agnostic Image Quality Assessment with Quality-aware Saliency-
[pdf]
[supp]
[bibtex]@InProceedings{Gedik_2026_CVPR,
  author    = {Gedik, Hakan Emre and Gupta, Shashank and Bovik, Alan},
  title     = {Learning Where to Look and How to Judge: Resolution-agnostic Image Quality Assessment with Quality-aware Saliency},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37507--37517},
}
VGGDrive: Empowering Vision-Language Models with Cross-View Geometric Grounding for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jie and Li, Guang and Huang, Zhijian and Dang, Chenxu and Ye, Hangjun and Han, Yahong and Chen, Long},
  title     = {{VGGDrive}: Empowering Vision-Language Models with Cross-View Geometric Grounding for Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10954--10964},
}
LRHDR: Learning Representation-enhanced HDR Video Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Chenzhuo and Chen, Xin and Li, Bingchen and Meng, Yu and Yue, Tao and Hu, Xuemei},
  title     = {{LRHDR}: Learning Representation-enhanced {HDR} Video Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41584--41593},
}
FBTA: Enabling Single-GPU End-to-End Gigapixel WSI Classification with Feature Bridging and Translation Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Jiuyang and Li, Jiahan and Jiang, Junjun and Zhang, Yongbing},
  title     = {{FBTA}: Enabling Single-{GPU} End-to-End Gigapixel {WSI} Classification with Feature Bridging and Translation Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7026--7035},
}
Prompt-Free Unknown Label Generation for Open World Detection in Remote Sensing-
[pdf]
[supp]
[bibtex]@InProceedings{Azeem_2026_CVPR,
  author    = {Azeem, Abdullah and Wang, Ruisheng and Li, Qingquan and Siddique, Abubakar},
  title     = {Prompt-Free Unknown Label Generation for Open World Detection in Remote Sensing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34662--34672},
}
Beyond the Golden Data: Resolving the Motion-Vision Quality Dilemma via Timestep Selective Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Xiangyang and Li, Qingyu and Li, Yuming and Huang, Guanbo and Zhu, Yongjie and Qin, Wenyu and Wang, Meng and Wan, Pengfei and Huang, Shao-Lun},
  title     = {Beyond the Golden Data: Resolving the Motion-Vision Quality Dilemma via Timestep Selective Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43440--43449},
}
Agile Deliberation: Concept Deliberation for Subjective Visual Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Leijie and Stretcu, Otilia and Qiao, Wei and Denby, Thomas and Viswanathan, Krishnamurthy and Luo, Enming and Lu, Chun-Ta and Dogra, Tushar and Krishna, Ranjay and Fuxman, Ariel},
  title     = {Agile Deliberation: Concept Deliberation for Subjective Visual Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4794--4804},
}
WISER: Wider Search, Deeper Thinking, and Adaptive Fusion for Training-Free Zero-Shot Composed Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Tianyue and Qu, Leigang and Yang, Tianyu and Hao, Xiangzhao and Xu, Yifan and Guo, Haiyun and Wang, Jinqiao},
  title     = {{WISER}: Wider Search, Deeper Thinking, and Adaptive Fusion for Training-Free Zero-Shot Composed Image Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16865--16875},
}
EMMA: Extracting Multiple physical parameters from Multimodal Data-
[pdf]
[supp]
[bibtex]@InProceedings{Shaikh_2026_CVPR,
  author    = {Shaikh, Farhat and Banerjee, Ayan and Gupta, Sandeep},
  title     = {{EMMA}: Extracting Multiple physical parameters from Multimodal Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1716--1725},
}
TraceGen: World Modeling in 3D Trace Space Enables Learning from Cross-Embodiment Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Seungjae and Jung, Yoonkyo and Chun, Inkook and Lee, Yao-Chih and Cai, Zikui and Huang, Hongjia and Talreja, Aayush and Dao, Tan and Liang, Yongyuan and Huang, Jia-Bin and Huang, Furong},
  title     = {{TraceGen}: World Modeling in {3D} Trace Space Enables Learning from Cross-Embodiment Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20721--20731},
}
MS-Temba: Multi-Scale Temporal Mamba for Understanding Long Untrimmed Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sinha_2026_CVPR,
  author    = {Sinha, Arkaprava and Raj, Monish Soundar and Wang, Pu and Helmy, Ahmed and Le, Hieu and Das, Srijan},
  title     = {{MS-Temba}: Multi-Scale Temporal {Mamba} for Understanding Long Untrimmed Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9815--9826},
}
HAVE-Bench: Hierarchical Audio-Visual Evaluation from Perception to Interaction-
[pdf]
[supp]
[bibtex]@InProceedings{Zhong_2026_CVPR,
  author    = {Zhong, Muyan and Cui, Erfei and Xing, Sen and Wang, Weiyun and Wu, Wen and Hu, Yuchen and Zhang, Yanting and Hu, Xiaowei and Wang, Wenhai and Zhang, Chao and Dai, Jifeng},
  title     = {{HAVE-Bench}: Hierarchical Audio-Visual Evaluation from Perception to Interaction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8801--8812},
}
Diffusion Probe: Generated Image Result Prediction Using CNN Probes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Bukun and Cui, Benlei and Ye, Zhizeng and Dong, Xuemei and Chen, Tuo and Xue, Hui and Yang, Dingkang and Huang, Longtao and Hong, Haiwen and Tang, Jingqun},
  title     = {Diffusion Probe: Generated Image Result Prediction Using {CNN} Probes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35926--35935},
}
Frequency Switching Mechanism for Parameter-Efficient Multi-Task Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Shih-Wen and Chen, Yen-Chang and Chu, Wei-Ta and Yang, Fu-En and Wang, Yu-Chiang Frank},
  title     = {Frequency Switching Mechanism for Parameter-Efficient Multi-Task Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20273--20282},
}
FluidGaussian: Propagating Simulation-Based Uncertainty Toward Functionally-Intelligent 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yuqiu and Song, Jialin and de Chanlatte, Marissa Ramirez and Chowdhury, Rochishnu and Desai, Rushil Paresh and Chen, Wuyang and Martin, Daniel and Mahoney, Michael W.},
  title     = {{FluidGaussian}: Propagating Simulation-Based Uncertainty Toward Functionally-Intelligent {3D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15421--15431},
}
UAST: Unified Active Search and Tracking for Arbitrary Targets with UAVs-
[pdf]
[bibtex]@InProceedings{Qin_2026_CVPR,
  author    = {Qin, Liang and Wang, Min and Lu, Xingyu and Qiu, Aowen and Zhou, Wengang and Li, Houqiang},
  title     = {{UAST}: Unified Active Search and Tracking for Arbitrary Targets with {UAVs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13464--13473},
}
Cross-modal Representation Learning for Diffusion-generated Image Detection-
[pdf]
[bibtex]@InProceedings{Gong_2026_CVPR,
  author    = {Gong, Tao and Wang, Dayong and Chu, Qi and Liu, Bin and Yu, Nenghai},
  title     = {Cross-modal Representation Learning for Diffusion-generated Image Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36092--36102},
}
Describe Anything Anywhere At Any Moment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gorlo_2026_CVPR,
  author    = {Gorlo, Nicolas and Schmid, Lukas and Carlone, Luca},
  title     = {Describe Anything Anywhere At Any Moment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35002--35013},
}
TerraSeg: Self-Supervised Ground Segmentation for Any LiDAR-
[pdf]
[supp]
[bibtex]@InProceedings{Lentsch_2026_CVPR,
  author    = {Lentsch, Ted and Montiel-Mar{\'\i}n, Santiago and Caesar, Holger and Gavrila, Dariu M.},
  title     = {{TerraSeg}: Self-Supervised Ground Segmentation for Any {LiDAR}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10040--10050},
}
META: Meta Evolution of Tool Trajectory Adaptation for Long-Video Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Jing and Chen, Luyuan and Xu, Zhijie and Li, Yadong and Xu, Xingzhong and Chen, Siye and Liu, Jie and Kong, Ming and Zhu, Qiang},
  title     = {{META}: Meta Evolution of Tool Trajectory Adaptation for Long-Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9837--9846},
}
Wanderland: Geometrically Grounded Simulation for Open-World Embodied AI-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xinhao and Li, Jiaqi and Deng, Youming and Chen, Ruxin and Zhang, Yingjia and Ma, Yifei and Guo, Li and Li, Yiming and Zhang, Jing and Feng, Chen},
  title     = {Wanderland: Geometrically Grounded Simulation for Open-World Embodied {AI}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1041--1052},
}
Mining Attribute Subspaces for Efficient Fine-tuning of 3D Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Yu and Jiang, Hanwen and Abdelkader, Ahmed and Chu, Wen-Sheng and Feng, Brandon and Wang, Zhangyang and Huang, Qixing},
  title     = {Mining Attribute Subspaces for Efficient Fine-tuning of {3D} Foundation Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29071--29080},
}
Rethinking Box Supervision: Bias-Free Weakly Supervised Medical Segmentation-
[pdf]
[bibtex]@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Jun and Huang, Hui},
  title     = {Rethinking Box Supervision: Bias-Free Weakly Supervised Medical Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8525--8534},
}
CVA: Context-aware Video-text Alignment for Video Temporal Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Moon_2026_CVPR,
  author    = {Moon, Sungho and Lee, Seunghun and Seo, Jiwan and Im, Sunghoon},
  title     = {{CVA}: Context-aware Video-text Alignment for Video Temporal Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17578--17587},
}
PAS: A Training-Free Stabilizer for Temporal Encoding in Video LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Bowen and Cai, Yujun and Yang, Ming-Hsuan and Wu, Hang and Wang, Yiwei},
  title     = {{PAS}: A Training-Free Stabilizer for Temporal Encoding in Video {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14471--14480},
}
Visual Document Understanding and Reasoning: A Multi-Agent Collaboration Framework with Agent-Wise Adaptive Test-Time Scaling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Xinlei and Xu, Chengming and Chen, Zhangquan and Zhang, Yudong and Lu, Shilin and Yang, Cheng and Zhang, Jiangning and Yan, Shuicheng and Hu, Xiaobin},
  title     = {Visual Document Understanding and Reasoning: A Multi-Agent Collaboration Framework with Agent-Wise Adaptive Test-Time Scaling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12300--12311},
}
Beyond Static Frames: Temporal Aggregate-and-Restore Vision Transformer for Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Hongwei and Cai, Jiahang and Wang, Xun and Yang, Wenwu},
  title     = {Beyond Static Frames: Temporal Aggregate-and-Restore Vision Transformer for Human Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42891--42900},
}
Electromagnetic Inverse Scattering from a Single Transmitter-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Yizhe and Tian, Chunxun and Wang, Haoru and Zhu, Wentao and Ma, Xiaoxuan and Wang, Yizhou},
  title     = {Electromagnetic Inverse Scattering from a Single Transmitter},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34040--34049},
}
Bridging Human Evaluation to Infrared and Visible Image Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jinyuan and Li, Xingyuan and Mei, Qingyun and Xu, Haoyuan and Jiang, Zhiying and Ma, Long and Liu, Risheng and Fan, Xin},
  title     = {Bridging Human Evaluation to Infrared and Visible Image Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12322--12333},
}
CanonCGT: Reference-Based Color Grading via Canonical Pivot Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Ko_2026_CVPR,
  author    = {Ko, Jinwon and Ko, Keunsoo and Kim, Chang-Su},
  title     = {{CanonCGT}: Reference-Based Color Grading via Canonical Pivot Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15486--15495},
}
Every Error has Its Magnitude: Asymmetric Mistake Severity Training for Multiclass Multiple Instance Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2026_CVPR,
  author    = {Hong, Sungrae and Jeong, Jiwon and Shin, Jisu and Han, Donghee and Lee, Sol and Kim, Kyungeun and Yi, Mun Yong},
  title     = {Every Error has Its Magnitude: Asymmetric Mistake Severity Training for Multiclass Multiple Instance Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28244--28253},
}
Thinking in 360deg: Humanoid Visual Search in the Wild-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Heyang and Han, Yinan and Zhang, Xiangyu and Yin, Baiqiao and Chang, Bowen and Han, Xiangyu and Liu, Xinhao and Zhang, Jing and Pavone, Marco and Feng, Chen and Xie, Saining and Li, Yiming},
  title     = {Thinking in 360deg: Humanoid Visual Search in the Wild},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22445--22455},
}
Towards Unified Human Perception and Machine Understanding: Token Flow Guided Compression Framework-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Li and Zhang, Yingfu and Xu, Kepeng and He, Gang and Li, Yunsong},
  title     = {Towards Unified Human Perception and Machine Understanding: Token Flow Guided Compression Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17630--17640},
}
Physically-Grounded Turbulence Mitigation with Frame-Shared Degradation Parameters-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Dongxin and Huang, Yan and Xu, Yong and Ji, Hui},
  title     = {Physically-Grounded Turbulence Mitigation with Frame-Shared Degradation Parameters},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29919--29928},
}
Exploring the Underwater World Segmentation without Extra Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Bingyu and Huo, Tao and Zhang, Da and Zhao, Zhiyuan and Gao, Junyu and Li, Xuelong},
  title     = {Exploring the Underwater World Segmentation without Extra Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39879--39889},
}
Point4Cast: Streaming Dynamic Scene Reconstruction and Forecasting-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xinhang and Miraldo, Pedro and Lohit, Suhas and Jiang, Huaizu and Sawada, Naoko and Tai, Yu-Wing and Tang, Chi-Keung and Chatterjee, Moitreya},
  title     = {{Point4Cast}: Streaming Dynamic Scene Reconstruction and Forecasting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14600--14611},
}
Speeding Up the Learning of 3D Gaussians with Much Shorter Gaussian Lists-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jiaqi and Han, Zhizhong},
  title     = {Speeding Up the Learning of {3D} {Gaussians} with Much Shorter {Gaussian} Lists},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1231--1240},
}
HOLO: Homography-Guided Pose Estimator Network for Fine-Grained Visual Localization on SD Maps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2026_CVPR,
  author    = {Zhong, Xuchang and Cao, Xu and Feng, Jinke and Fang, Hao},
  title     = {{HOLO}: Homography-Guided Pose Estimator Network for Fine-Grained Visual Localization on {SD} Maps},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41376--41385},
}
Guiding a Diffusion Transformer with the Internal Dynamics of Itself-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Xingyu and Li, Qifan and Hu, Xiaobin and Chen, Hai and Gu, Shuhang},
  title     = {Guiding a Diffusion Transformer with the Internal Dynamics of Itself},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11536--11545},
}
OVOD-Agent: A Markov-Bandit Framework for Proactive Visual Reasoning and Self-Evolving Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Chujie and Lu, Jianyu and Luo, Zhiyuan and Chen, Xi and He, Chu},
  title     = {{OVOD-Agent}: A {Markov}-Bandit Framework for Proactive Visual Reasoning and Self-Evolving Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41416--41425},
}
Generalizable Video Quality Assessment via Weak-to-Strong Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Linhan and Sun, Wei and Zhu, Xiangyang and Zhang, Kaiwei and Jia, Jun and Peng, Yicong and Zhu, Dandan and Zhai, Guangtao and Min, Xiongkuo},
  title     = {Generalizable Video Quality Assessment via Weak-to-Strong Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25578--25588},
}
CAST: Context-Aware Dynamic Latent Space Transformation for Interactive Text-to-Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Xuanzuo and Zhang, Min and Liu, Daizong and Zuo, Zhiwen and Yang, Xun and Lin, Changting and Wang, Xun and Dong, Jianfeng},
  title     = {{CAST}: Context-Aware Dynamic Latent Space Transformation for Interactive Text-to-Image Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38794--38803},
}
DiG: Differential Grounding for Enhancing Fine-Grained Perception in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tao_2026_CVPR,
  author    = {Tao, Zhou and Wang, Shida and Hua, YongXiang and Cao, Haoyu and Xu, Linli},
  title     = {{DiG}: Differential Grounding for Enhancing Fine-Grained Perception in Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1695--1705},
}
From Failure to Feedback: Group Revision Unlocks Hard Cases in Object-Level Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yuyuan and Ji, Yiping and Le, Anjie and Zhu, Jiayuan and Pan, Jiazhen and Peng, Can and Deng, Jiajun and Liu, Fengbei and Wu, Junde}, title = {From Failure to Feedback: Group Revision Unlocks Hard Cases in Object-Level Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4816-4828} }
VisPlay: Self-Evolving Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Yicheng and Huang, Chengsong and Li, Zongxia and Huang, Jiaxin and Yang, Yonghui}, title = {VisPlay: Self-Evolving Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26274-26284} }
DeepScan: A Training-Free Framework for Visually Grounded Reasoning in Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yangfu and Zhan, Hongjian and Chen, Jiawei and Gong, Yuning and Liu, Qi and Lu, Yue}, title = {DeepScan: A Training-Free Framework for Visually Grounded Reasoning in Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19155-19164} }
APEX: A Decoupled Memory-based Explorer for Asynchronous Aerial Object Goal Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Daoxuan and Chen, Ping and Xia, Xiaobo and Su, Xiu and Zhen, Ruichen and Xiao, Jianqiang and Yang, Shuo}, title = {APEX: A Decoupled Memory-based Explorer for Asynchronous Aerial Object Goal Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15232-15242} }
EE-RL: Vision Language Guided Reinforcement Learning with Explorer and Expert model for End-to-End Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xiaolong and Yang, Lan and Li, Ruyang and Fang, Shan and Liu, Yang and Zhao, Xiangmo}, title = {EE-RL: Vision Language Guided Reinforcement Learning with Explorer and Expert model for End-to-End Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32082-32092} }
Consistency Beyond Contrast: Enhancing Open-Vocabulary Object Detection Robustness via Contextual Consistency Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Bozhao and Wu, Shaocong and Shao, Tong and Yang, Senqiao and Shan, Qiben and Tian, Zhuotao and Su, Jingyong}, title = {Consistency Beyond Contrast: Enhancing Open-Vocabulary Object Detection Robustness via Contextual Consistency Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34617-34627} }
NeuROK: Generative 4D Neural Object Kinematics-
[pdf]
[bibtex]@InProceedings{Geng_2026_CVPR, author = {Geng, Chen and He, Guangzhao and Gao, Yue and Zhang, Yunzhi and Wu, Shangzhe and Wu, Jiajun}, title = {NeuROK: Generative 4D Neural Object Kinematics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39239-39251} }
Rationale-Enhanced Decoding for Multi-modal Chain-of-Thought-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yamaguchi_2026_CVPR, author = {Yamaguchi, Shin'ya and Nishida, Kosuke and Chijiwa, Daiki}, title = {Rationale-Enhanced Decoding for Multi-modal Chain-of-Thought}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19241-19252} }
D$^2$-FOSA: Dual-Diffusion Guided EEG-to-Image Reconstruction with Frequency-Oriented Semantic Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Chenglong and Shen, Shuai and Li, Xiangsheng and Li, Yang}, title = {{D$^2$-FOSA}: Dual-Diffusion Guided EEG-to-Image Reconstruction with Frequency-Oriented Semantic Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26698-26710} }
Spherical Voronoi: Directional Appearance as a Differentiable Partition of the Sphere-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Di_Sario_2026_CVPR, author = {Di Sario, Francesco and Rebain, Daniel and Verbin, Dor and Grangetto, Marco and Tagliasacchi, Andrea}, title = {Spherical Voronoi: Directional Appearance as a Differentiable Partition of the Sphere}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22529-22538} }
Understanding Counting Mechanisms in Large Language and Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hasani_2026_CVPR, author = {Hasani, Hosein and Izadi, Amirmohammad and Askari, Fatemeh and Bagherian, Mobin and Mohammadian, Sadegh and Izadi, Mohammad and Baghshah, Mahdieh Soleymani}, title = {Understanding Counting Mechanisms in Large Language and Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5125-5133} }
Test-time Ego-Exo-centric Adaptation for Action Anticipation via Multi-Label Prototype Growing and Dual-Clue Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Zhaofeng and Qiu, Heqian and Wang, Lanxiao and Wu, Qingbo and Meng, Fanman and Pan, Lili and Li, Hongliang}, title = {Test-time Ego-Exo-centric Adaptation for Action Anticipation via Multi-Label Prototype Growing and Dual-Clue Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16988-16999} }
DreamStereo: Towards Real-Time Stereo Inpainting for HD Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Yuan and Zhao, Sijie and Cheng, Jing and Xu, Hao and Jiao, Shaohui}, title = {DreamStereo: Towards Real-Time Stereo Inpainting for HD Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25393-25402} }
MMDIR: Multimodal Instruction-Driven Framework for Mixed-Degradation Document Image Restoration-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Heng and Wang, Xingyuan and Fan, Yang and Zhang, Yunan and Wu, Xiangping and Chen, Qingcai}, title = {MMDIR: Multimodal Instruction-Driven Framework for Mixed-Degradation Document Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8387-8396} }
ID-Sim: An Identity-Focused Similarity Metric-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chae_2026_CVPR, author = {Chae, Julia and Kolkin, Nicholas and Wang, Jui-Hsien and Zhang, Richard and Beery, Sara and Ham, Cusuh}, title = {ID-Sim: An Identity-Focused Similarity Metric}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11250-11262} }
Free-Lunch Long Video Generation via Layer-Adaptive O.O.D Correction-
[pdf]
[supp]
[bibtex]@InProceedings{Tian_2026_CVPR, author = {Tian, Jiahao and Song, Chenxi and Cheng, Wei and Zhang, Chi}, title = {Free-Lunch Long Video Generation via Layer-Adaptive O.O.D Correction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1973-1982} }
Streaming Video Instruction Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2026_CVPR, author = {Xia, Jiaer and Chen, Peixian and Zhang, Mengdan and Sun, Xing and Zhou, Kaiyang}, title = {Streaming Video Instruction Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31219-31229} }
Boosting Reasoning in Large Multimodal Models via Activation Replay-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xing_2026_CVPR, author = {Xing, Yun and Hu, Xiaobin and He, Qingdong and Zhang, Jiangning and Yan, Shuicheng and Lu, Shijian and Jiang, Yu-Gang}, title = {Boosting Reasoning in Large Multimodal Models via Activation Replay}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19229-19240} }
Black-box Membership Inference Attacks on the Pre-training Data of Image-generation Models-
[pdf]
[supp]
[bibtex]@InProceedings{Qi_2026_CVPR, author = {Qi, Tao and Wang, Huili and Huang, Yuanhong and Wang, Wendan and Zhao, Lianchao and Wang, Jinrui and Qin, Zichen and Wang, Shangguang and Huang, Yongfeng}, title = {Black-box Membership Inference Attacks on the Pre-training Data of Image-generation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {62-71} }
Enhancing Part-Level Point Grounding for Any Open-Source MLLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Jhang_2026_CVPR, author = {Jhang, Jin-Cheng and Wang, Fu-En and Yang, Xin and Qiao, Nan and Xia, Lu and Sun, Min and Kuo, Cheng-Hao}, title = {Enhancing Part-Level Point Grounding for Any Open-Source MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22900-22909} }
Detecting AI-Generated Forgeries via Iterative Manifold Deviation Amplification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jiangling and Gao, Shuxuan and Liu, Bofan and Feng, Siqiang and Huang, Jirui and Chen, Yaxiong and Chen, Ziyu}, title = {Detecting AI-Generated Forgeries via Iterative Manifold Deviation Amplification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35494-35503} }
RoadSceneBench: A Lightweight Benchmark for Mid-Level Road Scene Understanding-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xiyan and Wang, Han and Wang, Yuhu and Cai, Junjie and Cao, Zhe and Yang, Jianzhong and Lu, Zhen}, title = {RoadSceneBench: A Lightweight Benchmark for Mid-Level Road Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23720-23729} }
OneCAT: Decoder-Only Auto-Regressive Model for Unified Understanding and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Han and Peng, Xinyu and Wang, Yaoming and Peng, Zelin and Chen, Xin and Weng, Rongxiang and Wang, Jingang and Cai, Xunliang and Dai, Wenrui and Xiong, Hongkai}, title = {OneCAT: Decoder-Only Auto-Regressive Model for Unified Understanding and Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30235-30245} }
A Closed-Form Solution for Debiasing Vision-Language Models with Utility Guarantees Across Modalities and Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lian_2026_CVPR, author = {Lian, Tangzheng and Hu, Guanyu and Ren, Yijing and Kollias, Dimitrios and Celiktutan, Oya}, title = {A Closed-Form Solution for Debiasing Vision-Language Models with Utility Guarantees Across Modalities and Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31672-31682} }
Vista4D: Video Reshooting with 4D Point Clouds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Kuan Heng and Liu, Zhizheng and Salamanca, Pablo and Kant, Yash and Burgert, Ryan and Xu, Yuancheng and Namekata, Koichi and Zhao, Yiwei and Zhou, Bolei and Goldblum, Micah and Debevec, Paul and Yu, Ning}, title = {Vista4D: Video Reshooting with 4D Point Clouds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32671-32682} }
LookasideVLN: Direction-Aware Aerial Vision-and-Language Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ning_2026_CVPR, author = {Ning, Yuwei and Zhao, Ganlong and Qin, Yipeng and Liu, Si and Liu, Yang and Lin, Liang and Li, Guanbin}, title = {LookasideVLN: Direction-Aware Aerial Vision-and-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32441-32450} }
Tracking through Severe Occlusion via Event-Derived Transient Cues-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2026_CVPR, author = {Dong, Hao and Liu, Yujin and Liu, Haoyue and Wang, Zhenyu and Peng, Shihan and Shi, Zhiwei and Chang, Yi and Yan, Luxin}, title = {Tracking through Severe Occlusion via Event-Derived Transient Cues}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29526-29536} }
Inconsistency-aware Multimodal Schrodinger Bridge for Deepfake Localization-
[pdf]
[bibtex]@InProceedings{Xiong_2026_CVPR, author = {Xiong, Jiayu and Wang, Jing and Zhang, Qi and Wang, Wanlong and Xue, Jun}, title = {Inconsistency-aware Multimodal Schrodinger Bridge for Deepfake Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8697-8706} }
From Inpainting to Layer Decomposition: Repurposing Generative Inpainting Models for Image Layer Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Jingxi and Zhang, Yixiao and Qian, Xiaoye and Li, Zongxia and Fermuller, Cornelia and Chen, Caren and Aloimonos, Yiannis}, title = {From Inpainting to Layer Decomposition: Repurposing Generative Inpainting Models for Image Layer Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16054-16063} }
AeroDGS: Physically Consistent Dynamic Gaussian Splatting for Single-Sequence Aerial 4D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Hanyang and Qin, Rongjun}, title = {AeroDGS: Physically Consistent Dynamic Gaussian Splatting for Single-Sequence Aerial 4D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19011-19021} }
ViTPrompt: Training-Free Prompt Refinement with Visual Tokens for Open-Vocabulary Detection-
[pdf]
[bibtex]@InProceedings{Qin_2026_CVPR, author = {Qin, Yitong and Zhou, Lihua and Wei, Jiwei and Ran, Ran and He, Shiyuan and Ma, Zeyu and Li, Shuaifeng and Li, Nianxin and Shen, Heng Tao}, title = {ViTPrompt: Training-Free Prompt Refinement with Visual Tokens for Open-Vocabulary Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3111-3121} }
FoleyDirector: Fine-Grained Temporal Steering for Video-to-Audio Generation via Structured Scripts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, You and Zhou, Dewei and Ma, Fan and Li, Fu and He, Dongliang and Yang, Yi}, title = {FoleyDirector: Fine-Grained Temporal Steering for Video-to-Audio Generation via Structured Scripts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29254-29264} }
SMVRT: Implicit Human 3D Modeling Using Sparse Multi-View Volumetric Reconstruction with Transformer Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Chuanmao and Zhao, Chenxi and Duan, Ye}, title = {SMVRT: Implicit Human 3D Modeling Using Sparse Multi-View Volumetric Reconstruction with Transformer Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14556-14566} }
HCL-FF: Hierarchical and Contrastive Learning for Forward-Forward Algorithm-
[pdf]
[supp]
[bibtex]@InProceedings{Yao_2026_CVPR, author = {Yao, Jie-En and Chen, Hong-En and Kuo, C.-C. Jay}, title = {HCL-FF: Hierarchical and Contrastive Learning for Forward-Forward Algorithm}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27429-27438} }
HumanBA: Human-Aware Bundle Adjustment via Global Human-Camera Decoupling-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Fengyuan and Sur, Tanuj and Tse, Tze Ho Elden and Yao, Angela}, title = {HumanBA: Human-Aware Bundle Adjustment via Global Human-Camera Decoupling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13846-13855} }
Stability-Driven Motion Generation for Object-Guided Human-Human Co-Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Jiahao and Yuan, Xiaohan and Wu, Xingchen and Xu, Chongyang and Li, Kun and Huang, Buzhen}, title = {Stability-Driven Motion Generation for Object-Guided Human-Human Co-Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38355-38365} }
Beyond Heuristic Prompting: A Concept-Guided Bayesian Framework for Zero-Shot Image Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Hui and Chen, Kecheng and Wang, Jialiang and Liu, Xianming and Wang, Wenya and Li, Haoliang}, title = {Beyond Heuristic Prompting: A Concept-Guided Bayesian Framework for Zero-Shot Image Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5521-5531} }
Diagnosing and Repairing Unsafe Channels in Vision-Language Models via Causal Discovery and Dual-Modal Safety Subspace Projection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2026_CVPR, author = {Fu, Jinhu and Lou, Yihang and Si, Qingyi and Zhang, Shudong and Su, Sen}, title = {Diagnosing and Repairing Unsafe Channels in Vision-Language Models via Causal Discovery and Dual-Modal Safety Subspace Projection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31693-31702} }
Beyond Endpoints: Path-Centric Reasoning for Vectorized Off-Road Network Extraction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guan_2026_CVPR, author = {Guan, Wenfei and Mei, Jilin and Shen, Tong and Wu, Xumin and Wang, Shuo and Min, Chen and Hu, Yu}, title = {Beyond Endpoints: Path-Centric Reasoning for Vectorized Off-Road Network Extraction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13254-13263} }
Resolving the Stability-Plasticity Dilemma in Reinforcement Learning via Complementary Continual Critics-
[pdf]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Bo and Peng, Peixi and Tan, Guang and Xu, Haoran and Li, Yaokun and Chang, Yiqian and Wang, Shuaixian and Li, Luntong}, title = {Resolving the Stability-Plasticity Dilemma in Reinforcement Learning via Complementary Continual Critics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22348-22357} }
Scaling Self-Supervised and Cross-Modal Pretraining for Volumetric CT Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Claessens_2026_CVPR, author = {Claessens, Cris and Viviers, Christiaan and D'Amicantonio, Giacomo and Bondarev, Egor and van der Sommen, Fons}, title = {Scaling Self-Supervised and Cross-Modal Pretraining for Volumetric CT Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13636-13647} }
Hierarchically Robust Zero-shot Vision-language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2026_CVPR, author = {Dong, Junhao and Zhang, Yifei and Zhu, Hao and Ong, Yew-Soon and Koniusz, Piotr}, title = {Hierarchically Robust Zero-shot Vision-language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37642-37652} }
PACT: Phase-Like Transition Constraints in Adapter-Based Continual Learning of Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xuan and Ding, Guiguang and Han, Jungong}, title = {PACT: Phase-Like Transition Constraints in Adapter-Based Continual Learning of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17999-18009} }
AnchorFlow: Training-Free 3D Editing via Latent Anchor-Aligned Flows-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Zhenglin and Ma, Fan and Gui, Chengzhuo and Xia, Xiaobo and Fan, Hehe and Yang, Yi and Chua, Tat-Seng}, title = {AnchorFlow: Training-Free 3D Editing via Latent Anchor-Aligned Flows}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14387-14397} }
Recurrent Video Masked Autoencoders-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zoran_2026_CVPR, author = {Zoran, Daniel and Parthasarathy, Nikhil and Yang, Yi and Hudson, Drew A. and Carreira, Jo{\~a}o and Zisserman, Andrew}, title = {Recurrent Video Masked Autoencoders}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17744-17755} }
CLEP: Contrastive Language-Pose Pretraining-
[pdf]
[bibtex]@InProceedings{Jia_2026_CVPR, author = {Jia, Sen and Wang, Huayu and Huang, Hsiang-Wei and An, Zhaochong and Hwang, Jenq-Neng and Zhang, Huaping and Li, Lei}, title = {CLEP: Contrastive Language-Pose Pretraining}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30696-30706} }
SDDF: Specificity-Driven Dynamic Focusing for Open-Vocabulary Camouflaged Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Jiaming and Zhan, Yifeng and Liu, Chunlin and Zheng, Weihua and Peng, Bingye and Liang, Qiwei and Cai, Boyang and Mai, Xiaochun and Nie, Qiang}, title = {SDDF: Specificity-Driven Dynamic Focusing for Open-Vocabulary Camouflaged Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13049-13058} }
GardenDesigner: Encoding Aesthetic Principles into Jiangnan Garden Construction via a Chain of Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Mengtian and Yang, Fan and Xiong, Ruixue and Fan, Yiyan and Xie, Zhifeng and Wang, Zeyu}, title = {GardenDesigner: Encoding Aesthetic Principles into Jiangnan Garden Construction via a Chain of Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24417-24427} }
Lens Component Deletion based on Differentiable Ray Tracing-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Wenguan and Zhang, Qirun and Sun, Tuo and He, Jiajian and Xu, Jiahui and Feng, Huajun and Li, Qi}, title = {Lens Component Deletion based on Differentiable Ray Tracing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5637-5646} }
MR. Illuminate: Zero-Shot Low-Light Image Enhancement with Diffusion Prior-
[pdf]
[supp]
[bibtex]@InProceedings{Cho_2026_CVPR, author = {Cho, Joshua and Aghajanzadeh, Sara and Zhu, Zhen and Forsyth, David}, title = {MR. Illuminate: Zero-Shot Low-Light Image Enhancement with Diffusion Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8460-8470} }
Beyond Caption-Based Queries in Video Moment Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Pujol-Perich_2026_CVPR, author = {Pujol-Perich, David and Clap{\'e}s, Albert and Damen, Dima and Escalera, Sergio and Wray, Michael}, title = {Beyond Caption-Based Queries in Video Moment Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18545-18554} }
AutoDebias: An Automated Framework for Detecting and Mitigating Backdoor Biases in Text-to-Image Models-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Hongyi and Rahman, Mohammad Mahdinur and Dong, MingKang and Pu, Muxin and Aloqaily, Moayad and Li, Jie and Li, Xinfeng and Shen, Jialie and Qiu, Meikang and Wen, Qingsong}, title = {AutoDebias: An Automated Framework for Detecting and Mitigating Backdoor Biases in Text-to-Image Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29285-29294} }
Vision-Oriented Lightweight Neural Architecture Search with Budget-Adaptive Evaluation-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Yi and Yang, Yu-Bin}, title = {Vision-Oriented Lightweight Neural Architecture Search with Budget-Adaptive Evaluation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41985-41995} }
NVGS: Neural Visibility for Occlusion Culling in 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zoomers_2026_CVPR, author = {Zoomers, Brent and Hahlbohm, Florian and Vanherck, Joni and Jorissen, Lode and Magnor, Marcus and Michiels, Nick}, title = {NVGS: Neural Visibility for Occlusion Culling in 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29824-29833} }
CADC: Content Adaptive Diffusion-Based Generative Image Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sheng_2026_CVPR, author = {Sheng, Xihua and Zhu, Lingyu and Zhang, Tianyu and Liu, Dong and Wang, Shiqi and Wang, Jing}, title = {CADC: Content Adaptive Diffusion-Based Generative Image Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32936-32946} }
GlyphPrinter: Region-Grouped Direct Preference Optimization for Glyph-Accurate Visual Text Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shuai_2026_CVPR, author = {Shuai, Xincheng and Li, Ziye and Ding, Henghui and Tao, Dacheng}, title = {GlyphPrinter: Region-Grouped Direct Preference Optimization for Glyph-Accurate Visual Text Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7674-7683} }
TEXTRIX: Latent Attribute Grid for Native Texture Generation and Beyond-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Yifei and Bao, Yajie and Qian, Jiachen and Wu, Shuang and Lin, Youtian and Zhu, Hao and Li, Buyu and Zhang, Feihu and Cao, Xun and Yao, Yao}, title = {TEXTRIX: Latent Attribute Grid for Native Texture Generation and Beyond}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27104-27113} }
CHEEM: Continual Learning by Reuse, New, Adapt and Skip - A Hierarchical Exploration-Exploitation Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Savadikar_2026_CVPR, author = {Savadikar, Chinmay and Dai, Michelle and Wu, Tianfu}, title = {CHEEM: Continual Learning by Reuse, New, Adapt and Skip - A Hierarchical Exploration-Exploitation Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25066-25076} }
F2Net: A Frequency-Fused Network for Ultra-High Resolution Remote Sensing Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Hengzhi and Feng, Liqian and Wu, Wenhua and Zhu, Xiaogang and Wu, Qiuxia and Shan, Lianlei and Hu, Kun}, title = {F2Net: A Frequency-Fused Network for Ultra-High Resolution Remote Sensing Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13275-13284} }
Mitigating Objectness Bias and Region-to-Text Misalignment for Open-Vocabulary Panoptic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Kormushev_2026_CVPR, author = {Kormushev, Nikolay and {\v{S}}ari{\'c}, Josip and Kristan, Matej}, title = {Mitigating Objectness Bias and Region-to-Text Misalignment for Open-Vocabulary Panoptic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17905-17915} }
Relational Visual Similarity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Thao and Mo, Sicheng and Singh, Krishna Kumar and Wang, Yilin and Shi, Jing and Kolkin, Nicholas and Shechtman, Eli and Lee, Yong Jae and Li, Yuheng}, title = {Relational Visual Similarity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24141-24150} }
Adversarial Style Optimization: Enhancing VLM Jailbreaks by GRPO-based Stylistic Triggers Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Bingjun and Guo, Jialin and Yao, Yue and Ding, Xinpeng}, title = {Adversarial Style Optimization: Enhancing VLM Jailbreaks by GRPO-based Stylistic Triggers Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11-19} }
Evo-1: Lightweight Vision-Language-Action Model with Preserved Semantic Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Tao and Zhong, Yilei and Du, Yuxin and Zhang, Jingjing and Liu, Jiting and Chen, Yinxinyu and Gu, Encheng and Liu, Ziyan and Cai, Hongyi and Zou, Yanwen and Zou, Lixing and Zhou, Zhaoye and Li, Gen and Zhao, Bo}, title = {Evo-1: Lightweight Vision-Language-Action Model with Preserved Semantic Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13397-13406} }
Variational Graph-based Normal Integration-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Lixiong and Yu, Bohan and Prisacariu, Victor Adrian and Sato, Imari}, title = {Variational Graph-based Normal Integration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12663-12672} }
ReMoE: Region-Mixture Experts for Adversarially-Robust Vision Transformers-
[pdf]
[bibtex]@InProceedings{Zhong_2026_CVPR, author = {Zhong, Qinghao and Chen, Bingzhi and Liu, Yishu and Lu, Minhua and Lu, Guangming}, title = {ReMoE: Region-Mixture Experts for Adversarially-Robust Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37674-37683} }
Hyperbolic Gramian Volumes for Multimodal Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Na_2026_CVPR, author = {Na, Saiyang and Jiang, Feng and Zhou, Qifeng and Zhong, Wenliang and Dang, Thao M. and Guo, Yuzhi and Ma, Hehuan and Li, Chunyuan and An, Weizhi and Huang, Junzhou}, title = {Hyperbolic Gramian Volumes for Multimodal Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37756-37765} }
ProSoftArena: Benchmarking Hierarchical Capabilities of Multi-modal Agents in Professional Software Environments-
[pdf]
[supp]
[bibtex]@InProceedings{Ai_2026_CVPR, author = {Ai, Jiaxin and Feng, Yukang and Zhang, Fanrui and Sun, Jianwen and Li, Zizhen and Li, Chuanhao and Chang, Yifan and Wu, Wenxiao and Wang, Ruoxi and Zhai, Mingliang and Zhang, Kaipeng}, title = {ProSoftArena: Benchmarking Hierarchical Capabilities of Multi-modal Agents in Professional Software Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34586-34595} }
EReCu: Pseudo-label Evolution Fusion and Refinement with Multi-Cue Learning for Unsupervised Camouflage Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Shuo and Zhang, Gaojia and Tan, Min and Yin, Yufei and Pan, Gang}, title = {EReCu: Pseudo-label Evolution Fusion and Refinement with Multi-Cue Learning for Unsupervised Camouflage Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25547-25556} }
DynamicGTR: Leveraging Graph Topology Representation Preferences to Boost VLM Capabilities on Graph QAs-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Yanbin and Yan, Jiangyue and Kang, Chun and Chen, Yang and Liu, Hua and Kwok, James and Zhang, Yu}, title = {DynamicGTR: Leveraging Graph Topology Representation Preferences to Boost VLM Capabilities on Graph QAs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40822-40832} }
BoostSLT: Boosting Sign Language Translation via a Plug-and-Play Diffusion-Based Semantic Enhancer-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Changzhou and Ma, Wanlun and Tang, Xi and Hu, Kun and Wen, Sheng and Xiang, Yang}, title = {BoostSLT: Boosting Sign Language Translation via a Plug-and-Play Diffusion-Based Semantic Enhancer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28828-28837} }
OpenVO: Open-World Visual Odometry with Temporal Dynamics Awareness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Phuc and Nhu, Anh N. and Lin, Ming C.}, title = {OpenVO: Open-World Visual Odometry with Temporal Dynamics Awareness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14208-14218} }
Learning and Aligning Click-Aware Shape Prior for Interactive Amodal Instance Segmentation-
[pdf]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Junjie and Lin, Junwei and Hong, Ren and Liu, Shengjie and Fang, Yuming and Qian, Feng and Zuo, Yifan}, title = {Learning and Aligning Click-Aware Shape Prior for Interactive Amodal Instance Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20478-20487} }
Hg-I2P: Bridging Modalities for Generalizable Image-to-Point-Cloud Registration via Heterogeneous Graphs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{An_2026_CVPR, author = {An, Pei and Ding, Junfeng and Yang, Jiaqi and Wang, Yulong and Ma, Jie and Nan, Liangliang}, title = {Hg-I2P: Bridging Modalities for Generalizable Image-to-Point-Cloud Registration via Heterogeneous Graphs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39042-39051} }
A Multi-Agent Perception-Action Alliance for Efficient Long Video Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Yichang and Liu, Gaowen and Kompella, Ramana Rao and Huang, Tiansheng and Hu, Sihao and Ilhan, Fatih and Tekin, Selim Furkan and Yahn, Zachary and Liu, Ling}, title = {A Multi-Agent Perception-Action Alliance for Efficient Long Video Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19497-19507} }
UniEdit-I: Training-free Image Editing for Unified VLM via Iterative Understanding, Editing and Verifying-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2026_CVPR, author = {Bai, Chengyu and Chen, Jintao and Bai, Xiang and Chen, Yilong and She, Qi and Lu, Ming and Zhang, Shanghang}, title = {UniEdit-I: Training-free Image Editing for Unified VLM via Iterative Understanding, Editing and Verifying}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29750-29759} }
ZOO-Prune: Training-Free Token Pruning via Zeroth-Order Gradient Estimation in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Youngeun and Zhang, Youjia and Liu, Huiling and Jung, Aecheon and Lee, Sunwoo and Hong, Sungeun}, title = {ZOO-Prune: Training-Free Token Pruning via Zeroth-Order Gradient Estimation in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39572-39582} }
OntoAug: Rethinking Generative Data Augmentation via Ontology Guidance-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Shuo and Wang, Zhichuan and Luo, Jun}, title = {OntoAug: Rethinking Generative Data Augmentation via Ontology Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22519-22528} }
EnergyAction: Unimanual to Bimanual Composition with Energy-Based Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Mingchen and Deng, Xiang and Wei, Jie and Jiang, Dongmei and Nie, Liqiang and Guan, Weili}, title = {EnergyAction: Unimanual to Bimanual Composition with Energy-Based Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20845-20855} }
Immunizing Models Against Harmful Long-Horizon Fine-Tuning via Contractive Optimization Dynamics-
[pdf]
[supp]
[bibtex]@InProceedings{Sarker_2026_CVPR, author = {Sarker, Najibul Haque and Ibn Abdul Hakim, Zaber and Asgarov, Ali and Tang, Chia-Wei and Ishmam, Alvi Md and Thomas, Chris}, title = {Immunizing Models Against Harmful Long-Horizon Fine-Tuning via Contractive Optimization Dynamics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34940-34949} }
Chain-of-Thought Guided Multi-Modal Object Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Ya and Li, Shihao and Liu, Zhaojun and Zheng, Aihua and Li, Chenglong and Tang, Jin}, title = {Chain-of-Thought Guided Multi-Modal Object Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37705-37714} }
MimicTalker: A Multimodal Interactive and Memory-Enhanced Framework for Real-Time Dyadic 3D Head Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yinuo and Fan, Yanbo and Wang, Xuan and Zhou, Boyao and Guo, Yu and Shen, Yujun and Wang, Fei}, title = {MimicTalker: A Multimodal Interactive and Memory-Enhanced Framework for Real-Time Dyadic 3D Head Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32399-32409} }
S2D: Sparse to Dense Lifting for 3D Reconstruction with Minimal Inputs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2026_CVPR, author = {Ji, Yuzhou and Tian, Qijian and Zhu, He and Jiang, Xiaoqi and Cao, Guangzhi and Ma, Lizhuang and Xie, Yuan and Tan, Xin}, title = {S2D: Sparse to Dense Lifting for 3D Reconstruction with Minimal Inputs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7491-7502} }
From Rays to Projections: Better Inputs for Feed-Forward View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Zirui and Jiang, Zeren and Oswald, Martin R. and Song, Jie}, title = {From Rays to Projections: Better Inputs for Feed-Forward View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29012-29022} }
NexusFlow: Unifying Disparate Tasks under Partial Supervision via Invertible Flow Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Fangzhou and Wang, Yuping and Guo, Yuliang and Huang, Zixun and Huang, Xinyu and Zhang, Haichong and Yamada, Kazunori and Tu, Zhengzhong and Ren, Liu and Zhang, Ziming}, title = {NexusFlow: Unifying Disparate Tasks under Partial Supervision via Invertible Flow Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3761-3771} }
Exploring Spatiotemporal Feature Propagation for Video-Level Compressive Spectral Reconstruction: Dataset, Model and Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Lijing and Shi, Zhan and Huang, Chenglong and Wu, Jinyao and Li, Qiping and Huo, Zikang and Chen, Linsen and Zi, Chongde and Cao, Xun}, title = {Exploring Spatiotemporal Feature Propagation for Video-Level Compressive Spectral Reconstruction: Dataset, Model and Benchmark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12522-12532} }
MDS-VQA: Model-Informed Data Selection for Video Quality Assessment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zou_2026_CVPR, author = {Zou, Jian and Xu, Xiaoyu and Wang, Zhihua and Wang, Yilin and Adsumilli, Balu and Ma, Kede}, title = {MDS-VQA: Model-Informed Data Selection for Video Quality Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22713-22722} }
Aligning Multi-Character Narrative Image Generation with Multi-Aspect Human Preferences-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Ziyi and Wei, Zhipeng and Chen, Jingjing and Tan, Zhiyu and Li, Hao and Chen, Yi-Ping Phoebe}, title = {Aligning Multi-Character Narrative Image Generation with Multi-Aspect Human Preferences}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29244-29253} }
SEBA: Sample-Efficient Black-Box Attacks on Visual Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Tairan and Jin, Yulin and Liu, Junxu and Ye, Qingqing and Hu, Haibo}, title = {SEBA: Sample-Efficient Black-Box Attacks on Visual Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27861-27871} }
PP-Brep: Few-Shot B-rep Classification with Hybrid Graph Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Hao_2026_CVPR, author = {Hao, Jiacheng and Liu, Chunying and Guo, Hao and Wang, Ruohan and Gan, Hongping and Shi, Yilei}, title = {PP-Brep: Few-Shot B-rep Classification with Hybrid Graph Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41616-41625} }
Multimodal Semantic Bias Mitigation for Diverse Text-To-3D Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Min_2026_CVPR, author = {Min, Yukuan and Yang, Muli and Zhang, Jinhao and Wang, Yuxuan and Zhu, Yihang and Yan, Jiexi and Deng, Cheng}, title = {Multimodal Semantic Bias Mitigation for Diverse Text-To-3D Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14779-14788} }
Mitigating Instance Entanglement in Instance-Dependent Partial Label Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Rui and Shi, Bin and Sun, Kai and Dong, Bo}, title = {Mitigating Instance Entanglement in Instance-Dependent Partial Label Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39659-39668} }
STUR3D: Spatio-Temporal Unified Representation Learning for 3D Object Detection-
[pdf]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Huijie and Huang, Pengrui and Wang, Qiang and Fan, Baojie and Dong, Jiahua and Qu, Liangqiong}, title = {STUR3D: Spatio-Temporal Unified Representation Learning for 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33068-33077} }
InterRVOS: Interaction-Aware Referring Video Object Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2026_CVPR, author = {Jin, Woojeong and Kim, Seongchan and Lee, Jaeho and Kim, Seungryong}, title = {InterRVOS: Interaction-Aware Referring Video Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10367-10376} }
VideoWeaver: Multimodal Multi-View Video-to-Video Transfer for Embodied Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Eskandar_2026_CVPR, author = {Eskandar, George and Shen, Fengyi and Altillawi, Mohammad and Chen, Dong and Bai, Yang and Yang, Liudi and Liu, Ziyuan}, title = {VideoWeaver: Multimodal Multi-View Video-to-Video Transfer for Embodied Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29620-29630} }
TopoSlide: Topologically-Informed Histopathology Whole Slide Image Representation Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Abousamra_2026_CVPR, author = {Abousamra, Shahira and Sood, Asmita and Plevritis, Sylvia}, title = {TopoSlide: Topologically-Informed Histopathology Whole Slide Image Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13691-13701} }
ResDiT: Evoking the Intrinsic Resolution Scalability in Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Yiyang and Zhou, Feng and Yin, Xuedan and Cao, Pu and Dang, Yonghao and Yin, Jianqin}, title = {ResDiT: Evoking the Intrinsic Resolution Scalability in Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40612-40621} }
REACH: Explicit Recovery Behavior for Diffusion Policies-
[pdf]
[supp]
[bibtex]@InProceedings{Ke_2026_CVPR, author = {Ke, Zundong and Chen, Junlin and Zhu, Jiayi and Xia, Kuanhao and Zhao, Boyi and Gu, Jiayuan}, title = {REACH: Explicit Recovery Behavior for Diffusion Policies}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38498-38508} }
Same Content, Different Answers: Cross-Modal Inconsistency in MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{van_Sprang_2026_CVPR, author = {van Sprang, Angela and Samson, Laurens and Lucic, Ana and Acar, Erman and Ghebreab, Sennay and Asano, Yuki M.}, title = {Same Content, Different Answers: Cross-Modal Inconsistency in MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8781-8790} }
LNEM: Lunar Neural Elevation Model-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Suwan and Yim, Jo Ryeong and Park, Kibaek and Kim, Dong-Gyu and Kim, Eunhyeuk and Jeong, Minsup and Sim, Chae Kyung and Lee, Seokju}, title = {LNEM: Lunar Neural Elevation Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6508-6517} }
Omni IIE Bench: Benchmarking the Practical Capabilities of Image Editing Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yujia and Wang, Yuanxiang and Guan, Zhenyu and Yang, Tiankun and Bao, Chenxi and Jin, Haopeng and Luo, Jinwen and Zuo, Xinyu and Duan, Lisheng and Liang, Haijin and Ma, Jin and Wang, Xinming and Tao, Ruiwen and Yi, Hongzhu}, title = {Omni IIE Bench: Benchmarking the Practical Capabilities of Image Editing Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1089-1099} }
VibeToken: Scaling 1D Image Tokenizers and Autoregressive Models for Dynamic Resolution Generations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Patel_2026_CVPR, author = {Patel, Maitreya and Li, Jingtao and Zhuang, Weiming and Yang, Yezhou and Lv, Lingjuan}, title = {VibeToken: Scaling 1D Image Tokenizers and Autoregressive Models for Dynamic Resolution Generations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2058-2068} }
Tracking by Predicting 3-D Gaussians Over Time-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Baranwal_2026_CVPR, author = {Baranwal, Tanish and Singh, Himanshu Gaurav and Rajasegaran, Jathushan and Malik, Jitendra}, title = {Tracking by Predicting 3-D Gaussians Over Time}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42527-42537} }
TokenSplat: Token-aligned 3D Gaussian Splatting for Feed-forward Pose-free Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yihui and Lv, Chengxin and Tang, Zichen and Yang, Hongyu and Huang, Di}, title = {TokenSplat: Token-aligned 3D Gaussian Splatting for Feed-forward Pose-free Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40886-40895} }
Iris: Bringing Real-World Priors into Diffusion Model for Monocular Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Xinhao and Pei, Gensheng and Sun, Zeren and Yao, Yazhou and Shen, Fumin and Wang, Wenguan}, title = {Iris: Bringing Real-World Priors into Diffusion Model for Monocular Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26909-26919} }
Diagnose, Correct, and Learn from Manipulation Failures via Visual Symbols-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Xianchao and Zhou, Xinyu and Li, Youcheng and Shi, Jiayou and Li, Tianle and Chen, Liangming and Ren, Lei and Li, Yong-Lu}, title = {Diagnose, Correct, and Learn from Manipulation Failures via Visual Symbols}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42386-42395} }
CAD-Refiner: A Unified Framework for CAD Generation and Iterative Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Yuan_2026_CVPR, author = {Yuan, Meng and Lin, Dawei and Xie, Hongxia and Wu, Tieru and Ma, Rui}, title = {CAD-Refiner: A Unified Framework for CAD Generation and Iterative Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3244-3253} }
RewardFlow: Generate Images by Optimizing What You Reward-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Susladkar_2026_CVPR, author = {Susladkar, Onkar and Jang, Dong-Hwan and Prakash, Tushar and Juvekar, Adheesh and Shah, Vedant and Barik, Ayush and Bashir, Nabeel and Wahed, Muntasir and Shrirao, Ritish and Lourentzou, Ismini}, title = {RewardFlow: Generate Images by Optimizing What You Reward}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20066-20076} }
PAS: Prelim Attention Score for Detecting Object Hallucinations in Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Hoang_2026_CVPR, author = {Hoang, Nhat and Vu, Minh and Thai, My T. and Bhattarai, Manish}, title = {PAS: Prelim Attention Score for Detecting Object Hallucinations in Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18273-18283} }
HOPS: Hierarchical Open-vocabulary Part Segmentation with Attention-Aware Filtering and Affinity-Guided Enhancement-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xinlong and Lin, Di and Gao, Shaoyiyi and Liu, Yaxuan and He, Jixian and Li, Jiaxin and Liu, Ruonan and Guo, Qing and Yang, Kairui and Feng, Wei}, title = {HOPS: Hierarchical Open-vocabulary Part Segmentation with Attention-Aware Filtering and Affinity-Guided Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27719-27729} }
Captain Safari: A World Engine with Pose-Aligned 3D Memory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chou_2026_CVPR, author = {Chou, Yu-Cheng and Wang, Xingrui and Li, Yitong and Wang, Jiahao and Liu, Hanting and Xie, Cihang and Yuille, Alan and Xiao, Junfei}, title = {Captain Safari: A World Engine with Pose-Aligned 3D Memory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25347-25357} }
When LoRA Betrays: Backdooring Text-to-Image Models by Masquerading as Benign Adapters-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lyu_2026_CVPR, author = {Lyu, Liangwei and Xu, Jiaqi and Ding, Jianwei and Deng, Qiyao}, title = {When LoRA Betrays: Backdooring Text-to-Image Models by Masquerading as Benign Adapters}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8577-8586} }
When Understanding Becomes a Risk: Authenticity and Safety Risks in the Emerging Image Generation Paradigm-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Leng_2026_CVPR, author = {Leng, Ye and Chu, Junjie and Li, Mingjie and Lin, Chenhao and Shen, Chao and Backes, Michael and Shen, Yun and Zhang, Yang}, title = {When Understanding Becomes a Risk: Authenticity and Safety Risks in the Emerging Image Generation Paradigm}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39372-39382} }
PAMotion: Physics-Aware Motion Generation for Full-Body Interaction with Multiple Objects-
[pdf]
[supp]
[bibtex]@InProceedings{Di_2026_CVPR, author = {Di, Yan and Li, Yuheng and Wang, Yaoxing and Liu, Mengge and Gao, Shan and Ji, Xiangyang}, title = {PAMotion: Physics-Aware Motion Generation for Full-Body Interaction with Multiple Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30661-30674} }
Streaming Video Crime Anticipation with Spatio-Temporal Causal Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yusong and Gu, Zheyuan and Mao, Keyu and Shao, Minghao and Xu, Mingkun and Tiwari, Prayag and Shao, Jiawei and Zhao, Qingsong}, title = {Streaming Video Crime Anticipation with Spatio-Temporal Causal Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16933-16943} }
TopoMA: Topology-Guided Multi-Agent Dense RGB 3D Reconstruction via Distributed Inference-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xuanxuan and Shi, ShuHui and Zhang, Tianxiang and Guo, Zhetao and Huang, Zixuan and Li, You}, title = {TopoMA: Topology-Guided Multi-Agent Dense RGB 3D Reconstruction via Distributed Inference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21784-21793} }
Rejection Mixing: Fast Semantic Propagation of Mask Tokens for Efficient DLLM Inference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Yushi and Hong, Feng and Zheng, Huangjie and Chen, Xu and Chen, Zhiyong and Wang, Yanfeng and Yao, Jiangchao}, title = {Rejection Mixing: Fast Semantic Propagation of Mask Tokens for Efficient DLLM Inference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17619-17629} }
Bootstrapping Video Semantic Segmentation Model via Distillation-assisted Test-Time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Jihun and Kwon, Hoyong and Kweon, Hyeokjun and Yoon, Kuk-Jin}, title = {Bootstrapping Video Semantic Segmentation Model via Distillation-assisted Test-Time Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10766-10777} }
ICTPolarReal: A Polarized Reflection and Material Dataset of Real World Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Jing and Dharanikota, Krithika and Jia, Emily and Chen, Haiwei and Zhao, Yajie}, title = {ICTPolarReal: A Polarized Reflection and Material Dataset of Real World Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6518-6527} }
Scaling View Synthesis Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Evan and Ryu, Hyunwoo and Mitchel, Thomas W. and Sitzmann, Vincent}, title = {Scaling View Synthesis Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28893-28902} }
Multi-modal Test-time Adaptation via Adaptive Probabilistic Gaussian Calibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Jinglin and Li, Yi and Sun, Chuxiong and Xu, Xiao and Li, Jiangmeng and Xu, Fanjiang}, title = {Multi-modal Test-time Adaptation via Adaptive Probabilistic Gaussian Calibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30268-30277} }
Towards Photorealistic and Efficient Bokeh Rendering via Diffusion Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Linxiao and Zheng, Siming and Wang, Zerong and Zhang, Hao and Chen, Jinwei and Li, Bo and Chen, Shifeng and Jiang, Peng-Tao}, title = {Towards Photorealistic and Efficient Bokeh Rendering via Diffusion Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {231-240} }
Cross-Domain Few-Shot Segmentation via Multi-view Progressive Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nie_2026_CVPR, author = {Nie, Jiahao and Fu, Guanqiao and An, Wenbin and Tan, Yap-Peng and Kot, Alex C. and Lu, Shijian}, title = {Cross-Domain Few-Shot Segmentation via Multi-view Progressive Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41594-41604} }
No Hard Negatives Required: Concept Centric Learning Leads to Compositionality without Degrading Zero-shot Capabilities of Contrastive Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pham_2026_CVPR, author = {Pham, Hai X. and Hoffmann, David T. and Guerrero, Ricardo and Martinez, Brais}, title = {No Hard Negatives Required: Concept Centric Learning Leads to Compositionality without Degrading Zero-shot Capabilities of Contrastive Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33901-33910} }
V2U4Real: A Real-world Large-scale Dataset for Vehicle-to-UAV Cooperative Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Weijia and Xiang, Haoen and Wang, Tianxu and Wu, Shuaibing and Xia, Qiming and Wang, Cheng and Wen, Chenglu}, title = {V2U4Real: A Real-world Large-scale Dataset for Vehicle-to-UAV Cooperative Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4728-4737} }
Think-Then-Generate: Structural Chain-of-Thought Reasoning for Consistent 3D Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xinyue and Liu, Jin and Wang, Hongbo and He, Ran and Huang, Huaibo}, title = {Think-Then-Generate: Structural Chain-of-Thought Reasoning for Consistent 3D Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34270-34280} }
CCF: Complementary Collaborative Fusion for Domain Generalized Multi-Modal 3D Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Yuchen and Wang, Kun and Pan, Yining and Zhao, Na}, title = {CCF: Complementary Collaborative Fusion for Domain Generalized Multi-Modal 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18745-18754} }
Hilbert-Geo: Solving Solid Geometric Problems by Neural-Symbolic Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Ruoran and Cheng, Haoyu and Dong, Bin and Wang, Qiufeng}, title = {Hilbert-Geo: Solving Solid Geometric Problems by Neural-Symbolic Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9658-9667} }
PosterReward: Unlocking Accurate Evaluation for High-Quality Graphic Design Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lai_2026_CVPR, author = {Lai, Jianyu and Chen, Sixiang and Gao, Jialin and Shi, Hengyu and Liu, Zhongying and Zhai, Fuxiang and Luo, Junfeng and Wei, Xiaoming and Wang, Lujia and Zhu, Lei}, title = {PosterReward: Unlocking Accurate Evaluation for High-Quality Graphic Design Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7762-7772} }
Structural-Semantic Perception for Diffusion-Guided Temporal Forgery Localization-
[pdf]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Ligong and Guo, Yeting and Chi, Haoang}, title = {Structural-Semantic Perception for Diffusion-Guided Temporal Forgery Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35384-35393} }
OpenT2M: No-frill Motion Generation with Open-source, Large-scale, High-quality Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Bin and Zheng, Sipeng and Luo, Hao and Li, Boyuan and Liu, Jing and Lu, Zongqing}, title = {OpenT2M: No-frill Motion Generation with Open-source, Large-scale, High-quality Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30640-30649} }
PGR-Net: Prior-Guided ROI Reasoning Network for Brain Tumor MRI Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Jiacheng and Ding, Hui and Zhang, Shiyu and Huo, Guoping}, title = {PGR-Net: Prior-Guided ROI Reasoning Network for Brain Tumor MRI Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22816-22825} }
GenMask: Adapting DiT for Segmentation via Direct Mask Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yuhuan and Zhuang, Xianwei and Cai, Yuxuan and Ma, Chaofan and Bai, Shuai and Yao, Jiangchao and Zhang, Ya and Lin, Junyang and Wang, Yanfeng}, title = {GenMask: Adapting DiT for Segmentation via Direct Mask Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20455-20467} }
DPL: Decoupled Prototype Learning for Enhancing Robustness of Vision-Language Transformers to Missing Modalities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Jueqing and Qi, Yuanyuan and Yang, Xiaohao and Niu, Shuaicheng and Ke, Fucai and Zhou, Shujie and Tan, Wei and Lin, Jionghao and Buntine, Wray and Rezatofighi, Hamid and Du, Lan}, title = {DPL: Decoupled Prototype Learning for Enhancing Robustness of Vision-Language Transformers to Missing Modalities}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39219-39229} }
FedCART: Tackling Long-Tailed Distributions in Federated Adversarial Training via Classifier Refinement-
[pdf]
[bibtex]@InProceedings{Qin_2026_CVPR, author = {Qin, Yuchen and Zhou, Yizhi and Wang, Junxiao and Xie, Xin and Qi, Heng}, title = {FedCART: Tackling Long-Tailed Distributions in Federated Adversarial Training via Classifier Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24557-24566} }
STAvatar: Soft Binding and Temporal Density Control for Monocular 3D Head Avatars Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Jiankuo and Zhu, Xiangyu and Wang, Zidu and Lei, Zhen}, title = {STAvatar: Soft Binding and Temporal Density Control for Monocular 3D Head Avatars Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10996-11005} }
PiLoT: Neural Pixel-to-3D Registration for UAV-based Ego and Target Geo-localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Xiaoya and Wang, Long and Liu, Yan and Liu, Xinyi and Tan, Hanlin and Liu, Yu and Zhang, Maojun and Yan, Shen}, title = {PiLoT: Neural Pixel-to-3D Registration for UAV-based Ego and Target Geo-localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5379-5388} }
Reading Your Actions: Learning Generalizable Action Representations via Pre-training AEMG-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Zhenghao and Yao, Huilin and Wang, Kaikai and Shu, Lin}, title = {Reading Your Actions: Learning Generalizable Action Representations via Pre-training AEMG}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20108-20117} }
Pano3DComposer: Feed-Forward Compositional 3D Scene Generation from Single Panoramic Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2026_CVPR, author = {Qiu, Zidian and Wu, Ancong}, title = {Pano3DComposer: Feed-Forward Compositional 3D Scene Generation from Single Panoramic Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5902-5911} }
Toward Generalizable Whole Brain Representations with High-Resolution Light-Sheet Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Minyoung E. and Yun, Dae Hee and Patel, Aditi V. and Hon, Madeline and Guan, Webster and Lee, Taegeon and Nguyen, Brian}, title = {Toward Generalizable Whole Brain Representations with High-Resolution Light-Sheet Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35270-35279} }
Frequency-Aware Affinity for Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Ziqian and Qiu, Xianglin and Zhao, Xinqiao and Wang, Xiaolei and Zhang, Quan and Xiao, Jimin}, title = {Frequency-Aware Affinity for Weakly Supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20468-20477} }
Unified Personalized Understanding, Generating and Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2026_CVPR, author = {Zhong, Yu and Lin, Tianwei and Zhu, Ruike and Yuan, Yuqian and Zheng, Haoyu and Liang, Liang and Zhang, Wenqiao and Shao, Feifei and Li, Haoyuan and He, Wanggui and Jiang, Hao and Zhuang, Yueting}, title = {Unified Personalized Understanding, Generating and Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29400-29409} }
OpenVision 2: A Family of Generative Pretrained Visual Encoders for Multimodal Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yanqing and Li, Xianhang and Zhang, Letian and Wang, Zirui and Zheng, Zeyu and Zhou, Yuyin and Xie, Cihang}, title = {OpenVision 2: A Family of Generative Pretrained Visual Encoders for Multimodal Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39164-39174} }
OmniVTG: A Large-Scale Dataset and Training Paradigm for Open-World Video Temporal Grounding-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Minghang and Yin, Zihao and Yang, Yi and Peng, Yuxin and Liu, Yang}, title = {OmniVTG: A Large-Scale Dataset and Training Paradigm for Open-World Video Temporal Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24620-24629} }
Cut to the Chase: Training-free Multimodal Summarization via Chain-of-Events-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{You_2026_CVPR, author = {You, Xiaoxing and Huang, Qiang and Li, Lingyu and Chang, Xiaojun and Yu, Jun}, title = {Cut to the Chase: Training-free Multimodal Summarization via Chain-of-Events}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26219-26229} }
MGDHand: Multi-Granularity Prior-to-Inertial Distillation Framework for Sequential 3D Hand Pose Estimation from Sparse IMUs-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xinyi and Ren, Pengfei and Zhang, Haoyang and Zhan, Hanling and Li, Yingxi and Xie, Liang and Gao, Yue and Yin, Erwei}, title = {MGDHand: Multi-Granularity Prior-to-Inertial Distillation Framework for Sequential 3D Hand Pose Estimation from Sparse IMUs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13996-14005} }
FailureAtlas: Mapping the Failure Landscape of T2I Models via Active Exploration-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Muxi and Zhang, Zhaohua and Zhao, Chenchen and Chen, Mingyang and Jiang, Wenyu and Jiang, Tianwen and Zhuo, Jianhuan and Tang, Yu and Xiao, Qiuyong and Zhang, Jihong and Xu, Qiang}, title = {FailureAtlas: Mapping the Failure Landscape of T2I Models via Active Exploration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40782-40791} }
Saliency-Guided Representation with Consistency Policy Learning for Visual Unsupervised Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Jingbo and Zhang, Qichao and Tu, Songjun and Fang, Xing and Zheng, Yupeng and Li, Haoran and Chen, Ke and Zhao, Dongbin}, title = {Saliency-Guided Representation with Consistency Policy Learning for Visual Unsupervised Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19508-19517} }
I-Scene: 3D Instance Models are Implicit Generalizable Spatial Learners-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ling_2026_CVPR, author = {Ling, Lu and Ge, Yunhao and Sheng, Yichen and Bera, Aniket}, title = {I-Scene: 3D Instance Models are Implicit Generalizable Spatial Learners}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26974-26983} }
When Pretty Isn't Useful: Investigating Why Modern Text-to-Image Models Fail as Reliable Training Data Generators-
[pdf]
[supp]
[bibtex]@InProceedings{Adamkiewicz_2026_CVPR, author = {Adamkiewicz, Krzysztof and Moser, Brian B. and Frolov, Stanislav and Nauen, Tobias Christian and Raue, Federico and Dengel, Andreas}, title = {When Pretty Isn't Useful: Investigating Why Modern Text-to-Image Models Fail as Reliable Training Data Generators}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36660-36669} }
Cleaning the Pool: Progressive Filtering of Unlabeled Pools in Deep Active Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huseljic_2026_CVPR, author = {Huseljic, Denis and Herde, Marek and Rauch, Lukas and Hahn, Paul and Sick, Bernhard}, title = {Cleaning the Pool: Progressive Filtering of Unlabeled Pools in Deep Active Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22238-22247} }
Seeing Clearly, Reasoning Confidently: Plug-and-Play Remedies for Vision Language Model Blindness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Xin and Ni, Haomiao and Zhang, Yunbei and Hamm, Jihun and Li, Zechen and Ding, Zhengming}, title = {Seeing Clearly, Reasoning Confidently: Plug-and-Play Remedies for Vision Language Model Blindness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18806-18815} }
Thinking with Drafts: Speculative Temporal Reasoning for Efficient Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Pengfei and Cao, Meng and Wang, Yingyao and Wang, Yi and Dong, Jiahua and Song, Jun and Cheng, Yu and Zheng, Bo and Liang, Xiaodan}, title = {Thinking with Drafts: Speculative Temporal Reasoning for Efficient Long Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36255-36266} }
Dataset Distillation by Influence Matching-
[pdf]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Haoru and Wang, Wang and Wu, Sitong and Wu, Xiuzhe and Sun, Yang-Tian and Chang, Chirui and Zhang, Shaofeng and Qi, Xiaojuan}, title = {Dataset Distillation by Influence Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19654-19664} }
Clinically-Grounded Counterfactual Reasoning for Medical Video Diagnosis-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Jianzhe and Wang, Churan and Zhang, Weiyi and Li, Jianghua and Li, Li-An and Wang, Wenguan and Zhu, Yixin and Wang, Yizhou}, title = {Clinically-Grounded Counterfactual Reasoning for Medical Video Diagnosis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7014-7025} }
RGB-Event based Pedestrian Attribute Recognition: A Benchmark Dataset and An Asymmetric RWKV Fusion Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xiao and Wang, Haiyang and Wang, Shiao and Chen, Qiang and Jin, Jiandong and Song, Haoyu and Jiang, Bo and Li, Chenglong}, title = {RGB-Event based Pedestrian Attribute Recognition: A Benchmark Dataset and An Asymmetric RWKV Fusion Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32745-32755} }
Efficient Video Object Segmentation and Tracking with Recurrent Dynamic Submodel-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Weidong and Liang, Zhiyuan and Wan, Xinyan and Zhu, Chen and Xu, Zhaopan and Zhou, Pengfei and Song, Yan and You, Yang and Zhao, Wangbo}, title = {Efficient Video Object Segmentation and Tracking with Recurrent Dynamic Submodel}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20912-20921} }
Zero-Shot Depth Completion with Vision-Language Model-
[pdf]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Zhiqiang and Wu, Yuan and Lee, Gim Hee}, title = {Zero-Shot Depth Completion with Vision-Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19833-19843} }
Towards Holistic Modeling for Video Frame Interpolation with Auto-regressive Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Xinyu and Li, Han and Huang, Yuyang and Zheng, Ziyang and Wang, Yaoming and Chen, Xin and Dai, Wenrui and Li, Chenglin and Zou, Junni and Xiong, Hongkai}, title = {Towards Holistic Modeling for Video Frame Interpolation with Auto-regressive Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11448-11458} }
R4: Retrieval-Augmented Reasoning for Vision-Language Models in 4D Spatio-Temporal Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sohn_2026_CVPR, author = {Sohn, Tin Stribor and Dillitzer, Maximilian and Corso, Jason J. and Sax, Eric}, title = {R4: Retrieval-Augmented Reasoning for Vision-Language Models in 4D Spatio-Temporal Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38711-38721} }
WildCap: Facial Albedo Capture in the Wild via Hybrid Inverse Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Yuxuan and Ming, Xin and Li, Tianxiao and Shen, Zhuofan and Zhang, Qixuan and Xu, Lan and Xu, Feng}, title = {WildCap: Facial Albedo Capture in the Wild via Hybrid Inverse Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10909-10920} }
SAVA-X: Ego-to-Exo Imitation Error Detection via Scene-Adaptive View Alignment and Bidirectional Cross View Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xiang and Qiu, Heqian and Wang, Lanxiao and Qiu, Benliu and Meng, Fanman and Xu, Linfeng and Li, Hongliang}, title = {SAVA-X: Ego-to-Exo Imitation Error Detection via Scene-Adaptive View Alignment and Bidirectional Cross View Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28062-28073} }
SemanticVLA: Towards Semantic Reasoning over Action Memorization via Synergistic Explicit Trace and Latent Action Planning-
[pdf]
[supp]
[bibtex]@InProceedings{Ni_2026_CVPR, author = {Ni, Fei and Chen, Zhuo and Yuan, Yifu and Dong, Zibin and Yao, Xianze and Luo, Shan and Hao, Jianye and Deng, Jiankang and Zafeiriou, Stefanos}, title = {SemanticVLA: Towards Semantic Reasoning over Action Memorization via Synergistic Explicit Trace and Latent Action Planning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12237-12247} }
AERGS-SLAM: Auto-Exposure-Robust Stereo 3D Gaussian Splatting SLAM-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Zhiyu and Hui, Feng and Liu, Yu}, title = {AERGS-SLAM: Auto-Exposure-Robust Stereo 3D Gaussian Splatting SLAM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40929-40938} }
Photo3D: Advancing Photorealistic 3D Generation through Structure-Aligned Detail Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Xinyue and Ma, Zhiyuan and Sun, Lingchen and Guo, Yanjun and Zhang, Lei}, title = {Photo3D: Advancing Photorealistic 3D Generation through Structure-Aligned Detail Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34237-34247} }
Rosetta Stone For Unified MLLMs: A Unified Tokenizer to Decipher Understanding and Generation-
[pdf]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Wenyu and Li, Hufei and Jin, Ruijin and Kong, Xiangheng and Jiang, Yuning}, title = {Rosetta Stone For Unified MLLMs: A Unified Tokenizer to Decipher Understanding and Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22964-22974} }
Anomaly as Non-Conformity via Training-Free Graph Laplacian Energy Minimization-
[pdf]
[supp]
[bibtex]@InProceedings{Seo_2026_CVPR, author = {Seo, Jungwook and Kim, Minjeong and Lee, Younkwan and Shin, Seungho and Baik, Sungyong}, title = {Anomaly as Non-Conformity via Training-Free Graph Laplacian Energy Minimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21336-21345} }
TTL: Test-time Textual Learning for OOD Detection with Pretrained Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Jinlun and Liao, Jiang and Lai, Runhe and Lu, Xinhua and Zhuang, Jiaxin and Gan, Zhiyong and Wang, Ruixuan}, title = {TTL: Test-time Textual Learning for OOD Detection with Pretrained Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27590-27599} }
Towards Stable Self-Supervised Object Representations in Unconstrained Egocentric Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Yuting and Cheng, Xilong and Qin, Yunxiao and Li, Zhengnan and Zhang, Jingjing}, title = {Towards Stable Self-Supervised Object Representations in Unconstrained Egocentric Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10545-10555} }
Ultra-Low Bitrate Perceptual Image Compression with Shallow Encoder-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Tianyu and Liu, Dong and Chen, Chang Wen}, title = {Ultra-Low Bitrate Perceptual Image Compression with Shallow Encoder}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12118-12128} }
FreqSIC: Frequency-aware Stereo Image Compression with Bi-directional Checkerboard Context Model-
[pdf]
[supp]
[bibtex]@InProceedings{Qin_2026_CVPR, author = {Qin, Shiyu and Lu, Yongkang and Zhou, Yimin and Li, Jiawei and Ren, Yifan and Xue, Yuerong and Xia, Shu-Tao and Chen, Bin}, title = {FreqSIC: Frequency-aware Stereo Image Compression with Bi-directional Checkerboard Context Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19393-19402} }
T2SGrid: Temporal-to-Spatial Gridification for Video Temporal Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Chaohong and He, Yihan and Nie, Yongwei and Ma, Fei and Xu, Xuemiao and Long, Chengjiang}, title = {T2SGrid: Temporal-to-Spatial Gridification for Video Temporal Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3443-3454} }
LiREC-Net: A Target-Free and Learning-Based Network for LiDAR, RGB, and Event Calibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dash_2026_CVPR, author = {Dash, Aditya Ranjan and Battrawy, Ramy and Schuster, Ren\'e and Stricker, Didier}, title = {LiREC-Net: A Target-Free and Learning-Based Network for LiDAR, RGB, and Event Calibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31421-31429} }
OVI-MAP: Open-Vocabulary Instance-Semantic Mapping-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Zilong and Tombari, Federico and Pollefeys, Marc and Wald, Johanna and Barath, Daniel}, title = {OVI-MAP: Open-Vocabulary Instance-Semantic Mapping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12606-12616} }
GDFA: Geometry-Driven Federated Unlearning with Directional Task Vector Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Weng_2026_CVPR, author = {Weng, Xiuting and Pu, Ruizhi and Yao, Yuanhang and Yue, Kun and Tang, Zhiwen and Yu, Lixing}, title = {GDFA: Geometry-Driven Federated Unlearning with Directional Task Vector Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10346-10356} }
What Do Visual Tokens Really Encode? Uncovering Sparsity and Redundancy in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Yingqi and Tong, Junlong and Zhao, Anhao and Shen, Xiaoyu}, title = {What Do Visual Tokens Really Encode? Uncovering Sparsity and Redundancy in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11987-11997} }
Less is More: Data-Efficient Adaptation for Controllable Text-to-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Shihan and Kulkarni, Nilesh and Hyde, David and Smirnov, Dmitriy}, title = {Less is More: Data-Efficient Adaptation for Controllable Text-to-Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14811-14821} }
All in One: Unifying Deepfake Detection, Tampering Localization, and Source Tracing with a Robust Landmark-Identity Watermark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Junjiang and Wang, Liejun and Guo, Zhiqing}, title = {All in One: Unifying Deepfake Detection, Tampering Localization, and Source Tracing with a Robust Landmark-Identity Watermark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14106-14115} }
Socratic-Geo: Synthetic Data Generation and Cross-Modal Geometric Reasoning via Multi-Agent Interaction-
[pdf]
[supp]
[bibtex]@InProceedings{Jiao_2026_CVPR, author = {Jiao, Zhengbo and Zhang, Zifan and Wang, Shaobo and Wang, Wei and Zhao, Bing and Wei, Hu and Zhang, Linfeng}, title = {Socratic-Geo: Synthetic Data Generation and Cross-Modal Geometric Reasoning via Multi-Agent Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23795-23804} }
Coordinate Denoising for Non-Equilibrium Molecular Representation Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Qianwei and Xu, Baile and Zhao, Jian and Shen, Furao}, title = {Coordinate Denoising for Non-Equilibrium Molecular Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3584-3593} }
Drainage: A Unifying Framework for Addressing Class Uncertainty-
[pdf]
[supp]
[bibtex]@InProceedings{Taha_2026_CVPR, author = {Taha, Yasser and Montavon, Gr\'egoire and K\"orber, Nils}, title = {Drainage: A Unifying Framework for Addressing Class Uncertainty}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41934-41943} }
Learning Cross-View Object Correspondence via Cycle-Consistent Mask Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Shannan and Zheng, Leqi and Lv, Keyu and Ni, Jingchen and Wei, Hongyang and Zhang, Jiajun and Wang, Guangting and LYU, Jing and Yuan, Chun and Rao, Fengyun}, title = {Learning Cross-View Object Correspondence via Cycle-Consistent Mask Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6653-6663} }
Can We Build Scene Graphs, Not Classify Them? FlowSG: Progressive Image-Conditioned Scene Graph Generation with Flow Matching-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Xin and Qin, Ke and Yin, Wen and Li, Yuan-Fang and Li, Ming and He, Tao}, title = {Can We Build Scene Graphs, Not Classify Them? FlowSG: Progressive Image-Conditioned Scene Graph Generation with Flow Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10208-10218} }
SATTC: Structure-Aware Label-Free Test-Time Calibration for Cross-Subject EEG-to-Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Qunjie and Zhu, Weina}, title = {SATTC: Structure-Aware Label-Free Test-Time Calibration for Cross-Subject EEG-to-Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16887-16896} }
Restore Text First, Enhance Image Later: Two-Stage Scene Text Image Super-Resolution with Glyph Structure Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Minxing and Fan, Linlong and Wang, Qiushi and Wu, Ge and Luo, Yiyan and Yu, Yuhang and Chen, Jinwei and Wang, Yaxing and Fan, Qingnan and Yang, Jian}, title = {Restore Text First, Enhance Image Later: Two-Stage Scene Text Image Super-Resolution with Glyph Structure Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30553-30563} }
Lifelong Imitation Learning with Multimodal Latent Replay and Incremental Adjustment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Fanqi and Tiezzi, Matteo and Apicella, Tommaso and Beyan, Cigdem and Murino, Vittorio}, title = {Lifelong Imitation Learning with Multimodal Latent Replay and Incremental Adjustment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6740-6749} }
Evo-Retriever: LLM-Guided Curriculum Evolution with Viewpoint-Pathway Collaboration for Multimodal Document Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Weiqing and Guo, Jinyue and Wang, Yaqi and Xiao, Haiyang and Zhang, Yuewei and Liu, Guohua and Wang, Hao Henry}, title = {Evo-Retriever: LLM-Guided Curriculum Evolution with Viewpoint-Pathway Collaboration for Multimodal Document Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31113-31123} }
A Self-Conditioned Representation Guided Diffusion Model for Realistic Text-to-LiDAR Scene Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2026_CVPR, author = {Qu, Wentao and Mei, Guofeng and Wu, Yang and Gong, YongShun and Huang, Xiaoshui and Xiao, Liang}, title = {A Self-Conditioned Representation Guided Diffusion Model for Realistic Text-to-LiDAR Scene Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9434-9444} }
Concept Regions Matter: Benchmarking CLIP with a New Cluster-Importance Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Agarwal_2026_CVPR, author = {Agarwal, Aishwarya and Karanam, Srikrishna and Gandhi, Vineet}, title = {Concept Regions Matter: Benchmarking CLIP with a New Cluster-Importance Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2864-2874} }
Fine-Grained Multi Image Object Hallucination Benchmark-
[pdf]
[supp]
[bibtex]@InProceedings{Min_2026_CVPR, author = {Min, Joonki and Kim, Chaeyun and Choi, Hyungwook and Kim, Yejin and Kim, Kihyun and Jo, Yohan and Lee, Joonseok}, title = {Fine-Grained Multi Image Object Hallucination Benchmark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18295-18305} }
MixerCSeg: An Efficient Mixer Architecture for Crack Segmentation via Decoupled Mamba Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Zilong and Ding, Zhengming and Niu, Pei and Sun, Wenhao and Guo, Feng}, title = {MixerCSeg: An Efficient Mixer Architecture for Crack Segmentation via Decoupled Mamba Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17969-17978} }
Co-Me: Confidence Guided Token Merging for Visual Geometric Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yutian and Qiu, Yuheng and Li, Ruogu and Patrikar, Jay and Scherer, Sebastian}, title = {Co-Me: Confidence Guided Token Merging for Visual Geometric Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14590-14599} }
EagleNet: Energy-Aware Fine-Grained Relationship Learning Network for Text-Video Retrieval-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yuhan and Dai, Pengwen and Wang, Chuan and Wu, Dayan and Cao, Xiaochun}, title = {EagleNet: Energy-Aware Fine-Grained Relationship Learning Network for Text-Video Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23901-23911} }
Local Precise Refinement: A Dual-Gated Mixture-of-Experts for Enhancing Foundation Model Generalization against Spectral Shifts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xi and Zhang, Maojun and Liu, Yu and Yan, Shen}, title = {Local Precise Refinement: A Dual-Gated Mixture-of-Experts for Enhancing Foundation Model Generalization against Spectral Shifts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20521-20531} }
Stereo World Model: Camera-Guided Stereo Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Yang-Tian and Huang, Zehuan and Niu, Yifan and Ma, Lin and Cao, Yan-Pei and Ma, Yuewen and Qi, Xiaojuan}, title = {Stereo World Model: Camera-Guided Stereo Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18342-18353} }
MotionEnhancer: Leveraging Video Diffusion for Motion-Enhanced Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Yifan and Zhang, Chao and Ma, Ruifei and Gao, Fei and Yang, Zhifei and Qi, Jiaxing and Chen, Zhipeng}, title = {MotionEnhancer: Leveraging Video Diffusion for Motion-Enhanced Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2778-2787} }
OrionEdit: Bridging Reference and Source Images for Generalized Cross-Image Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Zeyu and Po, Lai Man and Xu, Xuyuan and Wang, Yexin and Gong, Guoping and Wu, Haoxuan and Yan, Chenbo and Li, Kun and Liu, Yuyang}, title = {OrionEdit: Bridging Reference and Source Images for Generalized Cross-Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9127-9138} }
Accelerating Streaming Video Large Language Models via Hierarchical Token Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yiyu and Liu, Xuyang and Gui, Xiyan and Lin, Xinying and Yang, Boxue and Liao, Chenfei and Chen, Tailai and Zhang, Linfeng}, title = {Accelerating Streaming Video Large Language Models via Hierarchical Token Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18523-18533} }
MS^2Gait: A Multi-Scale Spatio-Temporal Fusion Network for LiDAR-based Gait Recognition-
[pdf]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Shenyin and Wang, Yishan and Li, Xinyu and Liu, Rui and Wang, Zhongyuan and Tian, Xin}, title = {MS{\textasciicircum}2Gait: A Multi-Scale Spatio-Temporal Fusion Network for LiDAR-based Gait Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17184-17193} }
G-MIXER: Geodesic Mixup-based Implicit Semantic Expansion and Explicit Semantic Re-ranking for Zero-Shot Composed Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lim_2026_CVPR, author = {Lim, Jiyoung and Yang, Heejae and Lee, Jee-Hyong}, title = {G-MIXER: Geodesic Mixup-based Implicit Semantic Expansion and Explicit Semantic Re-ranking for Zero-Shot Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33891-33900} }
Reconstructing CLIP for Open-Vocabulary Dense Perception-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yajie and Zhang, Jinjin and Liu, Qingjie and Huang, Di}, title = {Reconstructing CLIP for Open-Vocabulary Dense Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39208-39218} }
Cross-Axis Feature Fusion with Joint-Wise Motion Difference Prediction for Text-Based 3D Human Motion Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Gyojin and Kim, Junmo}, title = {Cross-Axis Feature Fusion with Joint-Wise Motion Difference Prediction for Text-Based 3D Human Motion Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30618-30628} }
GeCo: Geometry-Consistent Regularization for Domain Generalized Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zang_2026_CVPR, author = {Zang, Qi and Zhao, Dong and Pu, Nan and Li, Wenjing and Zhong, Zhun and Wang, Meng}, title = {GeCo: Geometry-Consistent Regularization for Domain Generalized Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {871-881} }
VIRD: View-Invariant Representation through Dual-Axis Transformation for Cross-View Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Juhye and Lee, Wooju and Hong, Dasol and Sung, Changki and Seo, Youngwoo and Kang, Dongwan and Myung, Hyun}, title = {VIRD: View-Invariant Representation through Dual-Axis Transformation for Cross-View Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33693-33702} }
Gen3R: 3D Scene Generation Meets Feed-Forward Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Jiaxin and Yang, Yuanbo and Yang, Bangbang and Ma, Lin and Ma, Yuewen and Liao, Yiyi}, title = {Gen3R: 3D Scene Generation Meets Feed-Forward Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25358-25369} }
REALM: An MLLM-Agent Framework for Open World 3D Reasoning Segmentation and Editing on Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Changyue and Chen, Minghao and Mao, Yiping and Yang, Chuxiao and Hu, Xinyuan and Ding, Jiajun and Yu, Zhou}, title = {REALM: An MLLM-Agent Framework for Open World 3D Reasoning Segmentation and Editing on Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16779-16788} }
Unlocking Positive Transfer in Incrementally Learning Surgical Instruments: A Self-reflection Hierarchical Prompt Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Yu and Li, Kang and Li, Zheng and Heng, Pheng-Ann}, title = {Unlocking Positive Transfer in Incrementally Learning Surgical Instruments: A Self-reflection Hierarchical Prompt Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21006-21015} }
AVGGT: Rethinking Global Attention for Accelerating VGGT-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Xianbing and Zhu, Zhikai and Lou, Zhengyu and Yang, Bo and Tang, Jinyang and Zhang, Liqing and Wang, He and Zhang, Jianfu}, title = {AVGGT: Rethinking Global Attention for Accelerating VGGT}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {251-260} }
Restore, Assess, Repeat: A Unified Framework for Iterative Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, I-Hsiang and Hadji, Isma and Sanchez, Enrique and Bulat, Adrian and Kuo, Sy-Yen and Timofte, Radu and Tzimiropoulos, Georgios and Martinez, Brais}, title = {Restore, Assess, Repeat: A Unified Framework for Iterative Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15506-15515} }
CDICS: Delving Into Fine-Grained Attribute for In-Context Segmentation via Compositional Prompts and Phased Decoupling-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zhiyu and Sheng, Dianmo and Chu, Qi and Chen, Shilong and Gong, Tao and Wei, Zhou and Yu, Nenghai}, title = {CDICS: Delving Into Fine-Grained Attribute for In-Context Segmentation via Compositional Prompts and Phased Decoupling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13179-13188} }
TimeBridge: Self-Supervised Video Representation Learning via Start-End Joint Embedding and In-Between Frame Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Qin and Morrison, Abigail and Scharr, Hanno and Krajsek, Kai}, title = {TimeBridge: Self-Supervised Video Representation Learning via Start-End Joint Embedding and In-Between Frame Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39647-39658} }
Learning to Identify Out-of-Distribution Objects for 3D LiDAR Anomaly Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mosco_2026_CVPR, author = {Mosco, Simone and Fusaro, Daniel and Pretto, Alberto}, title = {Learning to Identify Out-of-Distribution Objects for 3D LiDAR Anomaly Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17204-17214} }
Object-Generalized Re-Identification: A Step Towards Universal Instance Perception-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Shuoyi and Wu, Yurui and Ye, Mang}, title = {Object-Generalized Re-Identification: A Step Towards Universal Instance Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18481-18491} }
Towards Real-World Document Parsing via Realistic Scene Synthesis and Document-Aware Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Gengluo and Lyu, Pengyuan and Zhang, Chengquan and Shen, Huawen and Wu, Liang and Wan, Xingyu and Zeng, Gangyan and Hu, Han and Ma, Can and Zhou, Yu}, title = {Towards Real-World Document Parsing via Realistic Scene Synthesis and Document-Aware Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23709-23719} }
Thinking With Videos: Multimodal Tool-Augmented Reinforcement Learning for Long Video Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Haoji and Gu, Xin and Li, Jiawen and Ma, Chixiang and Bai, Sule and Zhang, Chubin and Zhang, Bowen and Zhou, Zhichao and He, Dongliang and Tang, Yansong}, title = {Thinking With Videos: Multimodal Tool-Augmented Reinforcement Learning for Long Video Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32903-32914} }
High-Fidelity Mobile Avatars with Pruned Local Blendshapes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhan_2026_CVPR, author = {Zhan, Youyi and Wang, He and Shao, Tianjia and Zhou, Kun}, title = {High-Fidelity Mobile Avatars with Pruned Local Blendshapes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32345-32356} }
Wavelet-based Frame Selection by Detecting Semantic Boundary for Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Wang and Zeng, Yuhui and Luo, Yongdong and Xie, Tianyu and Lin, Luojun and Ji, Jiayi and Zhang, Yan and Zheng, Xiawu}, title = {Wavelet-based Frame Selection by Detecting Semantic Boundary for Long Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24052-24061} }
Bridging Pixels and Words: Mask-Aware Local Semantic Fusion for Multimodal Media Verification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zizhao and Wei, Ping and Ren, Ziyang and Li, Huan and Yin, Xiangru}, title = {Bridging Pixels and Words: Mask-Aware Local Semantic Fusion for Multimodal Media Verification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26561-26571} }
Robust Remote Sensing Image-Text Retrieval with Noisy Correspondence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Qiya and Xie, Yiqiang and Sun, Yuan and Dian, Renwei and Kang, Xudong}, title = {Robust Remote Sensing Image-Text Retrieval with Noisy Correspondence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9732-9741} }
CodeV: Code with Images for Faithful Visual Reasoning via Tool-Aware Policy Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hou_2026_CVPR, author = {Hou, Xinhai and Xu, Shaoyuan and Biyani, Manan and Li, Moyan and Liu, Jia and Hollon, Todd C and Wang, Bryan}, title = {CodeV: Code with Images for Faithful Visual Reasoning via Tool-Aware Policy Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21500-21510} }
Neural Differentiation in Deep Networks: A Theoretical Framework for Expressivity and Representational Diversity-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Boyuan and Jiang, Richard}, title = {Neural Differentiation in Deep Networks: A Theoretical Framework for Expressivity and Representational Diversity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41944-41953} }
STAR-R1: Multi-View Spatial TrAnsformation Reasoning by Reinforcing Multimodal LLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zongzhao and Ma, Zongyang and Li, Mingze and Li, Songyou and Rong, Yu and Xu, Tingyang and Zhang, Ziqi and Zhao, Deli and Huang, Wenbing}, title = {STAR-R1: Multi-View Spatial TrAnsformation Reasoning by Reinforcing Multimodal LLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12041-12051} }
Mario: Multimodal Graph Reasoning with Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Yuanfu and Li, Kang and Guo, Pengkang and Liu, Jiajin and Tan, Qiaoyu}, title = {Mario: Multimodal Graph Reasoning with Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19219-19228} }
ShapeAR: Generating Editable Shape Layers via Autoregressive Diffusion-
[pdf]
[bibtex]@InProceedings{Chakraborty_2026_CVPR, author = {Chakraborty, Souymodip and Singh, Ankur and Singh, Amit Vikram and Batra, Vineet and Phogat, Ankit}, title = {ShapeAR: Generating Editable Shape Layers via Autoregressive Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40664-40673} }
MRI Contrast Enhancement Kinetics World Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kong_2026_CVPR, author = {Kong, Jindi and He, Yuting and Xia, Cong and Ge, Rongjun and Li, Shuo}, title = {MRI Contrast Enhancement Kinetics World Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1288-1299} }
L3DR: 3D-aware LiDAR Diffusion and Rectification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Quan and Zhang, Xiaoqin and Shao, Ling and Lu, Shijian}, title = {L3DR: 3D-aware LiDAR Diffusion and Rectification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17153-17163} }
Semantic Derivative Flow: Graph-Guided Diffusion for Controllable Instance Interactions-
[pdf]
[supp]
[bibtex]@InProceedings{Mei_2026_CVPR, author = {Mei, Shibin and Wang, Hang and Ni, Bingbing}, title = {Semantic Derivative Flow: Graph-Guided Diffusion for Controllable Instance Interactions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14822-14831} }
Towards Stable Federated Continual Test-Time Adaptation in Wild World-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Liwen and Dong, Xingbo and Liao, Iman Yi and Jin, Zhe}, title = {Towards Stable Federated Continual Test-Time Adaptation in Wild World}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29451-29461} }
Beyond Ground-Truth: Leveraging Image Quality Priors for Real-World Image Restoration-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Fengyang and Hu, Peng and Xu, Lei and Guo, XingE and Qin, Guanyi and Shen, Yuqi and Fang, Chengyu and Zhang, Rihan and He, Chunming and Farsiu, Sina}, title = {Beyond Ground-Truth: Leveraging Image Quality Priors for Real-World Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29897-29908} }
MindDriver: Introducing Progressive Multimodal Reasoning for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Lingjun and Yuan, Yujian and Wu, Changjie and Chang, Xinyuan and Cai, Xin and Zeng, Shuang and Shi, Linzhe and Wang, Sijin and Zhang, Hang and Xu, Mu}, title = {MindDriver: Introducing Progressive Multimodal Reasoning for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17831-17841} }
Hybrid Token Compression for Vision-Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jusheng and Guo, Xiaoyang and Cai, Kaitong and Lv, Qinhan and Fan, Yijia and Chai, Wenhao and Wang, Jian and Wang, Keze}, title = {Hybrid Token Compression for Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31889-31899} }
Trust-calibrated Collaborative Learning for Long-Tailed Visual Recognition-
[pdf]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Hao and Luo, Tingjin}, title = {Trust-calibrated Collaborative Learning for Long-Tailed Visual Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40866-40875} }
STCast: Adaptive Boundary Alignment for Global and Regional Weather Forecasting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Hao and Han, Tao and Zhang, Jie and Guo, Song and Bai, Lei}, title = {STCast: Adaptive Boundary Alignment for Global and Regional Weather Forecasting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20586-20596} }
Parameter-Efficient Adaptation for MLLMs via Implicit Modality Decomposition-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Mingfang and Wang, Yunhong and Wang, Lu and Chen, Jiaxin}, title = {Parameter-Efficient Adaptation for MLLMs via Implicit Modality Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37745-37755} }
Resolving Endpoint Underfitting in Diffusion Bridges via Noise Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Yurong and Zhang, Zicheng and Han, Congying and Guo, Tiande and Qiu, Xinmin}, title = {Resolving Endpoint Underfitting in Diffusion Bridges via Noise Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27388-27397} }
Rethinking Two-Stage Referring-by-Tracking in Referring Multi-Object Tracking: Make it Strong Again-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Weize and Du, Yunhao and Yin, Qixiang and Zhao, Zhicheng and Su, Fei}, title = {Rethinking Two-Stage Referring-by-Tracking in Referring Multi-Object Tracking: Make it Strong Again}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42549-42559} }
FaithFusion: Harmonizing Reconstruction and Generation via Pixel-wise Information Gain-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, YuAn and Li, Xiaofan and Huang, Chi and Zhang, Wenhao and Li, Hao and Wang, Bosheng and Sun, Xun and Wang, Jun}, title = {FaithFusion: Harmonizing Reconstruction and Generation via Pixel-wise Information Gain}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1198-1209} }
Pointer-CAD: Unifying B-Rep and Command Sequences via Pointer-based Edges & Faces Selection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qi_2026_CVPR, author = {Qi, Dacheng and Wang, Chenyu and Xu, Jingwei and Chu, Tianzhe and Zhao, Zibo and Liu, Wen and Ding, Wenrui and Ma, Yi and Gao, Shenghua}, title = {Pointer-CAD: Unifying B-Rep and Command Sequences via Pointer-based Edges \& Faces Selection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17377-17387} }
VarSplat: Uncertainty-aware 3D Gaussian Splatting for Robust RGB-D SLAM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tran_2026_CVPR, author = {Tran, Anh Thuan and Kosecka, Jana}, title = {VarSplat: Uncertainty-aware 3D Gaussian Splatting for Robust RGB-D SLAM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26072-26082} }
SafeDrive: Fine-Grained Safety Reasoning for End-to-End Driving in a Sparse World-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Jungho and Oh, Jiyong and Yu, Seunghoon and Shin, Hongjae and Kwak, Donghyuk and Choi, Jun Won}, title = {SafeDrive: Fine-Grained Safety Reasoning for End-to-End Driving in a Sparse World}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24854-24864} }
StreamDiT: Real-Time Streaming Text-to-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kodaira_2026_CVPR, author = {Kodaira, Akio and Hou, Tingbo and Hou, Ji and Georgopoulos, Markos and Juefei-Xu, Felix and Tomizuka, Masayoshi and Zhao, Yue}, title = {StreamDiT: Real-Time Streaming Text-to-Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29200-29210} }
IR-HGP: Physically-Aware Gaussian Inverse Rendering for High-Illumination Scenes via Generative Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Qingan and Li, Wensheng and Gao, Chengying}, title = {IR-HGP: Physically-Aware Gaussian Inverse Rendering for High-Illumination Scenes via Generative Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1210-1220} }
GuideFlow: Constraint-Guided Flow Matching for Planning in End-to-End Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Lin and Jia, Caiyan and Yu, Guanyi and Song, Ziying and Li, Junqiao and Jia, Feiyang and Wu, Peiliang and Hao, Xiaoshuai and Luo, Yadan}, title = {GuideFlow: Constraint-Guided Flow Matching for Planning in End-to-End Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3719-3728} }
CryoKRAQEN: Kernel-Regularized Annealing for Quantized Embedding Networks in Cryo-EM Heterogeneous Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Wenyuan and Wu, Yutan and He, Xuming}, title = {CryoKRAQEN: Kernel-Regularized Annealing for Quantized Embedding Networks in Cryo-EM Heterogeneous Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28298-28307} }
mVLM: A Vision Language Model for mNPUs-
[pdf]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zijie and Fan, Guiyun and Yang, Zhaoxing and Ding, Rong and Jin, Haiming}, title = {mVLM: A Vision Language Model for mNPUs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18892-18902} }
The Image as Its Own Reward: Reinforcement Learning with Adversarial Reward for Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Weijia and Chen, Hao and Yang, Zhenheng and Shou, Mike Zheng}, title = {The Image as Its Own Reward: Reinforcement Learning with Adversarial Reward for Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5999-6009} }
Decompose and Transfer: CoT-Prompting Enhanced Alignment for Open-Vocabulary Temporal Action Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Sa and Zhang, Wanqian and Wang, Lin and Chen, Xiaohua and Cui, Chenxu and Zhang, Jinchao and Li, Bo}, title = {Decompose and Transfer: CoT-Prompting Enhanced Alignment for Open-Vocabulary Temporal Action Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20334-20344} }
Event Stream Filtering via Probability Flux Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Jinze and Zhai, Wei and Cao, Yang and Li, Bin and Zha, Zheng-Jun}, title = {Event Stream Filtering via Probability Flux Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8023-8032} }
Roots Beneath the Cut: Uncovering the Risk of Concept Revival in Pruning-Based Unlearning for Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Ci and Ding, Zhaojun and Yang, Chence and Liu, Jun and Zhai, Xiaoming and Huang, Shaoyi and Li, Beiwen and Ma, Xiaolong and Lu, Jin and Yuan, Geng}, title = {Roots Beneath the Cut: Uncovering the Risk of Concept Revival in Pruning-Based Unlearning for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35872-35881} }
Layer-wise Instance Binding for Regional and Occlusion Control in Text-to-Image Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Ruidong and Bai, Yancheng and Zhang, Xuanpu and Zeng, Jianhao and Wang, Lanjun and Song, Dan and Sun, Lei and Chu, Xiangxiang and Liu, Anan}, title = {Layer-wise Instance Binding for Regional and Occlusion Control in Text-to-Image Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11493-11503} }
Efficient Unrolled Networks for Large-Scale 3D Inverse Problems-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vo_2026_CVPR, author = {Vo, Romain and Tachella, Juli\'an}, title = {Efficient Unrolled Networks for Large-Scale 3D Inverse Problems}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36343-36353} }
SliderEdit: Continuous Image Editing with Fine-Grained Instruction Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zarei_2026_CVPR, author = {Zarei, Arman and Basu, Samyadeep and Pournemat, Mobina and Nag, Sayan and Rossi, Ryan A. and Feizi, Soheil}, title = {SliderEdit: Continuous Image Editing with Fine-Grained Instruction Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14430-14439} }
Open the Motion Door: Atomic Motion Decomposition and Recomposition for Open-Vocabulary Motion Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Ke and Zhang, Jiangning and Yi, Ran and Gong, Jingyu and Wang, Yabiao and Wang, Yating and Tan, Xin and Wang, Chengjie and Ma, Lizhuang}, title = {Open the Motion Door: Atomic Motion Decomposition and Recomposition for Open-Vocabulary Motion Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9330-9341} }
Reinforcement-Guided Synthetic Data Generation for Privacy-Sensitive Identity Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2026_CVPR, author = {Jia, Xuemei and Du, Jiawei and Wei, Hui and Chen, Jun and Zhou, Joey Tianyi and Wang, Zheng}, title = {Reinforcement-Guided Synthetic Data Generation for Privacy-Sensitive Identity Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20034-20044} }
Faithful Contouring: Near-Lossless 3D Voxel Representation Free from Iso-surface-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Yihao and He, Xianglong and Pan, Chuanyu and Chen, Yiwen and Wu, Jiaqi and Li, Yangguang and Ouyang, Wanli and Hu, Yuanming and Yang, Guang and Yap, ChoonHwai}, title = {Faithful Contouring: Near-Lossless 3D Voxel Representation Free from Iso-surface}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14408-14418} }
Image Generation from Contextually-Contradictory Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huberman_2026_CVPR, author = {Huberman, Saar and Patashnik, Or and Dahary, Omer and Mokady, Ron and Cohen-Or, Daniel}, title = {Image Generation from Contextually-Contradictory Prompts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14885-14894} }
See Less, See Right: Bi-directional Perceptual Shaping For Multimodal Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shuoshuo and Zhang, Yizhen and Fu, Jingjing and Song, Lei and Bian, Jiang and Yang, Yujiu and Wang, Rui}, title = {See Less, See Right: Bi-directional Perceptual Shaping For Multimodal Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33499-33509} }
Mesh-Pro: Asynchronous Advantage-guided Ranking Preference Optimization for Artist-style Quadrilateral Mesh Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Zhen and Liu, Jian and Lei, Biwen and Xu, Jing and Weng, Haohan and Zhu, Yiling and Chen, Zhuo and Fan, Junfeng and Ma, Yunkai and Du, Dazhao and Guo, Song and Jing, Fengshui and Guo, Chunchao}, title = {Mesh-Pro: Asynchronous Advantage-guided Ranking Preference Optimization for Artist-style Quadrilateral Mesh Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34248-34258} }
Causality in Video Diffusers is Separable from Denoising-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2026_CVPR, author = {Bai, Xingjian and He, Guande and Li, Zhengqi and Shechtman, Eli and Huang, Xun and Wu, Zongze}, title = {Causality in Video Diffusers is Separable from Denoising}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43373-43384} }
X-PCR: A Benchmark for Cross-modality Progressive Clinical Reasoning in Ophthalmic Diagnosis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Gui and Zhong, Zehao and Zhou, YongSong and Li, Yudong and Wu, Ende and Cheah, Wooi Ping and Qu, Rong and Ren, Jianfeng and Shen, Linlin}, title = {X-PCR: A Benchmark for Cross-modality Progressive Clinical Reasoning in Ophthalmic Diagnosis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33110-33120} }
An Empirical Study on How Video-LLMs Answer Video Questions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gou_2026_CVPR, author = {Gou, Chenhui and Ma, Ziyu and Duan, Zicheng and He, Haoyu and Chen, Feng and Liu, Akide and Zhuang, Bohan and Cai, Jianfei and Rezatofighi, Hamid}, title = {An Empirical Study on How Video-LLMs Answer Video Questions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18587-18597} }
Pantheon360: Taming Digital Twin Generation via 3D-Aware 360deg Video Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Ting-Hsuan and Chen, Ying-Huan and Tu, Tao and Lee, Jie-Ying and Wu, Cho-Ying and Lin, Fangzhou and Zhang, Hengyuan and Paz, David and Huang, Xinyu and Guo, Yuliang and Liu, Yu-Lun and Wang, Yue and Ren, Liu}, title = {Pantheon360: Taming Digital Twin Generation via 3D-Aware 360deg Video Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11138-11149} }
Task-Oriented Data Synthesis and Control-Rectify Sampling for Remote Sensing Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yunkai and Zhang, Yudong and Zhang, Kunquan and Zhang, Jinxiao and Chen, Xinying and Fu, Haohuan and Dong, Runmin}, title = {Task-Oriented Data Synthesis and Control-Rectify Sampling for Remote Sensing Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42147-42157} }
Robust Spiking Neural Networks by Temporal Mutual Information-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Mengting and Gu, Shi and Lin, Peng and Ma, De and Tang, Huajin and Zheng, Qian and Pan, Gang}, title = {Robust Spiking Neural Networks by Temporal Mutual Information}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20711-20720} }
Recurrent Reasoning with Vision-Language Models for Estimating Long-Horizon Embodied Task Progress-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yuelin and Cheng, Sijie and Li, Chen and Li, Zongzhao and Huang, Yuxin and Liu, Yang and Huang, Wenbing}, title = {Recurrent Reasoning with Vision-Language Models for Estimating Long-Horizon Embodied Task Progress}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41150-41159} }
SRA 2: Variational Autoencoder Self-Representation Alignment for Efficient Diffusion Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Mengmeng and Jiang, Dengyang and Li, Liuzhuozheng and Lin, Yucheng and Shen, Guojiang and Kong, Xiangjie and Liu, Yong and Dai, Guang and Wang, Jingdong}, title = {SRA 2: Variational Autoencoder Self-Representation Alignment for Efficient Diffusion Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32978-32987} }
InvCoSS: Inversion-driven Continual Self-supervised Learning in Medical Multi-modal Image Pre-training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Zihao and Rui, Shaohao and Tang, Zhenyu and Wang, Guotai and Wang, Xiaosong}, title = {InvCoSS: Inversion-driven Continual Self-supervised Learning in Medical Multi-modal Image Pre-training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42626-42636} }
Correspondence-Attention Alignment for Multi-View Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kwon_2026_CVPR, author = {Kwon, Minkyung and Choi, Jinhyeok and Park, Jiho and Jeon, Seonghu and Jang, Jinhyuk and Seo, Junyoung and Kwak, Minseop and Kim, Jin-Hwa and Kim, Seungryong}, title = {Correspondence-Attention Alignment for Multi-View Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2316-2326} }
GaussFusion: Improving 3D Reconstruction in the Wild with A Geometry-Informed Video Generator-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Liyuan and Narayana, Manjunath and Stary, Michal and Hutchcroft, Will and Wetzstein, Gordon and Armeni, Iro}, title = {GaussFusion: Improving 3D Reconstruction in the Wild with A Geometry-Informed Video Generator}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15432-15442} }
CIGMA: Causal Information-Gain Mechanistic Attribution of Attention Heads in Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Maliha_2026_CVPR, author = {Maliha, Maisha and Hougen, Dean F.}, title = {CIGMA: Causal Information-Gain Mechanistic Attribution of Attention Heads in Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9891-9900} }
ShelfOcc: Native 3D Supervision beyond LiDAR for Vision-Based Occupancy Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Boeder_2026_CVPR, author = {Boeder, Simon and Gigengack, Fabian and Roesler, Simon and Caesar, Holger and Risse, Benjamin}, title = {ShelfOcc: Native 3D Supervision beyond LiDAR for Vision-Based Occupancy Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28620-28631} }
Beyond Single Images: A Comprehensive Benchmark for Album-Level Vision-Language Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Shawn and Price, Brian and Fan, Yifei and Morse, Bryan}, title = {Beyond Single Images: A Comprehensive Benchmark for Album-Level Vision-Language Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38564-38573} }
AToken: A Unified Tokenizer for Vision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Jiasen and Song, Liangchen and Xu, Mingze and Ahn, Byeongjoo and Wang, Yanjun and Chen, Chen and Dehghan, Afshin and Yang, Yinfei}, title = {AToken: A Unified Tokenizer for Vision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28701-28711} }
Active Perceptual Inference: A Corticothalamic-Inspired Dynamic Nested Recurrent Network for Multimodal Sentiment Analysis with Incomplete Data-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yujuan and Li, Qing and Li, Ziyu and Li, Xiuxing and Wang, Zhuo and Xu, Mengrui and Wu, Xia}, title = {Active Perceptual Inference: A Corticothalamic-Inspired Dynamic Nested Recurrent Network for Multimodal Sentiment Analysis with Incomplete Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1787-1797} }
Urban-GS: A Unified 3D Gaussian Splatting Framework for Compact and High-Fidelity Aerial-to-Street Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Meng and Xia, Changqun and Wang, Yuze and Wang, Junyi and Duan, Wantong and Xie, Xinxiong and Qi, Yue}, title = {Urban-GS: A Unified 3D Gaussian Splatting Framework for Compact and High-Fidelity Aerial-to-Street Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33207-33216} }
Towards Generalized Representations for Low-Light Understanding: When Signal Constancy Meets Semantic Enrichment-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yifan and Huang, Haofeng and Yang, Wenhan and Liu, Jiaying}, title = {Towards Generalized Representations for Low-Light Understanding: When Signal Constancy Meets Semantic Enrichment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1386-1395} }
Bridging RGB and Hematoxylin Components: An Interleaved Guidance and Fusion Framework for Point Supervised Nuclei Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Huan_2026_CVPR, author = {Huan, Zihan and Pan, Xipeng and Zhang, Hualong and Feng, Siyang and Lan, Rushi and Wang, Huadeng and Lu, Haoxiang and Liu, Zhenbing}, title = {Bridging RGB and Hematoxylin Components: An Interleaved Guidance and Fusion Framework for Point Supervised Nuclei Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37518-37527} }
Tri-Subspaces Disentanglement for Multimodal Sentiment Analysis-
[pdf]
[arXiv]
[bibtex]@InProceedings{Meng_2026_CVPR, author = {Meng, Chunlei and Luo, Jiabin and Yan, Zhenglin and Yu, Zhenyu and Fu, Rong and Gan, Zhongxue and Ouyang, Chun}, title = {Tri-Subspaces Disentanglement for Multimodal Sentiment Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8791-8800} }
Event-based Visual Deformation Measurement-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Yuliang and Zhai, Wei and Cui, Yuxin and Zhao, Tiesong and Cao, Yang and Zha, Zheng-Jun}, title = {Event-based Visual Deformation Measurement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {903-913} }
Masking Matters: Unlocking the Spatial Reasoning Capabilities of LLMs for 3D Scene-Language Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeon_2026_CVPR, author = {Jeon, Yerim and Lee, Miso and Moon, WonJun and Heo, Jae-Pil}, title = {Masking Matters: Unlocking the Spatial Reasoning Capabilities of LLMs for 3D Scene-Language Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38668-38677} }
GeoDexGrasp: Geometry-aware Generation for Data-efficient and Physics-plausible Dexterous Grasping-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Bing and Liu, Weiyuan and Zhang, Changlong and Wang, Chenxi and Zhao, Zhibin and Zhai, Zhi}, title = {GeoDexGrasp: Geometry-aware Generation for Data-efficient and Physics-plausible Dexterous Grasping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6729-6739} }
PriVi: Towards a General-Purpose Video Model for Primate Behavior in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mueller_2026_CVPR, author = {Mueller, Felix B. and Meier, Jan F. and Lueddecke, Timo and Vogg, Richard and Freixanet, Roger L. and Hassler, Valentin and Bosshard, Tiffany and Karakoc, Elif and O'Hearn, William J. and Pereira, Sofia M. and Sehner, Sandro and Wierucka, Kaja and Burkart, Judith and Fichtel, Claudia and Fischer, Julia and Gail, Alexander and Hobaiter, Catherine and Ostner, Julia and Samuni, Liran and Sch\"ulke, Oliver and Shahidi, Neda and Wessling, Erin G. and Ecker, Alexander S.}, title = {PriVi: Towards a General-Purpose Video Model for Primate Behavior in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38804-38815} }
Beyond Soft Label: Dataset Distillation via Orthogonal Gradient Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Bo_2026_CVPR, author = {Bo, Deyu and Wang, Xinchao}, title = {Beyond Soft Label: Dataset Distillation via Orthogonal Gradient Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5596-5605} }
CUBic: Coordinated Unified Bimanual Perception and Control Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xingyu and Ding, Pengxiang and Xu, Jingkai and Wang, Donglin and Fan, Zhaoxin}, title = {CUBic: Coordinated Unified Bimanual Perception and Control Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20856-20866} }
CHIPS: Efficient CLIP Adaptation via Curvature-aware Hybrid Influence-based Data Selection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhuang_2026_CVPR, author = {Zhuang, Xinlin and Li, Yichen and Liu, Xiwei and Yang, Haolin and Lu, Yifan and Zou, Ziyun and Li, Yulong and Li, Huifa and Chen, Dongliang and Wang, Qinglei and Liu, Weiyang and Qian, Ying and Shi, Jiangming and Razzak, Imran}, title = {CHIPS: Efficient CLIP Adaptation via Curvature-aware Hybrid Influence-based Data Selection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29483-29493} }
Fast-ThinkAct: Efficient Vision-Language-Action Reasoning via Verbalizable Latent Planning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Chi-Pin and Man, Yunze and Yu, Zhiding and Chen, Min-Hung and Kautz, Jan and Wang, Yu-Chiang Frank and Yang, Fu-En}, title = {Fast-ThinkAct: Efficient Vision-Language-Action Reasoning via Verbalizable Latent Planning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5070-5081} }
Unposed-to-3D: Learning Simulation-Ready Vehicles from Real-World Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Hongyuan and Zou, Bochao and Liu, Qiankun and Yu, Haochen and Mei, Qi and Jiang, Jianfei and Liu, Chen and Bi, Cheng and Wang, Zhao and Zhang, Xueyang and Zhan, Yifei and Chen, Jiansheng and Ma, Huimin}, title = {Unposed-to-3D: Learning Simulation-Ready Vehicles from Real-World Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24843-24853} }
Geometry-Guided 3D Visual Token Pruning for Video-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Han and Huang, Zehao and Fu, Jiahui and Wang, Naiyan and Liu, Si}, title = {Geometry-Guided 3D Visual Token Pruning for Video-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9615-9625} }
DiP: Taming Diffusion Models in Pixel Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zhennan and Zhu, Junwei and Chen, Xu and Zhang, Jiangning and Hu, Xiaobin and Zhao, Hanzhen and Wang, Chengjie and Yang, Jian and Tai, Ying}, title = {DiP: Taming Diffusion Models in Pixel Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36136-36146} }
ReasonX: MLLM-Guided Intrinsic Image Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dirik_2026_CVPR, author = {Dirik, Alara and Wang, Tuanfeng Yang and Ceylan, Duygu and Zafeiriou, Stefanos and Fr\"uhst\"uck, Anna}, title = {ReasonX: MLLM-Guided Intrinsic Image Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30802-30812} }
DyFCLT: Dynamic Frequency-Decoupled Cross-Modal Learning Transformer for Multimodal Tiny Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Chaolang and Dai, Pengwen and Li, Jingyu and Yao, Siyuan and Jiang, Yuchen and Zheng, Zhuoran}, title = {DyFCLT: Dynamic Frequency-Decoupled Cross-Modal Learning Transformer for Multimodal Tiny Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11313-11323} }
ReLaX: Reasoning with Latent Exploration for Large Reasoning Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shimin and Chen, Xianwei and Shen, Yufan and Ye, Ziyuan and Wu, Jibin}, title = {ReLaX: Reasoning with Latent Exploration for Large Reasoning Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33761-33771} }
SketchAssist: A Practical Assistant for Semantic Edits and Precise Local Redrawing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zou_2026_CVPR, author = {Zou, Han and Zhang, Yan and Yu, Ruiqi and Xie, Cong and Huang, Jie and Zhan, Zhenpeng}, title = {SketchAssist: A Practical Assistant for Semantic Edits and Precise Local Redrawing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16258-16267} }
Residual Primitive Fitting of 3D Shapes with SuperFrusta-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ganeshan_2026_CVPR, author = {Ganeshan, Aditya and Gadelha, Matheus and Groueix, Thibault and Chen, Zhiqin and Chaudhuri, Siddhartha and Kim, Vladimir and Yifan, Wang and Ritchie, Daniel}, title = {Residual Primitive Fitting of 3D Shapes with SuperFrusta}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7404-7413} }
Continuous Exposure-Time Modeling for Realistic Atmospheric Turbulence Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Junwei and Liang, Dong and Huang, Sheng-Jun and Zhan, Kun and Chen, Songcan}, title = {Continuous Exposure-Time Modeling for Realistic Atmospheric Turbulence Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26678-26687} }
DiffBMP: Differentiable Rendering with Bitmap Primitives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2026_CVPR, author = {Hong, Seongmin and Kim, Junghun James and Kim, Daehyeop and Chung, Insoo and Chun, Se Young}, title = {DiffBMP: Differentiable Rendering with Bitmap Primitives}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26741-26750} }
BrickNet: Graph-Backed Generative Brick Assembly-
[pdf]
[arXiv]
[bibtex]@InProceedings{Kulits_2026_CVPR, author = {Kulits, Peter and Schmid, Cordelia}, title = {BrickNet: Graph-Backed Generative Brick Assembly}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39252-39261} }
Diffusion Mental Averages-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Thawatdamrongkit_2026_CVPR, author = {Thawatdamrongkit, Phonphrm and Seripanitkarn, Sukit and Suwajanakorn, Supasorn}, title = {Diffusion Mental Averages}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35713-35725} }
SFR-Net: Steering-Fusion-Refining Network in Multi-label Zero-Shot Sewer Defect Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zhao-Min and Huang, Xinjian and Ge, Yisu and Li, Yu}, title = {SFR-Net: Steering-Fusion-Refining Network in Multi-label Zero-Shot Sewer Defect Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41636-41645} }
SAM 3D: 3Dfy Anything in Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xingyu and CHU, FU-JEN and Gleize, Pierre and Liang, Kevin J and Sax, Alexander and Tang, Hao and Wang, Weiyao and Guo, Michelle and Hardin, Thibaut and Li, Xiang and Lin, Aohan and Liu, Jia-Wei and Ma, Ziqi and Sagar, Anushka and Song, Bowen and Wang, Xiaodong and Yang, Jianing and Zhang, Bowen and Doll\'ar, Piotr and Gkioxari, Georgia and Feiszli, Matt and Malik, Jitendra}, title = {SAM 3D: 3Dfy Anything in Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7220-7232} }
RNED: Rotary Number Encoding and Decoding for Medical VLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Fengbei and Kwak, Sunwoo and Nizam, Nusrat and Richter, Ilan and Beecy, Ashley and Raikhelkar, Jayant and Estrin, Deborah and Sabuncu, Mert R.}, title = {RNED: Rotary Number Encoding and Decoding for Medical VLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13722-13731} }
LumiMotion: Improving Gaussian Relighting with Scene Dynamics-
[pdf]
[supp]
[bibtex]@InProceedings{Kaleta_2026_CVPR, author = {Kaleta, Joanna and W\'ojcik, Piotr and Marzol, Kacper and Trzcinski, Tomasz and Kania, Kacper and Kowalski, Marek}, title = {LumiMotion: Improving Gaussian Relighting with Scene Dynamics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37311-37321} }
Distribution-Aligned Multimodal Fusion for Robust Object Detection-
[pdf]
[bibtex]@InProceedings{Hao_2026_CVPR, author = {Hao, Xiaohui and Pu, Yanglin and Wang, Yongjun and She, Rui}, title = {Distribution-Aligned Multimodal Fusion for Robust Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25494-25503} }
CogniEdit: Dense Gradient Flow Optimization for Fine-Grained Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yan and Liu, Lin and Zhang, Xiaopeng and Xue, Wei and Luo, Wenhan and Guo, Yike and Tian, Qi}, title = {CogniEdit: Dense Gradient Flow Optimization for Fine-Grained Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1145-1154} }
BiOTPrompt: Bidirectional Optimal Transport Guided Prompting for Disease Evolution-aware Radiology Report Generation-
[pdf]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Tengfei and Fan, Yijian and Wang, Boyue and Hu, Yongli and Li, Mingjie and Li, Jinghua and Gao, Junbin and Chang, Xiaojun and Li, Zhihui and Yin, Baocai}, title = {BiOTPrompt: Bidirectional Optimal Transport Guided Prompting for Disease Evolution-aware Radiology Report Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13755-13765} }
D2Dewarp: Dual Dimensions Geometric Representation Learning Based Document Image Dewarping-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Heng and Wu, Xiangping and Chen, Qingcai}, title = {D2Dewarp: Dual Dimensions Geometric Representation Learning Based Document Image Dewarping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34734-34744} }
SegMoTE: Token-Level Mixture of Experts for Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Yujie and Li, Jingwen and Ju, Sibo and Su, Yanzhou and Yao, He and Liu, Yisong and Zhu, Min and Cheng, Junlong}, title = {SegMoTE: Token-Level Mixture of Experts for Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36332-36342} }
DRiffusion: Draft-and-Refine Process Parallelizes Diffusion Models with Ease-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2026_CVPR, author = {Bai, Runsheng and Zhang, Chengyu and Deng, Yangdong}, title = {DRiffusion: Draft-and-Refine Process Parallelizes Diffusion Models with Ease}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16551-16560} }
From Selection to Scheduling: Federated Geometry-Aware Correction Makes Exemplar Replay Work Better under Continual Dynamic Heterogeneity-
[pdf]
[arXiv]
[bibtex]@InProceedings{Qi_2026_CVPR, author = {Qi, Zhuang and Tang, Ying-Peng and Meng, Lei and Chao, Guoqing and Wu, Lei and Yu, Han and Meng, Xiangxu}, title = {From Selection to Scheduling: Federated Geometry-Aware Correction Makes Exemplar Replay Work Better under Continual Dynamic Heterogeneity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17494-17504} }
STAR: Test-Time Adaptation Can Enhance Universal Prompt Learning for Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Fu_2026_CVPR, author = {Fu, Yiwei and Wan, Hui and Luo, Xiao and Deng, Minghua}, title = {STAR: Test-Time Adaptation Can Enhance Universal Prompt Learning for Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31482-31492} }
Anti-Degradation Lifelong Multi-View Clustering-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xingfeng and Pan, Hao and Yuan, Honglin and Sun, Yuan and Zhao, Xujian and Lin, Jiaqi and Ren, Zhenwen}, title = {Anti-Degradation Lifelong Multi-View Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8749-8759} }
FedSDR: Federated Graph Learning with Structural Noise Detection and Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Jiaqi and Tan, Zihan and Wan, Guancheng and Huang, Wenke and Li, He and Ye, Mang}, title = {FedSDR: Federated Graph Learning with Structural Noise Detection and Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3379-3389} }
SpeeDe3DGS: Speedy Deformable 3D Gaussian Splatting with Temporal Pruning and Motion Grouping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tu_2026_CVPR, author = {Tu, Allen and Ying, Haiyang and Hanson, Alex and Lee, Yonghan and Goldstein, Tom and Zwicker, Matthias}, title = {SpeeDe3DGS: Speedy Deformable 3D Gaussian Splatting with Temporal Pruning and Motion Grouping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26083-26093} }
Semantic Context Matters: Improving Conditioning for Autoregressive Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2026_CVPR, author = {Jin, Dongyang and Xu, Ryan and Zeng, Jianhao and Lan, Rui and Bai, Yancheng and Sun, Lei and Chu, Xiangxiang}, title = {Semantic Context Matters: Improving Conditioning for Autoregressive Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30402-30413} }
SRA-Det: Learning Omni-Grained Open-Vocabulary Detection Beyond Category Names-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Li and Cai, Boyu and Liu, Wei and Wang, Yan and Yuan, Chunfeng and Li, Bing and Hu, Weiming}, title = {SRA-Det: Learning Omni-Grained Open-Vocabulary Detection Beyond Category Names}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27611-27620} }
ReScene4D: Temporally Consistent Semantic Instance Segmentation of Evolving Indoor 3D Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Steiner_2026_CVPR, author = {Steiner, Emily and Zheng, Jianhao and Howard-Jenkins, Henry and Xie, Chris and Armeni, Iro}, title = {ReScene4D: Temporally Consistent Semantic Instance Segmentation of Evolving Indoor 3D Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10710-10720} }
Thinking with Frames: Generative Video Distortion Evaluation via Frame Reward Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yuan and Liao, Borui and Huang, Huijuan and Lu, Jinda and Li, Ouxiang and Liu, Kuien and Wang, Meng and Wang, Xiang}, title = {Thinking with Frames: Generative Video Distortion Evaluation via Frame Reward Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4526-4536} }
LiteVGGT: Boosting Vanilla VGGT via Geometry-aware Cached Token Merging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shu_2026_CVPR, author = {Shu, Zhijian and Lin, Cheng and Xie, Tao and Yin, Wei and Li, Ben and Pu, Zhiyuan and Li, Weize and Yao, Yao and Cao, Xun and Guo, Xiaoyang and Long, Xiao-Xiao}, title = {LiteVGGT: Boosting Vanilla VGGT via Geometry-aware Cached Token Merging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36422-36432} }
Lipschitz Optimization for Formal Verification of Homographies-
[pdf]
[supp]
[bibtex]@InProceedings{Durand_2026_CVPR, author = {Durand, Jean-Guillaume and Kouvaros, Panagiotis and Gariel, Maxime and Lomuscio, Alessio}, title = {Lipschitz Optimization for Formal Verification of Homographies}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13306-13315} }
Adapter Shield: A Unified Framework with Built-in Authentication for Preventing Unauthorized Zero-Shot Image-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2026_CVPR, author = {Jia, Jun and Miao, Hongyi and Zhou, Yingjie and Zhou, Wangqiu and Zhang, Jianbo and Cao, Linhan and Zhu, Dandan and Yang, Hua and Min, Xiongkuo and Sun, Wei and Zhai, Guangtao}, title = {Adapter Shield: A Unified Framework with Built-in Authentication for Preventing Unauthorized Zero-Shot Image-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30120-30129} }
AsymLoc: Towards Asymmetric Feature Matching for Efficient Visual Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Omama_2026_CVPR, author = {Omama, Mohammad and Berton, Gabriele and Foxlin, Eric and Kim, Yelin}, title = {AsymLoc: Towards Asymmetric Feature Matching for Efficient Visual Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26441-26451} }
Med-CMR: A Fine-Grained Benchmark Integrating Visual Evidence and Clinical Logic for Medical Complex Multimodal Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gong_2026_CVPR, author = {Gong, Haozhen and Ji, Xiaozhong and Liu, Yuansen and Wu, Wenbin and Yan, Xiaoxiao and Liu, Jingjing and Wu, Kai and Pan, Jiazhen and Jian, Bailiang and Zhang, Jiangning and Hu, Xiaobin and Li, Hongwei Bran}, title = {Med-CMR: A Fine-Grained Benchmark Integrating Visual Evidence and Clinical Logic for Medical Complex Multimodal Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41224-41234} }
PlannerRFT: Reinforcing Diffusion Planners through Closed-Loop and Sample-Efficient Fine-Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Hongchen and Li, Tianyu and Yang, Jiazhi and Shang, Mingyang and Wu, Gaoqiang and Wang, Caojun and Tian, Haochen and Lin, Zengrong and Hao, Zhihui and Lang, XianPeng and Hu, Jia and Li, Hongyang}, title = {PlannerRFT: Reinforcing Diffusion Planners through Closed-Loop and Sample-Efficient Fine-Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24929-24938} }
Glove2Hand: Synthesizing Natural Hand-Object Interaction from Multi-Modal Sensing Gloves-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xinyu and Kou, Ziyi and Qin, Chuan and Huang, Mia and Ristani, Ergys and Kumar, Ankit and Chen, Lele and He, Kun and Boularias, Abdeslam and Guan, Li}, title = {Glove2Hand: Synthesizing Natural Hand-Object Interaction from Multi-Modal Sensing Gloves}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1829-1840} }
ArtiMuse: Fine-Grained Image Aesthetics Assessment with Joint Scoring and Expert-Level Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Shuo and Ma, Nan and Li, Jiayang and Li, Xiaohui and Shao, Lihao and Zhu, Kaiwen and Zhou, Yu and Pu, Yuandong and Wu, Jiarui and Wang, Jiaquan and Qu, Bo and Wang, Wenhai and Qiao, Yu and Yao, Dajuin and Liu, Yihao}, title = {ArtiMuse: Fine-Grained Image Aesthetics Assessment with Joint Scoring and Expert-Level Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15313-15322} }
Unifying Precise Keyframes and Semantic Control via Multi-level Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Linjun and Yu, Jiejia and Jin, Leyang and Wang, He and Zheng, Bowen and Yang, Xu and Jiang, Hao and Xia, Fei and Ling, Fei and Deng, Jun and Jin, Xiaogang}, title = {Unifying Precise Keyframes and Semantic Control via Multi-level Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23473-23483} }
Improving Sparse Autoencoder with Dynamic Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Dongsheng and Zhang, Jinsen and Su, Dawei and Huang, Hui}, title = {Improving Sparse Autoencoder with Dynamic Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41996-42006} }
SwitchCraft: Training-Free Multi-Event Video Generation with Attention Controls-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Qianxun and Song, Chenxi and Cai, Yujun and Zhang, Chi}, title = {SwitchCraft: Training-Free Multi-Event Video Generation with Attention Controls}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29136-29145} }
DUO-VSR: Dual-Stream Distillation for One-Step Video Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lv_2026_CVPR, author = {Lv, Zhengyao and Xia, Menghan and Wang, Xintao and Wong, Kwan-Yee K.}, title = {DUO-VSR: Dual-Stream Distillation for One-Step Video Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16333-16344} }
HiFICL: High-Fidelity In-Context Learning for Multimodal Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xiaoyu and Liu, Yuhang and Kang, Xuanshuo and Luo, Zheng and Lou, Fangqi and Wu, Xiaohua and Xiong, Zihan}, title = {HiFICL: High-Fidelity In-Context Learning for Multimodal Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3069-3078} }
Hear you are: Teaching LLMs Spatial Reasoning with Vision and Spatial Sound-
[pdf]
[supp]
[bibtex]@InProceedings{Ryu_2026_CVPR, author = {Ryu, Hyeonggon and Chung, Joon Son and Harwath, David}, title = {Hear you are: Teaching LLMs Spatial Reasoning with Vision and Spatial Sound}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38606-38615} }
BUSSARD: Normalizing Flows for Bijective Universal Scene-Specific Anomalous Relationship Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Schween_2026_CVPR, author = {Schween, Melissa and Kruse, Mathis and Rosenhahn, Bodo}, title = {BUSSARD: Normalizing Flows for Bijective Universal Scene-Specific Anomalous Relationship Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28512-28523} }
Energy Waveify and Redistribution for Test-Time Adaptation: A Control System Perspective-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zhenbin and Zhang, Lei and Wang, Lituan and Zhang, Zhenwei and Qian, Guangwu and Wang, Yan and Huang, Wei}, title = {Energy Waveify and Redistribution for Test-Time Adaptation: A Control System Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15040-15049} }
Ar2Can: An Architect and an Artist Leveraging a Canvas for Multi-Human Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Borse_2026_CVPR, author = {Borse, Shubhankar and Pham, Phuc and Farhadzadeh, Farzad and Choi, Seokeon and Nguyen, Phong and Tran, Anh and Yun, Sungrack and Hayat, Munawar and Porikli, Fatih}, title = {Ar2Can: An Architect and an Artist Leveraging a Canvas for Multi-Human Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {550-560} }
OlmoEarth: Stable Latent Image Modeling for Multimodal Earth Observation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Herzog_2026_CVPR, author = {Herzog, Henry and Bastani, Favyen and Zhang, Yawen and Tseng, Gabriel and Redmon, Joseph and Sablon, Hadrien and Park, Ryan and Morrison, Jacob and Buraczynski, Alexandra and Farley, Karen and Hansen, Josh and Howe, Andrew and Johnson, Patrick Alan and Otterlee, Mark and Schmitt, Ted and Pitelka, Hunter and Daspit, Stephen and Ratner, Rachel and Wilhelm, Christopher and Wood, Sebastian and Jacobi, Mike and Kerner, Hannah and Shelhamer, Evan and Farhadi, Ali and Krishna, Ranjay and Beukema, Patrick}, title = {OlmoEarth: Stable Latent Image Modeling for Multimodal Earth Observation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34806-34817} }
InfiniBench: Infinite Benchmarking for Visual Spatial Reasoning with Customizable Scene Complexity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Haoming and Xue, Qiyao and Gao, Wei}, title = {InfiniBench: Infinite Benchmarking for Visual Spatial Reasoning with Customizable Scene Complexity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21594-21605} }
DICArt: Advancing Category-level Articulated Object Pose Estimation in Discrete State-Spaces-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Li and Mei, Mingyu and Wang, Ailing and Meng, Xianhui and Zhong, Yan and Song, Xinyuan and Liu, Liu and Wang, Rujing and He, Zaixing and Lu, Cewu}, title = {DICArt: Advancing Category-level Articulated Object Pose Estimation in Discrete State-Spaces}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4687-4697} }
Probabilistic Concept Graph Reasoning for Multimodal Misinformation Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Ruichao and Gao, Wei and Zhu, Xiaobin and Ma, Jing and Lin, Hongzhan and Luo, Ziyang and Zhang, Bo-Wen and Yin, Xu-Cheng}, title = {Probabilistic Concept Graph Reasoning for Multimodal Misinformation Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19107-19118} }
AffordGen: Generating Diverse Demonstrations for Generalizable Object Manipulation with Affordance Correspondence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jiawei and Hu, Kaizhe and Huang, Yingqian and Ju, Yuanchen and Xue, Zhengrong and Xu, Huazhe}, title = {AffordGen: Generating Diverse Demonstrations for Generalizable Object Manipulation with Affordance Correspondence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15966-15975} }
OmniBrainBench: A Comprehensive Multimodal Benchmark for Brain Imaging Analysis Across Multi-stage Clinical Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Zhihao and Wang, Cheng and Liu, Shengyuan and Liang, Zhiying and Ye, Zanting and Ju, Min Jie and Woo, Peter YM and Yuan, Yixuan}, title = {OmniBrainBench: A Comprehensive Multimodal Benchmark for Brain Imaging Analysis Across Multi-stage Clinical Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42732-42743} }
QuCNet: Quantum Deep Learning Driven Multi-Circuit Network for Remote Sensing Image Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Komal_2026_CVPR, author = {Komal, Komal and Gupta, Mukul and Singh, Saumya and Vipparthi, Santosh Kumar and Reddy, C.C. and Murala, Subrahmanyam}, title = {QuCNet: Quantum Deep Learning Driven Multi-Circuit Network for Remote Sensing Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20253-20262} }
Beyond Semantic Search: Towards Referential Anchoring in Composed Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yuxin and Zhou, Yinan and Chen, Yuxin and Zhang, Ziqi and Ma, Zongyang and Yuan, Chunfeng and Li, Bing and Gao, Jun and Hu, Weiming}, title = {Beyond Semantic Search: Towards Referential Anchoring in Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31155-31165} }
UPLiFT: Efficient Pixel-Dense Feature Upsampling with Local Attenders-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Walmer_2026_CVPR, author = {Walmer, Matthew and Suri, Saksham and Aggarwal, Anirud and Shrivastava, Abhinav}, title = {UPLiFT: Efficient Pixel-Dense Feature Upsampling with Local Attenders}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41288-41298} }
Rethinking MLLM Itself as a Segmenter with a Single Segmentation Token-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Anqi and Ji, Xiaokang and Gao, Guangyu and Jiao, Jianbo and Liu, Chi Harold and Wei, Yunchao}, title = {Rethinking MLLM Itself as a Segmenter with a Single Segmentation Token}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19196-19207} }
Scone: Bridging Composition and Distinction in Subject-Driven Image Generation via Unified Understanding-Generation Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yuran and Zeng, Bohan and Tong, Chengzhuo and Liu, Wenxuan and Shi, Yang and Ma, Xiaochen and Liang, Hao and Zhang, Yuanxing and Zhang, Wentao}, title = {Scone: Bridging Composition and Distinction in Subject-Driven Image Generation via Unified Understanding-Generation Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7773-7783} }
Temporal Imbalance of Positive and Negative Supervision in Class-Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Jinge and Zhu, Fengqing}, title = {Temporal Imbalance of Positive and Negative Supervision in Class-Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32299-32308} }
Nano-EmoX: Unifying Multimodal Emotional Intelligence from Perception to Empathy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Jiahao and Lin, Fengyan and Yang, Xuechao and Feng, Chen and Zhu, Kexin and Yang, Xu and Chen, Zhide}, title = {Nano-EmoX: Unifying Multimodal Emotional Intelligence from Perception to Empathy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22986-22997} }
Global-Aware Edge Prioritization for Pose Graph Initialization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Tong and Tolias, Giorgos and Matas, Jiri and Barath, Daniel}, title = {Global-Aware Edge Prioritization for Pose Graph Initialization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28642-28651} }
AdaptVision: Efficient Vision-Language Models via Adaptive Visual Acquisition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Zichuan and Liu, Yicheng and Yang, Yang and Tao, Lvfang and Ye, Deheng}, title = {AdaptVision: Efficient Vision-Language Models via Adaptive Visual Acquisition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11923-11932} }
Beyond Geometry: Artistic Disparity Synthesis for Immersive 2D-to-3D-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Ping and Chen, Zezhou and Zhang, Xingpeng and Qian, Yanlin and Hu, Huan and Liu, Xiang and Wang, Zipeng and Wang, Xin and Liu, Zhaoxiang and Wang, Kai and Lian, Shiguo}, title = {Beyond Geometry: Artistic Disparity Synthesis for Immersive 2D-to-3D}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27114-27123} }
Geometry-driven OOD Detectors Are Class-Incremental Learners-
[pdf]
[supp]
[bibtex]@InProceedings{Jia_2026_CVPR, author = {Jia, Wangwang and Gao, Zijian and Wan, Tianjiao and Cao, Yuan and Dou, Yong and Xu, Kele}, title = {Geometry-driven OOD Detectors Are Class-Incremental Learners}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34638-34649} }
ReAlign: Generalizable Image Forgery Detection via Reasoning-Aligned Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Qing and Xu, Zhipei and Zhang, Xuanyu and Yu, Xiangyu and Zhang, Jian}, title = {ReAlign: Generalizable Image Forgery Detection via Reasoning-Aligned Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21295-21305} }
Enhancing Unregistered Hyperspectral Image Super-Resolution via Unmixing-based Abundance Fusion Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yingkai and Zhang, Tao and Nie, Jing and Fu, Ying}, title = {Enhancing Unregistered Hyperspectral Image Super-Resolution via Unmixing-based Abundance Fusion Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41573-41583} }
Video-Only ToM: Enhancing Theory of Mind in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Siqi and Li, Xinyang and Zou, Bochao and Zhuo, Junbao and Ma, Huimin and Chen, Jiansheng}, title = {Video-Only ToM: Enhancing Theory of Mind in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19208-19218} }
Saliency-Driven Token Merging for Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Weiying and Chen, Xiaoyu and Zhang, Xin and Hao, Chenhe and Ma, Jitao and Li, Yunsong and Fang, Leyuan}, title = {Saliency-Driven Token Merging for Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32184-32193} }
FastGS: Training 3D Gaussian Splatting in 100 Seconds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2026_CVPR, author = {Ren, Shiwei and Wen, Tianci and Fang, Yongchun and Lu, Biao}, title = {FastGS: Training 3D Gaussian Splatting in 100 Seconds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26094-26103} }
GaussianZoom: Progressive Zoom-in Generative 3D Gaussian Splatting with Geometric and Semantic Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Jiale and Hu, Jiarui and Yang, Zesong and Luan, Kaixuan and Bao, Hujun and Cui, Zhaopeng}, title = {GaussianZoom: Progressive Zoom-in Generative 3D Gaussian Splatting with Geometric and Semantic Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11850-11859} }
An Instance-Centric Panoptic Occupancy Prediction Benchmark for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Yi and E, Junwu and Guo, Zizhan and Ma, Yu and Wang, Hanli and Fan, Rui}, title = {An Instance-Centric Panoptic Occupancy Prediction Benchmark for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14219-14228} }
Learning from Itself: Mining Internal Knowledge from Vision Language Models for Continual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Gong_2026_CVPR, author = {Gong, Yizheng and Yu, Siyue and Al-Nuaimy, Waleed and Xiao, Jimin}, title = {Learning from Itself: Mining Internal Knowledge from Vision Language Models for Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10830-10839} }
Reinforcing Video Object Segmentation to Think before it Segments-
[pdf]
[supp]
[bibtex]@InProceedings{Gong_2026_CVPR, author = {Gong, Sitong and Zhuge, Yunzhi and Zhang, Lu and Yu, Jiazuo and Zhang, Pingping and Jia, Xu and Lu, Huchuan}, title = {Reinforcing Video Object Segmentation to Think before it Segments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3835-3844} }
GazeOnce360: Fisheye-Based 360° Multi-Person Gaze Estimation with Global-Local Feature Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Zhuojiang and Sun, Zhenghui and Lu, Feng}, title = {GazeOnce360: Fisheye-Based 360{$^\circ$} Multi-Person Gaze Estimation with Global-Local Feature Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12312-12321} }
Edit-As-Act: Goal-Regressive Planning for Open-Vocabulary 3D Indoor Scene Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Noh_2026_CVPR, author = {Noh, Seongrae and Seo, SeungWon and Park, Gyeong-Moon and Kang, HyeongYeop}, title = {Edit-As-Act: Goal-Regressive Planning for Open-Vocabulary 3D Indoor Scene Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19864-19873} }
Scene-Centric Unsupervised Video Panoptic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Reich_2026_CVPR, author = {Reich, Christoph and Hahn, Oliver and Araslanov, Nikita and Leal-Taix{\'e}, Laura and Rupprecht, Christian and Cremers, Daniel and Roth, Stefan}, title = {Scene-Centric Unsupervised Video Panoptic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10753-10765} }
A Semantically Disentangled Unified Model for Multi-category 3D Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, SuYeon and Lee, Wongyu and Cho, MyeongAh}, title = {A Semantically Disentangled Unified Model for Multi-category 3D Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33036-33045} }
AVFakeBench: A Comprehensive Audio-Video Forgery Detection Benchmark for AV-LMMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2026_CVPR, author = {Xia, Shuhan and Li, Peipei and Liu, Xuannan and Zhang, Dongsen and Guo, Xinyu and Li, Zekun}, title = {AVFakeBench: A Comprehensive Audio-Video Forgery Detection Benchmark for AV-LMMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35416-35426} }
Continual Learning for fMRI-Based Brain Disorder Diagnosis via Functional Connectivity Matrices Generative Replay-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Qianyu and Yu, Shujian}, title = {Continual Learning for fMRI-Based Brain Disorder Diagnosis via Functional Connectivity Matrices Generative Replay}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25099-25109} }
B$^3$-Seg: Camera-Free, Training-Free 3DGS Segmentation via Analytic EIG and Beta-Bernoulli Bayesian Updates-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kamata_2026_CVPR, author = {Kamata, Hiromichi and Munro, Samuel Arthur and Homma, Fuminori}, title = {{B$^3$-Seg}: Camera-Free, Training-Free 3DGS Segmentation via Analytic EIG and Beta-Bernoulli Bayesian Updates}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26867-26876} }
Towards Uncertainty-aware Unsupervised Domain Adaptation for Videos and Time-Series with Causal Optimal Transport-
[pdf]
[supp]
[bibtex]@InProceedings{Mishra_2026_CVPR, author = {Mishra, Khushboo and Trivedi, Varun and Dutta, Tanima}, title = {Towards Uncertainty-aware Unsupervised Domain Adaptation for Videos and Time-Series with Causal Optimal Transport}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29421-29430} }
What Makes Good Synthetic Training Data for Zero-Shot Stereo Matching?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, David and Raistrick, Alexander and Deng, Jia}, title = {What Makes Good Synthetic Training Data for Zero-Shot Stereo Matching?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34171-34180} }
Improving Text-to-Image Generation with Intrinsic Self-Confidence Rewards-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Seungwook and Cho, Minsu}, title = {Improving Text-to-Image Generation with Intrinsic Self-Confidence Rewards}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14832-14843} }
Retrieve-to-Restore: Efficient All-in-One Image Restoration with a Retrieval-Based Degradation Bank-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Chenxu and Zhang, Kai and Yang, Jian}, title = {Retrieve-to-Restore: Efficient All-in-One Image Restoration with a Retrieval-Based Degradation Bank}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1277-1287} }
GenieDrive: Towards Physics-Aware Driving World Model with 4D Occupancy Guided Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zhenya and Liu, Zhe and Lu, Yuxiang and Hou, Liping and Miao, Chenxuan and Peng, Siyi and Feng, Bailan and Bai, Xiang and Zhao, Hengshuang}, title = {GenieDrive: Towards Physics-Aware Driving World Model with 4D Occupancy Guided Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35680-35690} }
GeoViS: Geospatially Rewarded Visual Search for Remote Sensing Visual Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Peirong and Zhang, Yidan and Xu, Luxiao and Lin, Jinliang and Guo, Zonghao and Wang, Fengxiang and Yang, Xue and Wei, Kaiwen and Wang, Lei}, title = {GeoViS: Geospatially Rewarded Visual Search for Remote Sensing Visual Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14335-14345} }
Human-like Abstract Visual Reasoning via Understanding and Solving Reasoning Loop-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xinwang and Li, Xiuxing and Li, Qing and Zhuang, Ziyue and Wu, Yutong and Li, Ziyu and Wang, Zhuo and Li, Kai and Hao, Jianye and Wu, Xia}, title = {Human-like Abstract Visual Reasoning via Understanding and Solving Reasoning Loop}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41235-41244} }
OccuFly: A 3D Vision Benchmark for Semantic Scene Completion from the Aerial Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gross_2026_CVPR, author = {Gross, Markus and Matha, Sai B. and Fahmy, Aya and Song, Rui and Cremers, Daniel and Mee{\ss}, Henri}, title = {OccuFly: A 3D Vision Benchmark for Semantic Scene Completion from the Aerial Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21474-21485} }
MotionHiFlow: Text-to-Motion via Hierarchical Flow Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Heng and Lin, Xiaotong and Zeng, Ling-An and Kang, Yulei and Li, Shuai and Hu, Jian-Fang}, title = {MotionHiFlow: Text-to-Motion via Hierarchical Flow Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9352-9363} }
Dynamic Stream Network for Combinatorial Explosion Problem in Deformable Medical Image Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bi_2026_CVPR, author = {Bi, Shaochen and He, Yuting and Wang, Weiming and Chen, Hao}, title = {Dynamic Stream Network for Combinatorial Explosion Problem in Deformable Medical Image Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15649-15658} }
What Are You Doing? A Closer Look at Controllable Human Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bugliarello_2026_CVPR, author = {Bugliarello, Emanuele and Arnab, Anurag and Paiss, Roni and Koh, Christy and Kindermans, Pieter-Jan and Schmid, Cordelia}, title = {What Are You Doing? A Closer Look at Controllable Human Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11414-11425} }
MR-RAG: Multimodal Relevance-Aware Retrieval-Augmented Generation for Medical Visual Question Answering-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xuze and Wang, Haozhao and Huang, Zhenyu and Wang, Zhongxu and Zhang, Jinghua and Li, Ruixuan}, title = {MR-RAG: Multimodal Relevance-Aware Retrieval-Augmented Generation for Medical Visual Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15010-15019} }
Semantic-Adaptive Diffusion for Dynamic Spatiotemporal Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jinsong and Qu, Ying and Liao, Yuan and Qi, Hairong and Shao, Zhenzhou}, title = {Semantic-Adaptive Diffusion for Dynamic Spatiotemporal Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12344-12353} }
OctoMed: Data Recipes for State-of-the-Art Multimodal Medical Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ossowski_2026_CVPR, author = {Ossowski, Timothy and Zhang, Sheng and Liu, Qianchu and Qin, Guanghui and Tan, Reuben and Naumann, Tristan and Hu, Junjie and Poon, Hoifung}, title = {OctoMed: Data Recipes for State-of-the-Art Multimodal Medical Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26251-26261} }
Let it Snow! Animating 3D Gaussian Scenes with Dynamic Weather Effects via Physics-Guided Score Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Fiebelman_2026_CVPR, author = {Fiebelman, Gal and Averbuch-Elor, Hadar and Benaim, Sagie}, title = {Let it Snow! Animating 3D Gaussian Scenes with Dynamic Weather Effects via Physics-Guided Score Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37322-37331} }
LoD-Loc v3: Generalized Aerial Localization in Dense Cities using Instance Silhouette Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Shuaibang and Zhu, Juelin and Li, Xia and Yang, Kun and Liu, Yu and Zhang, Maojun and Yan, Shen}, title = {LoD-Loc v3: Generalized Aerial Localization in Dense Cities using Instance Silhouette Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12193-12205} }
Mark4D: Temporally-Consistent Watermarking for 4D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Jaejin and Jeong, Minjae and Park, Joonhyuk and Hwang, Yechan and Baek, Seunghun and Kim, Won Hwa}, title = {Mark4D: Temporally-Consistent Watermarking for 4D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39393-39402} }
Flow3r: Factored Flow Prediction for Scalable Visual Geometry Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cong_2026_CVPR, author = {Cong, Zhongxiao and Zhao, Qitao and Jeon, Minsik and Tulsiani, Shubham}, title = {Flow3r: Factored Flow Prediction for Scalable Visual Geometry Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {438-447} }
Spatial-SAM: Spatially Consistent 3D Electron Microscopy Segmentation with SDF Memory and Semi-Supervised Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Yikai and Han, Renmin and Wang, Yuxuan and Cai, Youcheng and Liu, Ligang}, title = {Spatial-SAM: Spatially Consistent 3D Electron Microscopy Segmentation with SDF Memory and Semi-Supervised Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22763-22772} }
Face2Scene: Using Facial Degradation as an Oracle for Diffusion-Based Scene Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Kazerouni_2026_CVPR, author = {Kazerouni, Amirhossein and Suin, Maitreya and Aumentado-Armstrong, Tristan and Honari, Sina and Walia, Amanpreet and Mohomed, Iqbal and Derpanis, Konstantinos G. and Taati, Babak and Levinshtein, Alex}, title = {Face2Scene: Using Facial Degradation as an Oracle for Diffusion-Based Scene Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8428-8438} }
3D Gaussian Splatting from Unposed Spike Stream-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Yijia and Hu, Tong and Hu, Liwen and Ma, Lei and Huang, Tiejun}, title = {3D Gaussian Splatting from Unposed Spike Stream}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41002-41011} }
Percept-WAM: Perception-Enhanced World-Awareness-Action Model for Robust End-to-End Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Jianhua and Tian, Meng and Zhu, Jiangtong and He, Fan and Zhang, Huixin and Guo, Sitong and Zhu, Dechang and Tang, Hao and Xu, Pei and Guo, Yuze and Niu, Minzhe and Zhu, Haojie and Dong, Qichao and Yan, Xuechao and Dong, Siyuan and Hou, Lu and Huang, Qingqiu and Jia, Xiaosong and Xu, Hang}, title = {Percept-WAM: Perception-Enhanced World-Awareness-Action Model for Robust End-to-End Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10642-10655} }
WaTeRFlow: Watermark Temporal Robustness via Flow Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2026_CVPR, author = {Jeong, Utae and In, Sumin and Ryu, Hyunju and Choi, Jaewan and Yang, Feng and Jeong, Jongheon and Kim, Seungryong and Kim, Sangpil}, title = {WaTeRFlow: Watermark Temporal Robustness via Flow Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31703-31713} }
Post-training Feature Pruning for Fundus Images Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Pham_2026_CVPR, author = {Pham, Van-Nguyen and Le, Duc-Tai and Bum, Junghyun and Choo, Hyunseung}, title = {Post-training Feature Pruning for Fundus Images Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37590-37599} }
LoL: Longer than Longer, Scaling Video Generation to Hour-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cui_2026_CVPR, author = {Cui, Justin and Wu, Jie and Li, Ming and Yang, Tao and Li, Xiaojie and Wang, Rui and Bai, Andrew and Ban, Yuanhao and Hsieh, Cho-Jui}, title = {LoL: Longer than Longer, Scaling Video Generation to Hour}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38132-38142} }
Bias at the End of the Score-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Magid_2026_CVPR, author = {Magid, Salma Abdel and Guo, Grace and Tureci, Esin and Dharmasiri, Amaya and Ramaswamy, Vikram V. and Pfister, Hanspeter and Russakovsky, Olga}, title = {Bias at the End of the Score}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24460-24470} }
Neuro-Cognitive Reward Modeling for Human-Centered Autonomous Vehicle Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhuang_2026_CVPR, author = {Zhuang, Zhuoli and Chang, Yu-Cheng and Wang, Yu-Kai and Do, Thomas and Lin, Chin-Teng}, title = {Neuro-Cognitive Reward Modeling for Human-Centered Autonomous Vehicle Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10599-10609} }
Markovian Scale Prediction: A New Era of Visual Autoregressive Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yu and Liu, Jingyi and Shi, Yiwei and Zhang, Qi and Miao, Duoqian and Wang, Changwei and Cao, Longbing}, title = {Markovian Scale Prediction: A New Era of Visual Autoregressive Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41266-41277} }
Similarity-as-Evidence: Calibrating Overconfident VLMs for Interpretable and Label-Efficient Medical Active Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Zhuofan and Lin, Zishan and Lin, Jinliang and Qi, Jie and Hong, Shaohua and Li, Shuo}, title = {Similarity-as-Evidence: Calibrating Overconfident VLMs for Interpretable and Label-Efficient Medical Active Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20973-20984} }
Ground Reaction Inertial Poser: Physics-based Human Motion Capture from Sparse IMUs and Insole Pressure Sensors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hori_2026_CVPR, author = {Hori, Ryosuke and Song, Jyun-Ting and Luo, Zhengyi and Cao, Jinkun and Shin, Soyong and Saito, Hideo and Kitani, Kris}, title = {Ground Reaction Inertial Poser: Physics-based Human Motion Capture from Sparse IMUs and Insole Pressure Sensors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28435-28445} }
See Through the Noise: Improving Domain Generalization in Gaze Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Yanming and Wang, Shijing and Huang, Yaping and Tian, Yi}, title = {See Through the Noise: Improving Domain Generalization in Gaze Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31346-31355} }
Monocular Open Vocabulary Occupancy Prediction for Indoor Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Changqing and Luo, Yueru and Zhang, Han and Jiang, Zeyu and Chen, Changhao}, title = {Monocular Open Vocabulary Occupancy Prediction for Indoor Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21627-21637} }
Kaleidoscopic Scintillation Event Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bocchieri_2026_CVPR, author = {Bocchieri, Alex and Mamish, John and Appleyard, David and Velten, Andreas}, title = {Kaleidoscopic Scintillation Event Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19749-19758} }
EvoGraph-R1: Self-Evolving Multimodal Knowledge Hypergraphs for Agentic Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Jiashi and Jiang, Changhong and Lin, Xiangru and Zhang, Ruifei and Zhu, Xinyi and Liu, Jiyao and Tang, Cheng and Du, Ye and Gao, Shujian and Ning, Junzhi and Liu, Lihao and Huang, Ziyan and Li, Tianbin and Ye, Jin and He, Junjun}, title = {EvoGraph-R1: Self-Evolving Multimodal Knowledge Hypergraphs for Agentic Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {756-765} }
FHAvatar: Fast and High-Fidelity Reconstruction of Face-and-Hair Composable 3D Head Avatar from Few Casual Captures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Yujie and Cai, Zhuoqiang and Niu, Chaoyue and Chen, Jianchuan and Chen, Zhiwen and Lv, Chengfei and Wu, Fan}, title = {FHAvatar: Fast and High-Fidelity Reconstruction of Face-and-Hair Composable 3D Head Avatar from Few Casual Captures}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4132-4144} }
Edge-RecViT: Efficient Vision Transformer via Semantic-Refined Dynamic Recursion-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, YiZhou and Xu, Jinyi and Yin, Mingyu and Zhao, Xianyi}, title = {Edge-RecViT: Efficient Vision Transformer via Semantic-Refined Dynamic Recursion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12987-12996} }
TVHighlights: LLM-Guided Human-Free Collaborative Training for Video Highlight Detection in Movies and TV Dramas-
[pdf]
[supp]
[bibtex]@InProceedings{Qiu_2026_CVPR, author = {Qiu, Qi and Wu, Xuan and Peng, Jiawei and Miao, Yuan and Yang, Xu and Du, Yanlong}, title = {TVHighlights: LLM-Guided Human-Free Collaborative Training for Video Highlight Detection in Movies and TV Dramas}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9773-9783} }
RAAS: LLM Agentic System Architecture Search with GRPO-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Jiayi and Wan, Guancheng and Zhang, Man and Ye, Mang}, title = {RAAS: LLM Agentic System Architecture Search with GRPO}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34470-34479} }
VGA-Bench: A Unified Benchmark and Multi-Model Framework for Video Aesthetics and Generation Quality Evaluation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Longteng and Zheng, DanDan and Qiao, Qianqian and Huang, Heng and Wang, Huaye and Bo, Yihang and Peng, Bao and Chen, Jingdong and Zhou, Jun and Jin, Xin}, title = {VGA-Bench: A Unified Benchmark and Multi-Model Framework for Video Aesthetics and Generation Quality Evaluation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30457-30466} }
BiPA: Bilevel Prompt Adaptation for Underwater Instance Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Long and Zheng, Haoze and Mao, Yuhang and Liu, Jinyuan and Xu, Chengpei and Xue, Xinwei and Wang, Yi and He, Xiangjian and Wang, Weimin}, title = {BiPA: Bilevel Prompt Adaptation for Underwater Instance Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10731-10740} }
Beyond Success: Refining Elegant Robot Manipulation from Mixed-Quality Data via Just-in-Time Intervention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Yanbo and Fu, Jianlong and Zhang, Ruoxuan and Xie, Hongxia and Yao, Meibao}, title = {Beyond Success: Refining Elegant Robot Manipulation from Mixed-Quality Data via Just-in-Time Intervention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13508-13518} }
GROW: Watermark Generation with Progressive Guidance for Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Pengcheng and Jia, Zexi and Zhong, Yijia and Zhang, Jinchao and Zhou, Jie}, title = {GROW: Watermark Generation with Progressive Guidance for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35978-35987} }
Select Less, Reason More: Prioritizing Evidence Purity for Video Reasoning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xuchen and Li, Xuzhao and Hu, Shiyu and Huang, Kaiqi}, title = {Select Less, Reason More: Prioritizing Evidence Purity for Video Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25621-25632} }
MultiShotMaster: A Controllable Multi-Shot Video Generation Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Qinghe and Shi, Xiaoyu and Li, Baolu and Bian, Weikang and Liu, Quande and Lu, Huchuan and Wang, Xintao and Wan, Pengfei and Gai, Kun and Jia, Xu}, title = {MultiShotMaster: A Controllable Multi-Shot Video Generation Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16268-16278} }
Cubic Discrete Diffusion: Discrete Visual Generation on High-Dimensional Representation Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yuqing and Ma, Chuofan and Lin, Zhijie and Teng, Yao and Yu, Lijun and Wang, Shuai and Han, Jiaming and Feng, Jiashi and Jiang, Yi and Liu, Xihui}, title = {Cubic Discrete Diffusion: Discrete Visual Generation on High-Dimensional Representation Tokens}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36072-36081} }
NERFIFY: A Multi-Agent Framework for Turning NeRF Papers into Code-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jain_2026_CVPR, author = {Jain, Seemandhar and Gupta, Keshav and Gupta, Kunal and Chandraker, Manmohan}, title = {NERFIFY: A Multi-Agent Framework for Turning NeRF Papers into Code}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24384-24394} }
Gated Condition Injection without Multimodal Attention: Towards Controllable Linear-Attention Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yuhe and Tan, Zhenxiong and Hu, Yujia and Liu, Songhua and Wang, Xinchao}, title = {Gated Condition Injection without Multimodal Attention: Towards Controllable Linear-Attention Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23388-23397} }
Ego-InBetween: Generating Object State Transitions in Ego-Centric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ge_2026_CVPR, author = {Ge, Mengmeng and Isobe, Takashi and Jia, Xu and Sun, Yanan and Yang, Zetong and Wang, Weinong and Zhou, Dong and Li, Dong and Lu, Huchuan and Barsoum, Emad}, title = {Ego-InBetween: Generating Object State Transitions in Ego-Centric Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43145-43154} }
Unleashing the Power of Chain-of-Prediction for Monocular 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zhihao and Kumar, Abhinav and Ganesan, Girish Chandar and Liu, Xiaoming}, title = {Unleashing the Power of Chain-of-Prediction for Monocular 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18755-18765} }
RLFTSim: Realistic and Controllable Multi-Agent Traffic Simulation via Reinforcement Learning Fine-Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ahmadi_2026_CVPR, author = {Ahmadi, Ehsan and Schofield, Hunter and Khamidehi, Behzad and Arasteh, Fazel and Shan, Jinjun and Mou, Lili and Bai, Dongfeng and Rezaee, Kasra}, title = {RLFTSim: Realistic and Controllable Multi-Agent Traffic Simulation via Reinforcement Learning Fine-Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39734-39743} }
Any4D: Unified Feed-Forward Metric 4D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Karhade_2026_CVPR, author = {Karhade, Jay and Keetha, Nikhil and Zhang, Yuchen and Gupta, Tanisha and Sharma, Akash and Scherer, Sebastian and Ramanan, Deva}, title = {Any4D: Unified Feed-Forward Metric 4D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14578-14589} }
It Takes Two: A Duet of Periodicity and Directionality for Burst Flicker Removal-
[pdf]
[arXiv]
[bibtex]@InProceedings{Qu_2026_CVPR, author = {Qu, Lishen and Zhou, Shihao and Liang, Jie and Zeng, Hui and Zhang, Lei and Yang, Jufeng}, title = {It Takes Two: A Duet of Periodicity and Directionality for Burst Flicker Removal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15516-15527} }
E-comIQ-ZH: A Human-Aligned Dataset and Benchmark for Fine-Grained Evaluation of E-commerce Posters with Chain-of-Thought-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Meiqi and Li, Mingyu and Zhu, Junxiong}, title = {E-comIQ-ZH: A Human-Aligned Dataset and Benchmark for Fine-Grained Evaluation of E-commerce Posters with Chain-of-Thought}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30941-30951} }
Progressive Mask Distillation for Self-supervised Video Representation-
[pdf]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Kewei and Liang, Chong and Xie, Zhao and Guo, Dan}, title = {Progressive Mask Distillation for Self-supervised Video Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41677-41687} }
GThinker: Towards General Multimodal Reasoning via Cue-Guided Rethinking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhan_2026_CVPR, author = {Zhan, Yufei and Wu, Ziheng and Zhu, Yousong and Xue, Rongkun and Zhou, Guanghao and Luo, Ruipu and Chen, Zhenghao and Zhang, Can and Li, Yifan and He, Zhentao and Yang, Zheming and Tang, Ming and Qiu, Minghui and Wang, Jinqiao}, title = {GThinker: Towards General Multimodal Reasoning via Cue-Guided Rethinking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11954-11965} }
Physically Ground Commonsense Knowledge for Articulated Object Manipulation with Analytic Concepts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Jiude and Li, Yuxuan and Lu, Cewu and Sun, Jianhua}, title = {Physically Ground Commonsense Knowledge for Articulated Object Manipulation with Analytic Concepts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13519-13528} }
Computational Speckle Pattern Interferometry-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Shengxi and Yang, Sophia and Chan, Dorian and O'Toole, Matthew}, title = {Computational Speckle Pattern Interferometry}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41710-41719} }
CADFS: A Big CAD Program Dataset and Framework for Computer-Aided Design with Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pyatov_2026_CVPR, author = {Pyatov, Vladislav and Bobrovskikh, Gleb and Galochkin, Saveliy and Boldyrev, Nikita and Voynov, Oleg and Filippov, Alexander and Ferrer, Gonzalo and Wonka, Peter and Burnaev, Evgeny}, title = {CADFS: A Big CAD Program Dataset and Framework for Computer-Aided Design with Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10176-10186} }
Perceptual Neural Video Compression with Color Separation and Rank Chain-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Xiongzhuang and Tang, Chuanbo and Li, Zhuoyuan and Li, Li and Liu, Dong}, title = {Perceptual Neural Video Compression with Color Separation and Rank Chain}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5348-5358} }
OS-Fed: One Snapshot Is All You Need-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2026_CVPR, author = {Qian, Xuwei and Zhang, Jinghui and Tan, Yuchuan and Huang, Wenbo and Wu, Zhen and Zhou, Shen and Gao, LiSha and Ding, Ding and Dong, Fang}, title = {OS-Fed: One Snapshot Is All You Need}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31758-31768} }
BackSplit: The Importance of Sub-dividing the Background in Biomedical Lesion Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saluja_2026_CVPR, author = {Saluja, Rachit and Cihangir, Asli and Deng, Ruining and Paetzold, Johannes C. and Liu, Fengbei and Sabuncu, Mert R.}, title = {BackSplit: The Importance of Sub-dividing the Background in Biomedical Lesion Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8492-8502} }
Your Classifier Can Do More: Towards Balancing the Gaps in Classification, Robustness, and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Kaichao and Wang, He and Hao, Xiaoshuai and Yang, Xiulong and Liu, Ajian and Chu, Qi and Diao, Yunfeng and Hong, Richang}, title = {Your Classifier Can Do More: Towards Balancing the Gaps in Classification, Robustness, and Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42310-42320} }
FlashDecoder: Real-Time Latent-to-Pixel Streaming Decoder with Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Kang_2026_CVPR, author = {Kang, Minguk and Kwak, Suha}, title = {FlashDecoder: Real-Time Latent-to-Pixel Streaming Decoder with Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5294-5305} }
DiffuView: Multi-View Diffusion Pretraining for 3D Aware Robotic Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Kaizhao and Niu, Tian and Liu, Tianyu and Guo, Chenen and Xu, Zijun and Hu, Qingda and Ding, Wenchao}, title = {DiffuView: Multi-View Diffusion Pretraining for 3D Aware Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23601-23611} }
FlowSteer: Guiding Few-Step Image Synthesis with Authentic Trajectories-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ke_2026_CVPR, author = {Ke, Lei and Yin, Hubery and Liu, Gongye and Lv, Zhengyao and Guo, Jingcai and Li, Chen and Luo, Wenhan and Yang, Yujiu and Lyu, Jing}, title = {FlowSteer: Guiding Few-Step Image Synthesis with Authentic Trajectories}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30381-30390} }
SyncMos: Scalable Motion Synchronisation for Multi-Agent Scene Interaction-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Lingxiao and Kim, Dongwon and Ruan, Lingyan and Chen, Bin and Kwon, Taesoo and Rhee, Taehyun}, title = {SyncMos: Scalable Motion Synchronisation for Multi-Agent Scene Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8174-8182} }
Cross-Architecture Adaptation: Cloud-Edge Continual Test-Time Adaptation with Dynamic Sampling and Heterogeneous Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Zirui and Chu, Xianhang and Li, Jiahao and Yang, Xu and Deng, Cheng}, title = {Cross-Architecture Adaptation: Cloud-Edge Continual Test-Time Adaptation with Dynamic Sampling and Heterogeneous Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39901-39910} }
UZ3DVG: Unaided Zero-Shot 3D Visual Grounding with Generated Language Conditions-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Wenbin and Lin, Jiawen and Xie, Yuan and Zhang, Yachao and Qu, Yanyun}, title = {UZ3DVG: Unaided Zero-Shot 3D Visual Grounding with Generated Language Conditions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9547-9557} }
Learning from Noisy Supervision: A Denoising-Debiasing Framework for Weakly Supervised Video Anomaly Detection-
[pdf]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yaxin and Wang, Yang and Guo, Wenya and Xu, Sihan and Cai, Xiangrui and Lin, Xi and Zhang, Ying and Yuan, Xiaojie}, title = {Learning from Noisy Supervision: A Denoising-Debiasing Framework for Weakly Supervised Video Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21326-21335} }
CoLoR: The Devil is in Scene Coordinate Regression for Large-Scale Visual Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Xindong and Li, Hang and Wu, Yuchen and Li, Jiahe and Bai, Xiao and Zheng, Jin}, title = {CoLoR: The Devil is in Scene Coordinate Regression for Large-Scale Visual Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12206-12216} }
Focus, Don't Prune: Identifying Instruction-Relevant Regions for Information-Rich Image Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Kwon_2026_CVPR, author = {Kwon, Mincheol and Lee, Minseung and Choi, Seonga and Choi, Miso and Oh, Kyeongjin and Lee, Hyunyoung and Park, Cheonyoung and Song, Yongho and Park, Seunghyun and Kim, Jinkyu}, title = {Focus, Don't Prune: Identifying Instruction-Relevant Regions for Information-Rich Image Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31900-31909} }
Revisiting Pose Sensitivity in Splat-based Computed Tomography under Sparse-view Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, Kiseok and Cho, Hyeongjun and Kim, Inchul and Kim, Min H.}, title = {Revisiting Pose Sensitivity in Splat-based Computed Tomography under Sparse-view Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25969-25978} }
Real2Edit2Real: Generating Robotic Demonstrations via a 3D Control Interface-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yujie and Fan, Hongwei and Chen, Di and Chen, Shengcong and Chen, Liliang and Li, Xiaoqi and Ren, Guanghui and Dong, Hao}, title = {Real2Edit2Real: Generating Robotic Demonstrations via a 3D Control Interface}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23106-23116} }
Chart-FR1: Visual Focus-Driven Fine-Grained Reasoning on Dense Charts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Hongkun and Wu, Yuwei and Hong, Wanyi and Hu, Shenghui and Yan, Qitong and Yang, Yi and Han, Rufei and Zhou, Changju and Zhu, Minfeng and Han, Dongming and Chen, Wei}, title = {Chart-FR1: Visual Focus-Driven Fine-Grained Reasoning on Dense Charts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26285-26294} }
$\alpha$Matte4K & $\mu$Matting: Dataset and Model for Ultra-Micro Precision Alpha Video Matting-
[pdf]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xinyi and Dong, Hang and Jiang, Baowei and Xu, Shenkun and Guan, Youqi and Shi, Kanle and Gai, Kun and Song, Haichuan}, title = {{$\alpha$Matte4K} \& {$\mu$Matting}: Dataset and Model for Ultra-Micro Precision Alpha Video Matting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12491-12500} }
Your One-Stop Solution for AI-Generated Video Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Long and Xue, Zihao and Wang, Yan and Yan, Zhiyuan and Xu, Jin and Jiang, Xiaorui and Yu, Haiyang and Liao, Yong and Bi, Zhen}, title = {Your One-Stop Solution for AI-Generated Video Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4458-4470} }
Breaking Multimodal LLM Safety via Video-Driven Prompting-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Dong and He, Xiangyu and Lyu, Xinqi and Xiao, Bin}, title = {Breaking Multimodal LLM Safety via Video-Driven Prompting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8566-8576} }
Common Inpainted Objects In-N-Out of Context-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Tianze and Jordan, Tyson and Sun, Ruitong and Liu, Ninghao and Sun, Jin}, title = {Common Inpainted Objects In-N-Out of Context}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13069-13079} }
U4D: Uncertainty-Aware 4D World Modeling from LiDAR Sequences-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Xiang and Liang, Alan and Liu, Youquan and Li, Linfeng and Kong, Lingdong and Liu, Ziwei and Liu, Qingshan}, title = {U4D: Uncertainty-Aware 4D World Modeling from LiDAR Sequences}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10027-10039} }
Disentangle-then-Align: Non-Iterative Hybrid Multimodal Image Registration via Cross-Scale Feature Disentanglement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Chunlei and Xia, Jiahao and Xiao, Yun and Jiang, Bo and Zhang, Jian}, title = {Disentangle-then-Align: Non-Iterative Hybrid Multimodal Image Registration via Cross-Scale Feature Disentanglement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15912-15921} }
UniLDiff: Unlocking the Power of Diffusion Priors for All-in-One Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Zihan and Zhou, Liangtai and Chen, Dian and Tang, Ni and Luo, Xiaotong and Xie, Yuan and Qu, Yanyun}, title = {UniLDiff: Unlocking the Power of Diffusion Priors for All-in-One Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37465-37475} }
Eliminate Distance Differences Induced by Backdoor Attacks: Layer-Selective Training and Clipping to Mask Backdoor Models-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xuzeng and Zhang, Tao and Tang, Xiangyun and Wang, Jiacheng and Wang, Jian and Kang, Jiawen and Liu, Jiqiang and Han, Zhen and Niyato, Dusit and Kim, Dong In}, title = {Eliminate Distance Differences Induced by Backdoor Attacks: Layer-Selective Training and Clipping to Mask Backdoor Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13336-13345} }
IntroSVG: Learning from Rendering Feedback for Text-to-SVG Generation via an Introspective Generator-Critic Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Feiyu and Yang, Jiayuan and Zhao, Zhiyuan and Zhang, Da and Li, Bingyu and Liu, Peng and Gao, Junyu}, title = {IntroSVG: Learning from Rendering Feedback for Text-to-SVG Generation via an Introspective Generator-Critic Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {615-625} }
Progressive Cross-Modal Causal Intervention for Long-Term Action Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Shaowu and Jia, Xibin and Fan, Chao and Gao, Junyu and Chang, Jing and Sun, Qianmei}, title = {Progressive Cross-Modal Causal Intervention for Long-Term Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31186-31195} }
Learning What to Trust: Bayesian Prior-Guided Optimization for Visual Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Ruiying and Liang, Yuanzhi and Huang, Haibin and Yu, Tianshu and Zhang, Chi}, title = {Learning What to Trust: Bayesian Prior-Guided Optimization for Visual Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34408-34417} }
BHCast: Unlocking Black Hole Plasma Dynamics from a Single Blurry Image with Long-Term Forecasting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tu_2026_CVPR, author = {Tu, Renbo and SaraerToosi, Ali and Conroy, Nicholas S. and Pekhimenko, Gennady and Levis, Aviad}, title = {BHCast: Unlocking Black Hole Plasma Dynamics from a Single Blurry Image with Long-Term Forecasting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5606-5616} }
BinaryAttention: One-Bit QK-Attention for Vision and Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Chaodong and Zhang, Zhengqiang and Zhang, Lei}, title = {BinaryAttention: One-Bit QK-Attention for Vision and Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12106-12117} }
FlowPortal: Residual-Corrected Flow for Training-Free Video Relighting and Background Replacement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Wenshuo and Fan, Junyi and Zeng, Jiangyue and Yang, Shuai}, title = {FlowPortal: Residual-Corrected Flow for Training-Free Video Relighting and Background Replacement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2025-2034} }
ENC-Bench: A Benchmark for Evaluating Multimodal Large Language Models in Electronic Navigational Chart Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Ao and Li, Xingming and Ji, Xuanyu and He, Xixiang and Sun, Qiyao and Qiu, Chunping and Huang, Runke and Hu, Qingyong}, title = {ENC-Bench: A Benchmark for Evaluating Multimodal Large Language Models in Electronic Navigational Chart Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2423-2433} }
STCDiT: Spatio-Temporally Consistent Diffusion Transformer for High-Quality Video Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Junyang and Dong, Jiangxin and Sun, Long and Yang, Yixin and Pan, Jinshan}, title = {STCDiT: Spatio-Temporally Consistent Diffusion Transformer for High-Quality Video Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38281-38290} }
BD-Merging: Bias-Aware Dynamic Model Merging with Evidence-Guided Contrastive Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Yuhan and Lyu, Chen}, title = {BD-Merging: Bias-Aware Dynamic Model Merging with Evidence-Guided Contrastive Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12892-12901} }
OpenFS: Multi-Hand-Capable Fingerspelling Recognition with Implicit Signing-Hand Detection and Frame-Wise Letter-Conditioned Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cha_2026_CVPR, author = {Cha, Junuk and Kim, Jihyeon and Park, Han-Mu}, title = {OpenFS: Multi-Hand-Capable Fingerspelling Recognition with Implicit Signing-Hand Detection and Frame-Wise Letter-Conditioned Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30707-30717} }
TLMA: Mitigating the Impact of Weakly Labeled Information for Video Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Rong and Wang, Runqi and Zhang, Yingjun and Tao, Tao and Li, Xiaomeng and Jing, Liping}, title = {TLMA: Mitigating the Impact of Weakly Labeled Information for Video Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35597-35606} }
PromptDepth: Efficient and Promptable Geometric 3D Vision Model for Embodied Intelligence-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xianyun and Miao, Jiaxu and Xu, Tian and Wang, Siyuan and Li, Yuehao and Hu, Haoyang and Xiao, Jun and Tian, Yonghong and Yu, Jun}, title = {PromptDepth: Efficient and Promptable Geometric 3D Vision Model for Embodied Intelligence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28074-28085} }
SounDiT: Geo-Contextual Soundscape-to-Landscape Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Junbo and Tan, Haofeng and Liao, Bowen and Jiang, Albert and Fei, Teng and Huang, Qixing and Zhou, Bing and Tu, Zhengzhong and Ye, Shan and Kang, Yuhao}, title = {SounDiT: Geo-Contextual Soundscape-to-Landscape Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32659-32670} }
One Model, Many Budgets: Elastic Latent Interfaces for Diffusion Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Haji-Ali_2026_CVPR, author = {Haji-Ali, Moayed and Menapace, Willi and Skorokhodov, Ivan and Park, Dogyun and Kag, Anil and Vasilkovsky, Michael and Tulyakov, Sergey and Ordonez, Vicente and Siarohin, Aliaksandr}, title = {One Model, Many Budgets: Elastic Latent Interfaces for Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4558-4568} }
OmniSonic: Towards Universal and Holistic Audio Generation from Video and Text-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pian_2026_CVPR, author = {Pian, Weiguo and Kushwaha, Saksham Singh and Chen, Zhimin and Deng, Shijian and Wang, Kai and Guo, Yunhui and Tian, Yapeng}, title = {OmniSonic: Towards Universal and Holistic Audio Generation from Video and Text}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {540-549} }
Residual Decoding: Mitigating Hallucinations in Large Vision-Language Models via History-Aware Residual Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xinrong and Chu, Xu and Qiu, Yingmin and Zhang, Hengyuan and Xiong, Jing and Tang, Shiyu and Liu, Shuai and Yang, Shaokang and Yang, Cheng and So, Hayden Kwok-Hay and Wong, Ngai}, title = {Residual Decoding: Mitigating Hallucinations in Large Vision-Language Models via History-Aware Residual Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25281-25292} }
SAR2Net: Learning Spatially Anchored Representations for Retrieval-Guided Cross-Stain Alignment-
[pdf]
[bibtex]@InProceedings{Shen_2026_CVPR, author = {Shen, Tianle and Yan, Fang and Zhang, Xiaofan}, title = {SAR2Net: Learning Spatially Anchored Representations for Retrieval-Guided Cross-Stain Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12544-12553} }
PoInit-of-View: Poisoning Initialization of Views Transfers Across Multiple 3D Reconstruction Systems-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Weijie and Xing, Songlong and Zhao, Zhengyu and Sebe, Nicu and Lepri, Bruno}, title = {PoInit-of-View: Poisoning Initialization of Views Transfers Across Multiple 3D Reconstruction Systems}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20670-20679} }
CARE: A Molecular-Guided Foundation Model with Adaptive Region Modeling for Whole Slide Image Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Di and Gong, Zhangpeng and Pang, Xiaobo and Liu, Jiashuai and Lu, Junbo and Cui, Hao and Ge, Jiusong and Zeng, Zhi and Yi, Kai and Li, Yinghua and Liu, Si and Yu, Tingsong and Wang, Haoran and Crispin-Ortuzar, Mireia and Yu, Weimiao and Li, Chen and Gao, Zeyu}, title = {CARE: A Molecular-Guided Foundation Model with Adaptive Region Modeling for Whole Slide Image Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21078-21088} }
A2GC: Asymmetric Aggregation with Geometric Constraints for Locally Aggregated Descriptors-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zhenyu and Shang, Tianyi}, title = {A2GC: Asymmetric Aggregation with Geometric Constraints for Locally Aggregated Descriptors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19423-19431} }
Decoupling Stability and Plasticity for Multi-Modal Test-Time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Yongbo and Guo, Zirun and Jin, Tao}, title = {Decoupling Stability and Plasticity for Multi-Modal Test-Time Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15020-15029} }
ActiveVLA: Injecting Active Perception into Vision-Language-Action Models for Precise 3D Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Zhenyang and Gu, Yongchong and Wang, Yikai and Xue, Xiangyang and Fu, Yanwei}, title = {ActiveVLA: Injecting Active Perception into Vision-Language-Action Models for Precise 3D Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8141-8151} }
MaskAdapt: Learning Flexible Motion Adaptation via Mask-Invariant Prior for Physics-Based Characters-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Soomin and Lee, Eunseong and Lee, Kwang Bin and Lee, Sung-Hee}, title = {MaskAdapt: Learning Flexible Motion Adaptation via Mask-Invariant Prior for Physics-Based Characters}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2285-2294} }
SeeU: Seeing the Unseen World via 4D Dynamics-aware Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2026_CVPR, author = {Yuan, Yu and Wickremasinghe, Tharindu and Nadir, Zeeshan and Wang, Xijun and Chi, Yiheng and Chan, Stanley H.}, title = {SeeU: Seeing the Unseen World via 4D Dynamics-aware Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11150-11162} }
MedLoc-R1: Performance-Aware Curriculum Reward Scheduling for GRPO-Based Medical Visual Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Guangjing and Qin, Ziyuan and Zhang, Chaoran and Du, Chenlin and Wang, Jinglin and Sun, Wanran and Zhang, Zhenyu and Ji, Bing and Lao, Qicheng}, title = {MedLoc-R1: Performance-Aware Curriculum Reward Scheduling for GRPO-Based Medical Visual Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21036-21045} }
REVISOR: Beyond Textual Reflection, Towards Multimodal Introspective Reasoning in Long-Form Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jiaze and Yin, Hao and Tan, Wenhui and Chen, Jingyang and Xu, Boshen and Qu, Yuxun and Chen, Yijing and Ju, Jianzhong and Luo, Zhenbo and Luan, Jian}, title = {REVISOR: Beyond Textual Reflection, Towards Multimodal Introspective Reasoning in Long-Form Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5059-5069} }
Hierarchical Concept Embedding & Pursuit for Interpretable Image Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Nghia and Ding, Tianjiao and Vidal, Ren{\'e}}, title = {Hierarchical Concept Embedding \& Pursuit for Interpretable Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2907-2917} }
One Layer's Trash is Another Layer's Treasure: Adaptive Layer-wise Visual Token Selection in LVLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yongru and Zhang, Kai and Zong, Zeliang and Lu, Yuchen and Tan, Wenming and Ren, Ye and Hu, Jilin}, title = {One Layer's Trash is Another Layer's Treasure: Adaptive Layer-wise Visual Token Selection in LVLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17672-17681} }
R2-Seg: Training-Free OOD Medical Tumor Segmentation via Anatomical Reasoning and Statistical Rejection-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2026_CVPR, author = {Shen, Shuaike and Liu, Ke and Xie, Jiaqing and Gao, Shangde and Shen, Chunhua and Liu, Ge and Crispin-Ortuzar, Mireia and Gao, Shangqi}, title = {R2-Seg: Training-Free OOD Medical Tumor Segmentation via Anatomical Reasoning and Statistical Rejection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21669-21678} }
Bias In, Bias Out? Finding Unbiased Subnetworks in Vanilla Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{De_Moura_Matos_2026_CVPR, author = {De Moura Matos, Ivan Luiz and Saoud, Abdel Djalil Sad and Iakovleva, Ekaterina and Pastore, Vito Paolo and Tartaglione, Enzo}, title = {Bias In, Bias Out? Finding Unbiased Subnetworks in Vanilla Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3294-3305} }
RecEdit-Drive: 3D Reconstruction-Guided Spatiotemporal Video Editing for Autonomous Driving Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Yipeng and Wang, Xin and Yang, Chenghan and Wang, Chong and Wu, Dongdong and Su, Wanchao and Zhao, Hengshuang and Feng, Wei and Yang, Kairui and Lin, Di}, title = {RecEdit-Drive: 3D Reconstruction-Guided Spatiotemporal Video Editing for Autonomous Driving Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25415-25425} }
Particulate: Feed-Forward 3D Object Articulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Ruining and Yao, Yuxin and Zheng, Chuanxia and Rupprecht, Christian and Lasenby, Joan and Wu, Shangzhe and Vedaldi, Andrea}, title = {Particulate: Feed-Forward 3D Object Articulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27708-27718} }
EXOTIC: External Vision-driven Incomplete Multi-view Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Shilin and Peng, Dezhong and Ren, Zhenwen and Sun, Yuan}, title = {EXOTIC: External Vision-driven Incomplete Multi-view Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30216-30225} }
ConceptPrism: Concept Disentanglement in Personalized Diffusion Models via Residual Token Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Minseo and Kwon, Minchan and Lee, Dongyeun and Jeon, Yunho and Kim, Junmo}, title = {ConceptPrism: Concept Disentanglement in Personalized Diffusion Models via Residual Token Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2381-2390} }
Video2Robo: 3DGS-based Synthetic Data from One Video Enables Scalable Robot Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Yinan and Hu, Kejia and Chen, Ye and Dou, Jianyu and Wang, Jiahui and Zhao, Jingyu and Ao, Haojia and Yang, Yi and Yue, Yufeng}, title = {Video2Robo: 3DGS-based Synthetic Data from One Video Enables Scalable Robot Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6695-6705} }
Attack for Defense: Adversarial Agents for Point Prompt Optimization Empowering Segment Anything Model-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xueyu and Zhang, Xiaoyi and Liu, Meilin and Shi, Guangze and Shen, Jia and Wang, Yujie and Zhao, Cai and He, Ziyuan and Wu, Yongfei and Wei, Mingqiang and Chen, Yongle}, title = {Attack for Defense: Adversarial Agents for Point Prompt Optimization Empowering Segment Anything Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6591-6600} }
Revisiting Learning with Noisy Labels: Active Forgetting and Noise Suppression-
[pdf]
[bibtex]@InProceedings{Sheng_2026_CVPR, author = {Sheng, Mengmeng and Sun, Zeren and Chen, Tao and Pan, Jinshan and Yao, Yazhou and Shen, Fumin}, title = {Revisiting Learning with Noisy Labels: Active Forgetting and Noise Suppression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24792-24802} }
FINER: MLLMs Hallucinate under Fine-grained Negative Queries-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Rui and Kim, Sanghwan and Xian, Yongqin and Akata, Zeynep and Alaniz, Stephan}, title = {FINER: MLLMs Hallucinate under Fine-grained Negative Queries}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36235-36244} }
Visual Prototype Conditioned Focal Region Generation for UAV-Based Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Wenhao and Wu, Zimeng and Wu, Yu and Fu, Zehua and Chen, Jiaxin}, title = {Visual Prototype Conditioned Focal Region Generation for UAV-Based Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3772-3782} }
MoRel: Long-Range Flicker-Free 4D Motion Modeling via Anchor Relay-based Bidirectional Blending with Hierarchical Densification-
[pdf]
[supp]
[bibtex]@InProceedings{Kwak_2026_CVPR, author = {Kwak, Sangwoon and Kwon, Weeyoung and Jeong, Jun Young and Kim, Geonho and Cheong, Won-Sik and Oh, Jihyong}, title = {MoRel: Long-Range Flicker-Free 4D Motion Modeling via Anchor Relay-based Bidirectional Blending with Hierarchical Densification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37370-37379} }
Multi-Scale Local Speculative Decoding for Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Peruzzo_2026_CVPR, author = {Peruzzo, Elia and Sauti{\`e}re, Guillaume and Habibian, Amirhossein}, title = {Multi-Scale Local Speculative Decoding for Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5253-5262} }
Fully Decentralized Certified Unlearning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lamri_2026_CVPR, author = {Lamri, Hithem and Maniatakos, Michail}, title = {Fully Decentralized Certified Unlearning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24577-24586} }
CG-Floor: Centroid-Guided Diffusion for Large-Scale Floorplan Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Lian_2026_CVPR, author = {Lian, Hongjin and Ma, Jian and Chen, Hongjie and Li, Jia and Hu, Ruizhen and Lai, Yu-Kun and Li, Kun}, title = {CG-Floor: Centroid-Guided Diffusion for Large-Scale Floorplan Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18354-18363} }
CoV-Align: Efficient Fine-grained Cross-Modal Alignment with Cohesive Visual Semantics Priority-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Hengqi and Zhou, Wanting and Kong, Longteng and Feng, Fangxiang and Ren, Lei and Chen, Wei and Wang, Xiaojie}, title = {CoV-Align: Efficient Fine-grained Cross-Modal Alignment with Cohesive Visual Semantics Priority}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36828-36837} }
Question-guided Visual Compression with Memory Feedback for Long-Term Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yamao_2026_CVPR, author = {Yamao, Sosuke and Miyahara, Natsuki and Qi, Yuankai and Takeuchi, Shun}, title = {Question-guided Visual Compression with Memory Feedback for Long-Term Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32850-32859} }
A Difference-in-Difference Approach to Detecting AI-Generated Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qi_2026_CVPR, author = {Qi, Xinyi and Ye, Kai and Shi, Chengchun and Yang, Ying and Zhu, Jin and Zhou, Hongyi}, title = {A Difference-in-Difference Approach to Detecting AI-Generated Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42964-42975} }
Probabilistic Discrepancy Learning for Roadside LiDAR Scene Completion-
[pdf]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Xiaogang and Hu, Jinchao and Wang, Zixian and Liu, Dun and Cheng, BoXiang and Wu, Yiqiang}, title = {Probabilistic Discrepancy Learning for Roadside LiDAR Scene Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9955-9964} }
CF-IPT: Cross-Modal Fusion Interactive Prompt Tuning of Vision-Language Pre-Trained Model for Multisource Remote Sensing Data Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Ji_2026_CVPR, author = {Ji, Jinheng and Qu, Jiahui and Dong, Wenqian and Li, Yunsong}, title = {CF-IPT: Cross-Modal Fusion Interactive Prompt Tuning of Vision-Language Pre-Trained Model for Multisource Remote Sensing Data Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23021-23030} }
CompetitorFormer: Mitigating Query Conflicts for 3D Instance Segmentation via Competitive Strategy-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Duanchu and Yang, Junjie and Gong, Haoran and Liu, Jing and Wang, Di}, title = {CompetitorFormer: Mitigating Query Conflicts for 3D Instance Segmentation via Competitive Strategy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34724-34733} }
Ego-1K - A Large-Scale Multiview Video Dataset for Egocentric Vision-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Jae Yong and Scharstein, Daniel and Bapat, Akash and Hu, Hao and Fu, Andrew and Zhao, Haoru and Sammut, Paul and Li, Xiang and Jeapes, Stephen and Gupta, Anik and David, Lior and Madhuvarasu, Saketh and Joshi, Jay Girish and Wither, Jason}, title = {Ego-1K - A Large-Scale Multiview Video Dataset for Egocentric Vision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19854-19863} }
LensWalk: Agentic Video Understanding by Planning How You See in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Keliang and Li, Yansong and Shen, Hongze and Liu, Mengdi and Chang, Hong and Shan, Shiguang}, title = {LensWalk: Agentic Video Understanding by Planning How You See in Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19518-19528} }
Hilbert Curve-Based Attention Enabling Topology-Preserving Image Tensor Representation for Semantic Segmentation Network-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Linkang and Li, Gang and Song, Yue and Ji, Xiangxin}, title = {Hilbert Curve-Based Attention Enabling Topology-Preserving Image Tensor Representation for Semantic Segmentation Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13113-13122} }
Ego-Grounding for Personalized Question-Answering in Egocentric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Junbin and Zhang, Shenglang and Zhu, Pengxiang and Yao, Angela}, title = {Ego-Grounding for Personalized Question-Answering in Egocentric Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40537-40547} }
Gaussian Mapping for Evolving Scenes-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yugay_2026_CVPR, author = {Yugay, Vladimir and Kersten, Thies and Carlone, Luca and Gevers, Theo and Oswald, Martin R. and Schmid, Lukas}, title = {Gaussian Mapping for Evolving Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18903-18912} }
MERIT: Multi-domain Efficient RAW Image Translation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Wenjun and Fu, Shenghao and Jin, Yian and Ni, Yang and Cui, Ziteng and Chen, Hanning and He, Yirui and Liu, Yezi and Yun, Sanggeon and Jeong, SungHeon and Masukawa, Ryozo and Chung, William Youngwoo and Imani, Mohsen}, title = {MERIT: Multi-domain Efficient RAW Image Translation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37216-37225} }
Memory-Efficient Transfer Learning with Fading Side Networks via Masked Dual Path Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yutong and Chen, Jiaxin and Chen, Honglin and Zheng, Kaiqi and Liao, Shengcai and Zhong, Hanwen and Li, Weixin and Wang, Yunhong}, title = {Memory-Efficient Transfer Learning with Fading Side Networks via Masked Dual Path Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25043-25054} }
LAOF: Robust Latent Action Learning with Optical Flow Constraints-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bu_2026_CVPR, author = {Bu, Xizhou and Lyu, Jiexi and Sun, Fulei and Yang, Ruichen and Ma, Zhiqiang and Li, Wei}, title = {LAOF: Robust Latent Action Learning with Optical Flow Constraints}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27334-27344} }
Language-driven Fine-grained Retrieval-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Shijie and Yu, Xin and Luo, Yadan and Wang, Zijian and Zhang, Pengfei and Huang, Zi}, title = {Language-driven Fine-grained Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2682-2692} }
Seeing Depth Through Frequency and Motion: A Progressive Training Paradigm for Monocular Depth Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Ke and Song, Bolin and Liu, Hongbo}, title = {Seeing Depth Through Frequency and Motion: A Progressive Training Paradigm for Monocular Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26845-26854} }
X-AVDT: Audio-Visual Cross-Attention for Robust Deepfake Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Youngseo and Yun, Kwan and Hong, Seokhyeon and Cha, Sihun and Koo, Colette Suhjung and Noh, Junyong}, title = {X-AVDT: Audio-Visual Cross-Attention for Robust Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4403-4414} }
SURF: Signature-Retained Fast Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2026_CVPR, author = {Ding, Kaixin and Chen, Xi and Ji, Sihui and Gao, Yuan and Hou, Liang and Tao, Xin and Zhao, Hengshuang}, title = {SURF: Signature-Retained Fast Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9171-9181} }
See, Think, Act: Teaching Multimodal Agents to Effectively Interact with GUI by Identifying Toggles-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Zongru and Mao, Rui and Tian, Zhiyuan and Cheng, Pengzhou and Ju, Tianjie and Wu, Zheng and Dong, Lingzhong and Sheng, Haiyue and Zhang, Zhuosheng and Liu, Gongshen}, title = {See, Think, Act: Teaching Multimodal Agents to Effectively Interact with GUI by Identifying Toggles}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27536-27546} }
FeatureFool: Zero-Query Fooling of Video Models via Feature Map-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Duoxun and Xiao, Xi and Hu, Guangwu and Sun, Kangkang and Yang, Xiao and Chen, Dongyang and Li, Qing and Yin, Yong-jie and Wang, Jiyao}, title = {FeatureFool: Zero-Query Fooling of Video Models via Feature Map}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42268-42279} }
A Temporal and Content Co-Awareness Latent Diffusion for Controllable Hand Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Hao_2026_CVPR, author = {Hao, Shuang and Ren, Pengfei and Sun, Haifeng and Pan, Ting and Qi, Qi and Zhang, Lei and Liu, Cong and Liao, Jianxin and Wang, Jingyu}, title = {A Temporal and Content Co-Awareness Latent Diffusion for Controllable Hand Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38324-38334} }
StyleGallery: Training-free and Semantic-aware Personalized Style Transfer from Arbitrary Image References-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Boyu and Ye, Yunfan and Liu, Chang and Wu, Weishang and Liu, Fang and Cai, Zhiping}, title = {StyleGallery: Training-free and Semantic-aware Personalized Style Transfer from Arbitrary Image References}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29092-29102} }
Meta-Learning In-Context Enables Training-Free Cross Subject Brain Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nan_2026_CVPR, author = {Nan, Mu and Yu, Muquan and Mai, Weijian and Prince, Jacob S. and Adeli, Hossein and Zhang, Rui and Cao, Jiahang and Becker, Benjamin and Pyles, John A. and Henderson, Margaret M. and Song, Chunfeng and Kriegeskorte, Nikolaus and Tarr, Michael J. and Hu, Xiaoqing and Luo, Andrew F.}, title = {Meta-Learning In-Context Enables Training-Free Cross Subject Brain Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3604-3616} }
fMRI-LM: Towards a Universal Foundation Model for Language-Aligned fMRI Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Yuxiang and Zhang, Yanteng and Xiao, Xi and Qian, Chengxuan and Wang, Tianyang and Calhoun, Vince D.}, title = {fMRI-LM: Towards a Universal Foundation Model for Language-Aligned fMRI Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6931-6940} }
CLEX: Complementary Label Exchange Learning for Noisy Facial Expression Recognition-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Lin and Liu, Fang and Xing, Xiaofen and Guo, Kailing and Xu, Xiangmin}, title = {CLEX: Complementary Label Exchange Learning for Noisy Facial Expression Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10577-10586} }
OneHOI: Unifying Human-Object Interaction Generation and Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hoe_2026_CVPR, author = {Hoe, Jiun Tian and Hu, Weipeng and Jiang, Xudong and Tan, Yap-Peng and Chan, Chee Seng}, title = {OneHOI: Unifying Human-Object Interaction Generation and Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7664-7673} }
Reevaluating the Intra-Modal Misalignment Hypothesis in CLIP-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Herzog_2026_CVPR, author = {Herzog, Jonas and Wang, Yue}, title = {Reevaluating the Intra-Modal Misalignment Hypothesis in CLIP}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24310-24319} }
Rel-Zero: Harnessing Patch-Pair Invariance for Robust Zero-Watermarking Against AI Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Pengzhen and Liu, Yanwei and Gu, Xiaoyan and Chen, Xiaojun and Liu, Wu and Wang, Weiping}, title = {Rel-Zero: Harnessing Patch-Pair Invariance for Robust Zero-Watermarking Against AI Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3337-3346} }
Joint Spectral Image Reconstruction and Semantic Segmentation with Cooperative Unfolding-
[pdf]
[supp]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Zijun and Wang, Ping and Wang, Xiaodong and Chen, Chang and Yuan, Xin}, title = {Joint Spectral Image Reconstruction and Semantic Segmentation with Cooperative Unfolding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6910-6919} }
NOWA: Null-space Optical Watermark for Invisible Capture Fingerprinting and Tamper Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vargas_2026_CVPR, author = {Vargas, Edwin and Lopez, Jhon and Arguello, Henry and Veeraraghavan, Ashok}, title = {NOWA: Null-space Optical Watermark for Invisible Capture Fingerprinting and Tamper Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {102-112} }
Representation-Steered Incremental Adapter-Tuning for Class-Incremental Learning with Pre-Trained Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Jiarui and Huang, Libo and Li, Xiangqi and An, Zhulin and Yang, Chuanguang and Wang, Yu and Diao, Boyu and Xu, Yongjun}, title = {Representation-Steered Incremental Adapter-Tuning for Class-Incremental Learning with Pre-Trained Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18010-18020} }
HOG-Layout: Hierarchical 3D Scene Generation, Optimization and Editing via Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Haiyan and Zhang, Deyu and Weng, Dongdong and Song, Weitao and Duh, Henry Been-Lirn}, title = {HOG-Layout: Hierarchical 3D Scene Generation, Optimization and Editing via Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31586-31596} }
SeD-UD: An Influence-Driven and Hierarchically-Decoupled Information Bottleneck for Multimodal Intent Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Qin and Zhang, Wenbo and Liu, Limei and Peng, Han and Yang, Junfeng and Xu, Guanying}, title = {SeD-UD: An Influence-Driven and Hierarchically-Decoupled Information Bottleneck for Multimodal Intent Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30346-30356} }
When Visualizing is the First Step to Reasoning: MIRA, a Benchmark for Visual Chain-of-Thought-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Yiyang and Tu, Haoqin and Wang, Zijun and Wang, Zeyu and Muennighoff, Niklas and Nie, Fan and Deng, Chaorui and Yan, Shen and Fan, Haoqi and Choi, Yejin and Zou, James and Xie, Cihang and Yao, Huaxiu and Ye, Qinghao}, title = {When Visualizing is the First Step to Reasoning: MIRA, a Benchmark for Visual Chain-of-Thought}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26154-26164} }
Towards Reliable Evaluation of Adversarial Robustness for Spiking Neural Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Jihang and Zhao, Dongcheng and Chen, Ruolin and Zhang, Qian and Zeng, Yi}, title = {Towards Reliable Evaluation of Adversarial Robustness for Spiking Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20691-20700} }
Spectral Scalpel: Amplifying Adjacent Action Discrepancy via Frequency-Selective Filtering for Skeleton-Based Action Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2026_CVPR, author = {Ji, Haoyu and Chen, Bowen and Yang, Zhihao and Huang, Wenze and Gao, Yu and Liu, Xueting and Ren, Weihong and Wang, Zhiyong and Liu, Honghai}, title = {Spectral Scalpel: Amplifying Adjacent Action Discrepancy via Frequency-Selective Filtering for Skeleton-Based Action Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12849-12859} }
SemiGDA: Generative Dual-distribution Alignment for Semi-Supervised Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Kaiwen and Zhou, Yi and Zhang, Yizhe and Li, Jingxiong and Zhou, Tao}, title = {SemiGDA: Generative Dual-distribution Alignment for Semi-Supervised Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1450-1460} }
DuetSVG: Unified Multimodal SVG Generation with Internal Visual Guidance-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Peiying and Zhao, Nanxuan and Fisher, Matthew and Xu, Yiran and Liao, Jing and Liu, Difan}, title = {DuetSVG: Unified Multimodal SVG Generation with Internal Visual Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10219-10229} }
FACE: A Face-based Autoregressive Representation for High-Fidelity and Efficient Mesh Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Hanxiao and Guo, Yuan-Chen and Liu, Ying-Tian and Zou, Zi-Xin and Zhang, Biao and Quan, Weize and Liang, Ding and Cao, Yan-Pei and Yan, Dong-Ming}, title = {FACE: A Face-based Autoregressive Representation for High-Fidelity and Efficient Mesh Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12719-12729} }
FAAR: Efficient Frequency-Aware Multi-Task Fine-Tuning via Automatic Rank Selection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fontana_2026_CVPR, author = {Fontana, Maxime and Spratling, Michael and Shi, Miaojing}, title = {FAAR: Efficient Frequency-Aware Multi-Task Fine-Tuning via Automatic Rank Selection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31135-31144} }
MatE: Material Extraction from Single-Image via Geometric Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zeyu and Zhai, Wei and Yang, Jian and Cao, Yang}, title = {MatE: Material Extraction from Single-Image via Geometric Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12480-12490} }
GeoWorld: Geometric World Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zeyu and Li, Danning and Reid, Ian and Hartley, Richard}, title = {GeoWorld: Geometric World Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30952-30963} }
MeshMosaic: Scaling Artist Mesh Generation via Local-to-Global Assembly-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Rui and Xue, Tianyang and Dong, Qiujie and Wan, Le and Zhu, Zhe and Li, Peng and Dou, Zhiyang and Lin, Cheng and Xin, Shiqing and Liu, Yuan and Wang, Wenping and Komura, Taku}, title = {MeshMosaic: Scaling Artist Mesh Generation via Local-to-Global Assembly}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20003-20013} }
GenMatter: Perceiving Physical Objects with Generative Matter Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Eric and Dasgupta, Arijit and Friedman, Yoni and Huot, Mathieu and Mansinghka, Vikash and O'Connell, Thomas and Freeman, William T. and Tenenbaum, Joshua B.}, title = {GenMatter: Perceiving Physical Objects with Generative Matter Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3165-3175} }
IFCSR: Inference-Free Fidelity-Realism Control for One-Step Diffusion-based Real-World Image Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Back_2026_CVPR, author = {Back, Jonghee and Kim, Jongju and Kim, Jeong-Uk and Kim, Eunjin and Jeon, Minyong}, title = {IFCSR: Inference-Free Fidelity-Realism Control for One-Step Diffusion-based Real-World Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38187-38197} }
SPDMark: Selective Parameter Displacement for Robust Video Watermarking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fares_2026_CVPR, author = {Fares, Samar and Tastan, Nurbek and Nandakumar, Karthik}, title = {SPDMark: Selective Parameter Displacement for Robust Video Watermarking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10303-10312} }
4D Local Modeling Toward Dynamic Global Perception for Ambiguity-free Rotation-Invariant Point Cloud Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Jiaxun and Fan, Wentao and Amayri, Manar and Bouguila, Nizar}, title = {4D Local Modeling Toward Dynamic Global Perception for Ambiguity-free Rotation-Invariant Point Cloud Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31440-31449} }
OS-Oracle: A Comprehensive Framework for Cross-Platform GUI Critic Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Zhenyu and Xie, Jingjing and Li, Zehao and Yang, Bowen and Sun, Qiushi and Liu, Zhaoyang and Liu, Zhoumianze and Qiao, Yu and Yue, Xiangyu and Wang, Zun and Ding, Zichen}, title = {OS-Oracle: A Comprehensive Framework for Cross-Platform GUI Critic Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27514-27524} }
Seele: A Unified Acceleration Framework for Real-Time Gaussian Splatting on Mobile Devices-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, He and Huang, Xiaotong and Liu, Zihan and Lin, Weikai and Liu, Xiaohong and He, Zhezhi and Leng, Jingwen and Guo, Minyi and Feng, Yu}, title = {Seele: A Unified Acceleration Framework for Real-Time Gaussian Splatting on Mobile Devices}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25979-25989} }
Mocap-2-to-3: Multi-view Lifting for Monocular Motion Recovery with 2D Pretraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zhumei and Hu, Zechen and Guo, Ruoxi and Pi, Huaijin and Feng, Ziyong and Zhang, Liang and Pei, Mingtao and Huang, Siyuan}, title = {Mocap-2-to-3: Multi-view Lifting for Monocular Motion Recovery with 2D Pretraining}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42869-42878} }
Seeing What Matters: A Training-Free Self-Guided Framework for Multimodal Detail Perception and Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Mingjie and Ma, Yichao and Yang, Zhong and Li, Guohui}, title = {Seeing What Matters: A Training-Free Self-Guided Framework for Multimodal Detail Perception and Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8727-8736} }
LayoutAD: Exploring Semantic-Geometric Misalignment Reasoning for Scene Layout Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Zhichao and Zhang, Jiasheng and Sun, Jiyun and Cui, Jiangtao and Qiao, Xiaotian}, title = {LayoutAD: Exploring Semantic-Geometric Misalignment Reasoning for Scene Layout Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35567-35576} }
MedMO: Grounding and Understanding Multimodal Large Language Model for Medical Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deria_2026_CVPR, author = {Deria, Ankan and Kumar, Komal and Dukre, Adinath Madhavrao and Segal, Eran and Khan, Salman and Razzak, Imran}, title = {MedMO: Grounding and Understanding Multimodal Large Language Model for Medical Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5092-5103} }
Stitch-a-Demo: Creating Video Demonstrations from Multistep Descriptions-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Chi Hsuan and Ashutosh, Kumar and Grauman, Kristen}, title = {Stitch-a-Demo: Creating Video Demonstrations from Multistep Descriptions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23988-23999} }
Few-Step Diffusion Sampling Through Instance-Aware Discretizations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2026_CVPR, author = {Yuan, Liangyu and Wang, Ruoyu and Zhao, Tong and Fu, Dingwen and Lei, Mingkun and Zhu, Beier and Zhang, Chi}, title = {Few-Step Diffusion Sampling Through Instance-Aware Discretizations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35882-35892} }
Revisiting 3D Reconstruction Kernels as Low-Pass Filters-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shengjun and Chen, Min and Wei, Yibo and Dong, Mingyu and Duan, Yueqi}, title = {Revisiting 3D Reconstruction Kernels as Low-Pass Filters}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33374-33383} }
Mining Instance-Centric Vision-Language Contexts for Human-Object Interaction Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seo_2026_CVPR, author = {Seo, Soo Won and Lee, KyungChae and Cho, Hyungchan and Son, Taein and Cho, Nam Ik and Choi, Jun Won}, title = {Mining Instance-Centric Vision-Language Contexts for Human-Object Interaction Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40418-40427} }
P2GS: Physical Prior-guided Gaussian Splatting for Photometrically Consistent Urban Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shimomura_2026_CVPR, author = {Shimomura, Kota and Arai, Hidehisa and Takahashi, Tsubasa and Yamashita, Takayoshi and Fujiyoshi, Hironobu}, title = {P2GS: Physical Prior-guided Gaussian Splatting for Photometrically Consistent Urban Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11736-11745} }
UNI-OOD: Unified Object- and Image-level Out-of-Distribution Detection via Cross-Context Attentive Vision-Language Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yuchuan and Motamedi, Azadeh and Kwon, Hyock Ju and Park, Chul B. and Kim, Il-Min}, title = {UNI-OOD: Unified Object- and Image-level Out-of-Distribution Detection via Cross-Context Attentive Vision-Language Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6282-6292} }
SpatialVID: A Large-Scale Video Dataset with Spatial Annotations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Jiahao and Yuan, Yufeng and Zheng, Rujie and Lin, Youtian and Gao, Jian and Chen, Lin-Zhuo and Bao, Yajie and Zeng, Chang and Zhou, Yanxi and Long, Xiao-Xiao and Zhu, Hao and Zhang, Zhaoxiang and Cao, Xun and Yao, Yao}, title = {SpatialVID: A Large-Scale Video Dataset with Spatial Annotations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42592-42603} }
Air-Know: Arbiter-Calibrated Knowledge-Internalizing Robust Network for Composed Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2026_CVPR, author = {Fu, Zhiheng and Hu, Yupeng and Yang, Qianyun and Zhang, Shiqi and Chen, Zhiwei and Li, Zixu}, title = {Air-Know: Arbiter-Calibrated Knowledge-Internalizing Robust Network for Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2658-2670} }
MeshSplatting: Differentiable Rendering with Opaque Meshes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Held_2026_CVPR, author = {Held, Jan and Son, Sanghyun and Vandeghen, Renaud and Rebain, Daniel and Gadelha, Matheus and Zhou, Yi and Cioppa, Anthony and Lin, Ming C. and Van Droogenbroeck, Marc and Tagliasacchi, Andrea}, title = {MeshSplatting: Differentiable Rendering with Opaque Meshes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7320-7329} }
ReasonMap: Towards Fine-Grained Visual Reasoning from Transit Maps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Sicheng and Wang, Song and Ouyang, Shuyi and Kong, Lingdong and Song, Zikai and Zhu, Jianke and Wang, Huan and Wang, Xinchao}, title = {ReasonMap: Towards Fine-Grained Visual Reasoning from Transit Maps}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41077-41088} }
3D sans 3D Scans: Scalable Pre-training from Video-Generated Point Clouds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yamada_2026_CVPR, author = {Yamada, Ryousuke and Ide, Kohsuke and Fukuhara, Yoshihiro and Kataoka, Hirokatsu and Puy, Gilles and Bursuc, Andrei and Asano, Yuki M.}, title = {3D sans 3D Scans: Scalable Pre-training from Video-Generated Point Clouds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39075-39085} }
DK-DDIL: Adaptive Knowledge Retention for Dynamic Domain-Incremental Learning in Medical Imaging-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Yuxi and Liu, Sujie and Yang, Jing and Wang, Jiacheng and Chen, Yiping and Magnier, Baptiste and Wang, Liansheng}, title = {DK-DDIL: Adaptive Knowledge Retention for Dynamic Domain-Incremental Learning in Medical Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36290-36299} }
CUPID: Generative 3D Reconstruction via Joint Object and Pose Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Binbin and Duan, Haobin and Zhao, Yiqun and Zhao, Zibo and Ma, Yi and Gao, Shenghua}, title = {CUPID: Generative 3D Reconstruction via Joint Object and Pose Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12741-12752} }
Gaussian Splatting-based Low-Rank Tensor Representation for Multi-Dimensional Image Recovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Yiming and Zhao, Xi-Le and Wu, Wei-Hao and Ji, Teng-Yu and Wang, Chao}, title = {Gaussian Splatting-based Low-Rank Tensor Representation for Multi-Dimensional Image Recovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19360-19369} }
Semantic-Guided Global-Local Collaborative Prompt Learning for Few-Shot Class Incremental Learning-
[pdf]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Yongxin and Chen, Weisen and Chen, Xingye and Shao, Yuanjie and Zuo, Zhengrong and Tan, Wenming and Ren, Wenqi and Gao, Changxin and Sang, Nong}, title = {Semantic-Guided Global-Local Collaborative Prompt Learning for Few-Shot Class Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5511-5520} }
Improving Adversarial Transferability with Local Perturbation Augmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Mi_2026_CVPR, author = {Mi, Jian-Xun and Zhong, Xuanhui and Li, Weisheng}, title = {Improving Adversarial Transferability with Local Perturbation Augmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20639-20649} }
MUST: Modality-Specific Representation-Aware Transformer for Diffusion-Enhanced Survival Prediction with Missing Modality-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Kyungwon and Hwang, Dosik}, title = {MUST: Modality-Specific Representation-Aware Transformer for Diffusion-Enhanced Survival Prediction with Missing Modality}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30312-30321} }
FM-Steer: Enhance Generalist Policies with Value-Guided Cascaded Denoising-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Haoming and Qu, Delin and Yao, Yuanqi and Chen, Qizhi and Li, Jiarui and Lv, Qi and Tang, Yiwen and Kang, Li and Zhou, Heng and Gao, Xianqiang and Tang, Yuhang and Li, Xiaofan and Shi, Modi and Ren, Guanghui and Yao, Maoqing and Zhao, Bin and Wang, Dong and Li, Xuelong}, title = {FM-Steer: Enhance Generalist Policies with Value-Guided Cascaded Denoising}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13407-13418} }
Diff-SemiER: Transparency-Aware Adaptive Fusion Diffusion Model with Generative Prior for Semi-Transparent Eyeglasses Removal-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jiahao and Yin, Shiqi and Lian, Zhenxiang and Guo, Jingtao}, title = {Diff-SemiER: Transparency-Aware Adaptive Fusion Diffusion Model with Generative Prior for Semi-Transparent Eyeglasses Removal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30813-30822} }
LASER: Layer-wise Scale Alignment for Training-Free Streaming 4D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2026_CVPR, author = {Ding, Tianye and Xie, Yiming and Liang, Yiqing and Chatterjee, Moitreya and Miraldo, Pedro and Jiang, Huaizu}, title = {LASER: Layer-wise Scale Alignment for Training-Free Streaming 4D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36433-36443} }
Hierarchical Codec Diffusion for Video-to-Speech Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Jiaxin and Cong, Gaoxiang and Wang, Chenhui and Wen, Xin-Cheng and Li, Zhaoyang and Cao, Boyuan and Shan, Hongming}, title = {Hierarchical Codec Diffusion for Video-to-Speech Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43352-43362} }
FlowFixer: Towards Detail-Preserving Subject-Driven Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jun_2026_CVPR, author = {Jun, Jinyoung and Jang, Won-Dong and Ouyang, Wenbin and Gadde, Raghudeep and Lee, Jungbeom}, title = {FlowFixer: Towards Detail-Preserving Subject-Driven Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22049-22058} }
Efficient Encoder-Free Fourier-based 3D Large Multimodal Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mei_2026_CVPR, author = {Mei, Guofeng and Lin, Wei and Riz, Luigi and Wu, Yujiao and Wang, Yiming and Poiesi, Fabio}, title = {Efficient Encoder-Free Fourier-based 3D Large Multimodal Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23785-23794} }
Scal3R: Scalable Test-Time Training for Large-Scale 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Tao and Yang, Peishan and Jin, Yudong and Cai, Yingfeng and Yin, Wei and Ren, Weiqiang and Zhang, Qian and Hua, Wei and Peng, Sida and Guo, Xiaoyang and Zhou, Xiaowei}, title = {Scal3R: Scalable Test-Time Training for Large-Scale 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21760-21771} }
SparVAR: Exploring Sparsity in Visual AutoRegressive Modeling for Training-Free Acceleration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zekun and Wang, Ning and Bai, Tongxin and Mei, Changwang and Wang, Peisong and Qiu, Shuang and Cheng, Jian}, title = {SparVAR: Exploring Sparsity in Visual AutoRegressive Modeling for Training-Free Acceleration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19308-19318} }
LLM-Guided Probabilistic Fusion for Label-Efficient Document Layout Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shihab_2026_CVPR, author = {Shihab, Ibne Farabi and Akter, Sanjeda and Sharma, Anuj}, title = {LLM-Guided Probabilistic Fusion for Label-Efficient Document Layout Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3574-3583} }
Head-wise Adaptive Rotary Positional Encoding for Fine-Grained Image Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jiaye and Chen, Baoyou and Li, Hui and Dong, Zilong and Wang, Jingdong and Zhu, Siyu}, title = {Head-wise Adaptive Rotary Positional Encoding for Fine-Grained Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26338-26347} }
SO-Bench: A Structural Output Evaluation of Multimodal LLM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Di and Ma, Kaixin and Nan, Feng and Chen, Haofeng and Zhai, Bohan and Griffiths, David and Gao, Mingfei and Gan, Zhe and Verma, Eshan and Yang, Yinfei and Chen, Zhifeng and Dehghan, Afshin}, title = {SO-Bench: A Structural Output Evaluation of Multimodal LLM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37694-37704} }
Eulerian Gaussian Splatting using Hashed Probability Pyramids-
[pdf]
[supp]
[bibtex]@InProceedings{Polansky_2026_CVPR, author = {Polansky, Mia Gaia and Kopanas, George and Garbin, Stephan and Zickler, Todd and Verbin, Dor}, title = {Eulerian Gaussian Splatting using Hashed Probability Pyramids}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19045-19053} }
Real-Time Neural Video Compression with Unified Intra and Inter Coding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2026_CVPR, author = {Xiang, Hui and Bian, Yifan and Li, Li and Wu, Jingran and Zhang, Xianguo and Liu, Dong}, title = {Real-Time Neural Video Compression with Unified Intra and Inter Coding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35217-35226} }
RebRL: Reinforcing Discrete Visual Diffusion Models with Rebalanced Timestep Credits-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Mu and Ma, Tianren and Liu, Yunfan and Hu, Kun and Ye, Qixiang}, title = {RebRL: Reinforcing Discrete Visual Diffusion Models with Rebalanced Timestep Credits}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43135-43144} }
COPO: Causal-Oriented Policy Optimization for Hallucinations of MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Peizheng and Wang, Jingyao and Qiang, Wenwen and Zhou, Jiahuan and Zheng, Changwen and Hua, Gang}, title = {COPO: Causal-Oriented Policy Optimization for Hallucinations of MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11051-11063} }
Task-Aware Image Signal Processor for Advanced Visual Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Kai and Xiao, Jin and Zhang, Leheng and Shi, Kexuan and Gu, Shuhang}, title = {Task-Aware Image Signal Processor for Advanced Visual Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33672-33681} }
AutoTraces: Autoregressive Trajectory Forecasting via Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Teng and Lu, Yanting and Wang, Ruize}, title = {AutoTraces: Autoregressive Trajectory Forecasting via Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4054-4064} }
Think Visually, Reason Textually: Vision-Language Synergy in Abstract Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Beichen and Zang, Yuhang and Dong, Xiaoyi and Cao, Yuhang and Duan, Haodong and Lin, Dahua and Wang, Jiaqi}, title = {Think Visually, Reason Textually: Vision-Language Synergy in Abstract Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41203-41212} }
UniLight: A Unified Representation for Lighting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zitian and Georgiev, Iliyan and Fischer, Michael and Hold-Geoffroy, Yannick and Lalonde, Jean-Francois and Deschaintre, Valentin}, title = {UniLight: A Unified Representation for Lighting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29684-29694} }
Lyapunov Probes for Hallucination Detection in Large Foundation Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Luan_2026_CVPR, author = {Luan, Bozhi and Li, Gen and Qin, Yalan and Guo, Jifeng and Zhou, Yun and Wu, Faguo and Zheng, Hongwei and Wu, Wenjun and Fan, Zhaoxin}, title = {Lyapunov Probes for Hallucination Detection in Large Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25336-25346} }
Do VLMs Perceive or Recall? Probing Visual Perception vs. Memory with Classic Visual Illusions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Xiaoxiao and Li, Mingyang and Yuan, Kun and Sun, Min Woo and Endo, Mark and Wu, Shengguang and Li, Changlin and Zhang, Yuhui and Wang, Zeyu and Yeung-Levy, Serena}, title = {Do VLMs Perceive or Recall? Probing Visual Perception vs. Memory with Classic Visual Illusions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25861-25870} }
Bidirectional Cross-Modal Prompting for Event-Frame Asymmetric Stereo-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Ninghui and Tosi, Fabio and Wang, Lihui and Han, Jiawei and Bartolomei, Luca and Yao, Zhiting and Poggi, Matteo and Mattoccia, Stefano}, title = {Bidirectional Cross-Modal Prompting for Event-Frame Asymmetric Stereo}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {914-925} }
MaskDexGrasp: Generative Masked Modeling for Part-Aware Dexterous Grasp Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Zuo_2026_CVPR, author = {Zuo, Binghui and Zhou, Lin and Xu, Haoxuan and Yan, Jianan and Yu, Zhipeng and Liu, Zekai and Wang, Yangang}, title = {MaskDexGrasp: Generative Masked Modeling for Part-Aware Dexterous Grasp Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29598-29609} }
Disco-GS: Gaussian Splatting in Dynamic Color Lighting-
[pdf]
[supp]
[bibtex]@InProceedings{Kumar_2026_CVPR, author = {Kumar, Ashish and Rajagopalan, A. N.}, title = {Disco-GS: Gaussian Splatting in Dynamic Color Lighting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11891-11900} }
Infinity-RoPE: Action-Controllable Infinite Video Generation Emerges From Autoregressive Self-Rollout-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yesiltepe_2026_CVPR, author = {Yesiltepe, Hidir and Meral, Tuna and Akan, Adil Kaan and Oktay, Kaan and Yanardag, Pinar}, title = {Infinity-RoPE: Action-Controllable Infinite Video Generation Emerges From Autoregressive Self-Rollout}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40256-40265} }
Learning Latent Transmission and Glare Maps for Lens Veiling Glare Removal-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qian_2026_CVPR, author = {Qian, Xiaolong and Jiang, Qi and Sun, Lei and Yu, Zongxi and Yang, Kailun and Wu, Peixuan and Zhou, Jiacheng and Gao, Yao and Ma, Yaoguang and Yang, Ming-Hsuan and Wang, Kaiwei}, title = {Learning Latent Transmission and Glare Maps for Lens Veiling Glare Removal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33995-34005} }
PureCC: Pure Learning for Text-to-Image Concept Customization-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liao_2026_CVPR, author = {Liao, Zhichao and Xian, Xiaole and Li, Qingyu and Qin, Wenyu and Wang, Meng and Xie, Weicheng and Song, Siyang and Feng, Pingfa and Zeng, Long and Pan, Liang}, title = {PureCC: Pure Learning for Text-to-Image Concept Customization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7730-7740} }
UnicEdit-10M: A Dataset and Benchmark Breaking the Scale-Quality Barrier via Unified Verification for Reasoning-Enriched Edits-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Keming and Huang, Zhipeng and Fu, Canmiao and Liu, Qingyang and Cai, Jiani and Lv, Zheqi and Li, Chen and LYU, Jing and Zhao, Zhou and Zhang, Shengyu}, title = {UnicEdit-10M: A Dataset and Benchmark Breaking the Scale-Quality Barrier via Unified Verification for Reasoning-Enriched Edits}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37279-37289} }
R$^2$TUA: Reconstruction-residual Based Targeted and Untargeted Attack Against Text-Image Person Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yubo and Lu, Yan and Liu, Bin and Li, Xulin and Niu, Jixiang}, title = {R$^2$TUA: Reconstruction-residual Based Targeted and Untargeted Attack Against Text-Image Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22857-22866} }
U^2Flow: Uncertainty-Aware Unsupervised Optical Flow Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Xunpei and Lin, Wenwei and Chang, Yi and Chen, Gang}, title = {U$^2$Flow: Uncertainty-Aware Unsupervised Optical Flow Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28690-28700} }
Back to Basics: Let Denoising Generative Models Denoise-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Tianhong and He, Kaiming}, title = {Back to Basics: Let Denoising Generative Models Denoise}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36115-36125} }
PvP: Data-Efficient Humanoid Robot Learning with Proprioceptive-Privileged Contrastive Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2026_CVPR, author = {Yuan, Mingqi and Yu, Tao and Song, Haolin and Li, Bo and Jin, Xin and Chen, Hua and Zeng, Wenjun}, title = {PvP: Data-Efficient Humanoid Robot Learning with Proprioceptive-Privileged Contrastive Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42375-42385} }
Scaling Dense Event-Stream Pretraining from Visual Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zhiwen and Hou, Junhui and Zhu, Zhiyu and Wu, Jinjian and Shi, Guangming}, title = {Scaling Dense Event-Stream Pretraining from Visual Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8011-8022} }
SemLT3D: Semantic-Guided Expert Distillation for Camera-only Long-Tailed 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vo_2026_CVPR, author = {Vo, Hao and Vo, Khoa and Phan, Thinh and Cuong, Ngo Xuan and Doretto, Gianfranco and Nguyen, Hien and Nguyen, Anh and Le, Ngan}, title = {SemLT3D: Semantic-Guided Expert Distillation for Camera-only Long-Tailed 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25788-25798} }
When Do Models Actually Decide? Mapping the Layer-Wise Decision Timeline in Pretrained Neural Networks-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Minhyeok}, title = {When Do Models Actually Decide? Mapping the Layer-Wise Decision Timeline in Pretrained Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34546-34554} }
CoWTracker: Tracking by Warping instead of Correlation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lai_2026_CVPR, author = {Lai, Zihang and Insafutdinov, Eldar and Sucar, Edgar and Vedaldi, Andrea}, title = {CoWTracker: Tracking by Warping instead of Correlation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42571-42580} }
Understanding Temporal Logic Consistency in Video-Language Models through Cross-Modal Attention Discriminability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Chengzhi and Huang, Heyan and Jian, Ping and Yang, Zhen and Tian, Yaning and Guo, Zhongbin}, title = {Understanding Temporal Logic Consistency in Video-Language Models through Cross-Modal Attention Discriminability}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31812-31821} }
CapNav: Benchmarking Vision Language Models on Capability-conditioned Indoor Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2026_CVPR, author = {Su, Xia and Chen, Ruiqi and Liu, Benlin and Ma, Jingwei and Di, Zonglin and Krishna, Ranjay and Froehlich, Jon}, title = {CapNav: Benchmarking Vision Language Models on Capability-conditioned Indoor Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4043-4053} }
Where, What, Why: Toward Explainable 3D-GS Watermarking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Mingshu and Li, Jiajun and Yoshie, Osamu and Ieiri, Yuya and Li, Yixuan}, title = {Where, What, Why: Toward Explainable 3D-GS Watermarking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20701-20710} }
WEAVE: Unleashing and Benchmarking the In-context Interleaved Comprehension and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chow_2026_CVPR, author = {Chow, Wei and Pan, Jiachun and Liang, Yongyuan and Zhou, Mingze and Song, Xue and Jia, Liyu and Zhang, Saining and Tang, Siliang and Li, Juncheng and Zhang, Fengda and Wu, Weijia and Zhang, Hanwang and Chua, Tat-Seng}, title = {WEAVE: Unleashing and Benchmarking the In-context Interleaved Comprehension and Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15343-15353} }
Rascene: High-Fidelity 3D Scene Imaging with mmWave Communication Signals-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Kunzhe and Zhou, Geo Jie and Liu, Xiaoming and Zeng, Huacheng}, title = {Rascene: High-Fidelity 3D Scene Imaging with mmWave Communication Signals}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36454-36463} }
Where Culture Fades: Revealing the Cultural Gap in Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Chuancheng and Li, Shangze and Guo, Shiming and Xie, Simiao and Wu, Wenhua and Dou, Jingtong and Wu, Chao and Xiao, Canran and Wang, Cong and Cheng, Zifeng and Shen, Fei and Chua, Tat-Seng}, title = {Where Culture Fades: Revealing the Cultural Gap in Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14702-14712} }
Back to Point: Exploring Point-Language Models for Zero-Shot 3D Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Kaiqiang and Li, Gang and Zhou, Mingle and Li, Min and Han, Delong and Wan, Jin}, title = {Back to Point: Exploring Point-Language Models for Zero-Shot 3D Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14167-14177} }
Depth Hypothesis Guided Iterative Refinement for Event-Image Monocular Depth Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Daikun and Wang, Teng and Sun, Changyin}, title = {Depth Hypothesis Guided Iterative Refinement for Event-Image Monocular Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29504-29513} }
Transition Matching Distillation for Fast Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nie_2026_CVPR, author = {Nie, Weili and Berner, Julius and Ma, Nanye and Liu, Chao and Xie, Saining and Vahdat, Arash}, title = {Transition Matching Distillation for Fast Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4645-4655} }
MoCapAnything: Unified 3D Motion Capture for Arbitrary Skeletons from Monocular Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gong_2026_CVPR, author = {Gong, Kehong and Wen, Zhengyu and He, Weixia and Xu, Mingxi and Wang, Qi and Zhang, Ning and Li, Zhengyu and Lian, Dongze and Zhao, Wei and He, Xiaoyu and Zhang, Mingyuan}, title = {MoCapAnything: Unified 3D Motion Capture for Arbitrary Skeletons from Monocular Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7089-7099} }
SPE-MVS: Spatial Position Encoding Enhanced Multi-View Stereo with Monocular Depth Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Shaoqian and Sun, Jiadai and Hou, Bosen and Wang, Qiang and Fan, Bin and Li, Bo and Lu, Bin and Dai, Yuchao}, title = {SPE-MVS: Spatial Position Encoding Enhanced Multi-View Stereo with Monocular Depth Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14536-14545} }
SDUIE: Semi-Supervised Diffusion for Underwater Image Enhancement with Quant-Text Dual Control-
[pdf]
[supp]
[bibtex]@InProceedings{Cong_2026_CVPR, author = {Cong, Xiaofeng and Zhang, Yu-Xin and Shen, Hao and Jin, Yeying and Hou, Junming and Gui, Jie}, title = {SDUIE: Semi-Supervised Diffusion for Underwater Image Enhancement with Quant-Text Dual Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37423-37433} }
No Calibration, No Depth, No Problem: Cross-Sensor View Synthesis with 3D Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Cho-Ying and Huang, Zixun and Huang, Xinyu and Ren, Liu}, title = {No Calibration, No Depth, No Problem: Cross-Sensor View Synthesis with 3D Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21836-21848} }
From Static to Dynamic: Exploring Self-supervised Image-to-Video Representation Transfer Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yang and Xu, Qianqian and Wen, Peisong and Dai, Siran and Zhao, Xilin and Huang, Qingming}, title = {From Static to Dynamic: Exploring Self-supervised Image-to-Video Representation Transfer Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31250-31261} }
Transition Models: Rethinking the Generative Learning Objective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zidong and Zhang, Yiyuan and Yue, Xiaoyu and Yue, Xiangyu and Li, Yangguang and Ouyang, Wanli and Bai, Lei}, title = {Transition Models: Rethinking the Generative Learning Objective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29178-29189} }
Unified Spatiotemporal Token Compression for Video-LLMs at Ultra-Low Retention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2026_CVPR, author = {Du, Junhao and Xue, Jialong and Li, Anqi and Dai, Jincheng and Lu, Guo}, title = {Unified Spatiotemporal Token Compression for Video-LLMs at Ultra-Low Retention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17661-17671} }
Hybrid Robust Collaborative Perception with LiDAR-4D Radar Fusion under Adverse Weather Conditions-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yuquan and Zhang, Hui and Lu, Wenyu and Zhang, Ziyin and Zhang, Chuanming and Xu, Xiaohua}, title = {Hybrid Robust Collaborative Perception with LiDAR-4D Radar Fusion under Adverse Weather Conditions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24237-24247} }
Bootstrapping Multi-view Learning for Test-time Noisy Correspondence-
[pdf]
[supp]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Changhao and Xue, Di and Li, Shuxian and Hao, Yanji and Peng, Xi and Hu, Peng}, title = {Bootstrapping Multi-view Learning for Test-time Noisy Correspondence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1627-1638} }
M3DLayout: A Multi-Source Dataset of 3D Indoor Layouts and Structured Descriptions for 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yiheng and Cai, Zhuojiang and Wang, Mingdao and Guo, Meitong and Li, Tianxiao and Lin, Li and Wang, Yuwang}, title = {M3DLayout: A Multi-Source Dataset of 3D Indoor Layouts and Structured Descriptions for 3D Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34217-34226} }
Octopus: History-Free Gradient Orthogonalization for Continual Learning in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yuehao and Guan, Shanyan and Zhang, Weijia and Shang, Xuanming and Ge, Yanhao and Li, Wei and Ma, Chao}, title = {Octopus: History-Free Gradient Orthogonalization for Continual Learning in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3898-3907} }
MM-OVSeg: Multimodal Optical-SAR Fusion for Open-Vocabulary Segmentation in Remote Sensing-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Yimin and Xiao, Aoran and Chen, Hongruixuan and Xia, Junshi and Yokoya, Naoto}, title = {MM-OVSeg: Multimodal Optical-SAR Fusion for Open-Vocabulary Segmentation in Remote Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42202-42212} }
Depth Any Endoscopy: Towards Self-Supervised Generalizable Depth Estimation in Monocular Endoscopy-
[pdf]
[supp]
[bibtex]@InProceedings{Shao_2026_CVPR, author = {Shao, Shuwei and Zhu, Kejin and Ma, Shixing and Du, Xinzhe and Zhang, Baochang and Min, Zhe}, title = {Depth Any Endoscopy: Towards Self-Supervised Generalizable Depth Estimation in Monocular Endoscopy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34126-34137} }
Guiding a Diffusion Model by Swapping Its Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Weijia and Liu, Yuehao and Guan, Shanyan and Ran, Wu and Ge, Yanhao and Li, Wei and Ma, Chao}, title = {Guiding a Diffusion Model by Swapping Its Tokens}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14263-14272} }
CLiViS: Unleashing Cognitive Map through Linguistic-Visual Synergy for Embodied Visual Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Kailing and Xu, Qi'ao and Qian, Tianwen and Fu, Yuqian and Jiao, Yang and Wang, Xiaoling}, title = {CLiViS: Unleashing Cognitive Map through Linguistic-Visual Synergy for Embodied Visual Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5134-5143} }
CARD: A Multi-Modal Automotive Dataset for Dense 3D Reconstruction in Challenging Road Topography-
[pdf]
[supp]
[bibtex]@InProceedings{Elazab_2026_CVPR, author = {Elazab, Gasser and Neuhaus, Frank and Ko{\ss}, Tilman and Splietker, Malte and Date, Aditya and Unterreiner, Michael and Jansen, Maximilian and Hellwich, Olaf}, title = {CARD: A Multi-Modal Automotive Dataset for Dense 3D Reconstruction in Challenging Road Topography}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17820-17830} }
Next-Scale Prediction: A Self-Supervised Approach for Real-World Image Denoising-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shan_2026_CVPR, author = {Shan, Yiwen and Zhao, Haiyu and Hu, Peng and Peng, Xi and Gou, Yuanbiao}, title = {Next-Scale Prediction: A Self-Supervised Approach for Real-World Image Denoising}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22669-22678} }
TEAR: Temporal-aware Automated Red-teaming for Text-to-Video Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Jiaming and Hou, Guanyu and Li, Hongwei and Huang, Zhicong and Chen, Kangjie and Yu, Yi and Jiang, Wenbo and Xu, Guowen and Zhang, Tianwei}, title = {TEAR: Temporal-aware Automated Red-teaming for Text-to-Video Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41-50} }
GGPT: Geometry-Grounded Point Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yutong and Wang, Yiming and Zhang, Xucong and Prokudin, Sergey and Tang, Siyu}, title = {GGPT: Geometry-Grounded Point Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28959-28968} }
Bulk RNA-seq Guided Multi-modal Detection of Anomalous Regions in Human Cancer via Spatial Transcriptomics-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Hang and Yang, Ruocheng and You, Wenjie and Huang, Zhilin and Zhang, Daoqiang and Shao, Wei}, title = {Bulk RNA-seq Guided Multi-modal Detection of Anomalous Regions in Human Cancer via Spatial Transcriptomics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41815-41825} }
Mitigating Simplicity Bias in OOD Detection through Object Co-occurrence Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2026_CVPR, author = {Dai, Boyang and Chen, Chaoqi and Yu, Yizhou}, title = {Mitigating Simplicity Bias in OOD Detection through Object Co-occurrence Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20345-20355} }
UIKA: Fast Universal Head Avatar from Pose-Free Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Zijian and Zhou, Boyao and Hu, Liangxiao and Liu, Hongyu and Sun, Yuan and Wang, Xuan and Cao, Xun and Shen, Yujun and Zhu, Hao}, title = {UIKA: Fast Universal Head Avatar from Pose-Free Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18216-18228} }
GroundingME: Exposing the Visual Grounding Gap in MLLMs through Multi-Dimensional Evaluation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Rang and Li, Lei and Ren, Shuhuai and Tian, Hao and Gu, Shuhao and Li, Shicheng and Yue, Zihao and Wang, Yudong and Ma, Wenhan and Yang, Zhe and Ma, Jingyuan and Sui, Zhifang and Luo, Fuli}, title = {GroundingME: Exposing the Visual Grounding Gap in MLLMs through Multi-Dimensional Evaluation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2412-2422} }
Aesthetic Camera Viewpoint Suggestion with 3D Aesthetic Field-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Sheyang and Sarvestani, Armin Shafiee and Xu, Jialu and Xu, Xiaoyu and Wang, Zhou}, title = {Aesthetic Camera Viewpoint Suggestion with 3D Aesthetic Field}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8278-8287} }
AGFT: Alignment-Guided Fine-Tuning for Zero-Shot Adversarial Robustness of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cui_2026_CVPR, author = {Cui, Yubo and Guan, Xianchao and Xiong, Zijun and Zhang, Zheng}, title = {AGFT: Alignment-Guided Fine-Tuning for Zero-Shot Adversarial Robustness of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22836-22846} }
GEM-TFL: Bridging Weak and Full Supervision for Forgery Localization through EM-Guided Decomposition and Temporal Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Xiaodong and Zheng, Yuanming and Wang, Suting and Yang, Junqi and Yang, Yuhong and Tu, Weiping and Wang, Zhongyuan}, title = {GEM-TFL: Bridging Weak and Full Supervision for Forgery Localization through EM-Guided Decomposition and Temporal Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42932-42941} }
Coupling Liquid Time-Constant Encoders with Modern Hopfield Memory-
[pdf]
[bibtex]@InProceedings{Swain_2026_CVPR, author = {Swain, Bishal Ranjan and Cheoi, Kyung Joo and Ko, Jaepil}, title = {Coupling Liquid Time-Constant Encoders with Modern Hopfield Memory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27409-27417} }
OmniGen2: Towards Instruction-Aligned Multimodal Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Chenyuan and Wang, Jiahao and Zheng, Pengfei and Yan, Ruiran and Xiao, Shitao and Luo, Xin and Wang, Yueze and Li, Wanli and Jiang, Xiyan and Liu, Yexin and Zhou, Junjie and Xia, Ziyi and Liu, Ze and Li, Chaofan and Deng, Haoge and Luo, Kun and Zhang, Bo and Zhang, Jiajun and Liu, Dong and Lian, Defu and Wang, Xinlong and Wang, Zhongyuan and Huang, Tiejun and Liu, Zheng}, title = {OmniGen2: Towards Instruction-Aligned Multimodal Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21964-21975} }
VLM-Loc: Localization in Point Cloud Maps via Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2026_CVPR, author = {Kang, Shuhao and Liao, Youqi and Wang, Peijie and Liao, Wenlong and Zhang, Qilin and Busam, Benjamin and Chen, Xieyuanli and Liu, Yun}, title = {VLM-Loc: Localization in Point Cloud Maps via Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41365-41375} }
MSJoE: Jointly Evolving MLLM and Sampler for Efficient Long-Form Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Wenhui and Yu, Xiaoyi and Li, Jiaze and Chen, Yijing and Ju, Jianzhong and Luo, Zhenbo and Song, Ruihua and Luan, Jian}, title = {MSJoE: Jointly Evolving MLLM and Sampler for Efficient Long-Form Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19487-19496} }
FloodDiffusion: Tailored Diffusion Forcing for Streaming Motion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Yiyi and Wu, Yuhan and Li, Kunhang and Zhou, You and Zheng, Bo and Liu, Haiyang}, title = {FloodDiffusion: Tailored Diffusion Forcing for Streaming Motion Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2295-2304} }
Edge-Focused Super-Resolution for Omnidirectional Images with Spherical Geometric Augmentation-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Shaolin and Li, Yuying and Zhong, Lei and Li, Shigang and Li, Jianfeng}, title = {Edge-Focused Super-Resolution for Omnidirectional Images with Spherical Geometric Augmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38198-38207} }
From Panel to Pixel: Zoom-In Vision-Language Pretraining from Biomedical Scientific Literature-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2026_CVPR, author = {Yuan, Kun and Sun, Min and Chen, Zhen and Lozano, Alejandro and He, Xiangteng and Li, Shi and Navab, Nassir and Sun, Xiaoxiao and Padoy, Nicolas and Yeung-Levy, Serena}, title = {From Panel to Pixel: Zoom-In Vision-Language Pretraining from Biomedical Scientific Literature}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42649-42658} }
PQDT: Pseudo-Query Dual Transformer for Robust Point Cloud Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Haoqing and Nawotki, Alexa and Garcke, Jochen}, title = {PQDT: Pseudo-Query Dual Transformer for Robust Point Cloud Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24195-24205} }
Love Me, Love My Label: Rethinking the Role of Labels in Prompt Retrieval for Visual In-Context Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Tianci and Pan, Haohao and Wang, Jinpeng and Lian, Niu and Chen, Xinrui and Chen, Bin and Xia, Shu-Tao and Yuan, Chun}, title = {Love Me, Love My Label: Rethinking the Role of Labels in Prompt Retrieval for Visual In-Context Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38774-38783} }
MeshRipple: Structured Autoregressive Generation of Artist-Meshes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Junkai and Long, Hang and Guo, Huipeng and Zhang, Jielei and Yang, Jiayi and Guo, Tianle and Yang, Yang and Li, Jianwen and Zhang, Wenxiao and Nie{\ss}ner, Matthias and Yang, Wei}, title = {MeshRipple: Structured Autoregressive Generation of Artist-Meshes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12706-12718} }
OralGPT-Omni: A Versatile Dental Multimodal Large Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hao_2026_CVPR, author = {Hao, Jing and Liang, Yuci and Lin, Lizhuo and Fan, Yuxuan and Zhou, Wenkai and Guo, Kaixin and Ye, Zanting and Sun, Yanpeng and Zhang, Xinyu and Yang, Yanqi and Li, Qiankun and Tang, Hao and Tsoi, James Kit-Hon and Shen, Linlin and Hung, Kuo Feng}, title = {OralGPT-Omni: A Versatile Dental Multimodal Large Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38509-38519} }
VisualAD: Language-Free Zero-Shot Anomaly Detection via Vision Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hou_2026_CVPR, author = {Hou, Yanning and Li, Peiyuan and Liu, Zirui and Wang, Yitong and Ruan, Yanran and Qiu, Jianfeng and Xu, Ke}, title = {VisualAD: Language-Free Zero-Shot Anomaly Detection via Vision Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21346-21356} }
Adaptive Spectral Feature Forecasting for Diffusion Sampling Acceleration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Jiaqi and Shi, Juntong and Li, Puheng and Ye, Haotian and Guo, Qiushan and Ermon, Stefano}, title = {Adaptive Spectral Feature Forecasting for Diffusion Sampling Acceleration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43320-43330} }
Progressive Neural Architecture Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Caiyang and Huang, Chen and Liu, Yun and Tang, Chenwei and Ju, Wei and Lv, Jiancheng}, title = {Progressive Neural Architecture Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34524-34534} }
Shape-of-You: Fused Gromov-Wasserstein Optimal Transport for Semantic Correspondence in-the-Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Im_2026_CVPR, author = {Im, Jiin and Liu, Sisung and Hong, Je Hyeong}, title = {Shape-of-You: Fused Gromov-Wasserstein Optimal Transport for Semantic Correspondence in-the-Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27730-27739} }
TRIDENT: A Trimodal Cascade Generative Framework for Drug and RNA-Conditioned Cellular Morphology Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Rui and Liu, Ziru and Ye, Lingyuan and Lu, Yuxing and Shi, Boxin and Wang, Jinzhuo}, title = {TRIDENT: A Trimodal Cascade Generative Framework for Drug and RNA-Conditioned Cellular Morphology Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26803-26812} }
EDGS: Eliminating Densification for Efficient Convergence of 3DGS-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kotovenko_2026_CVPR, author = {Kotovenko, Dmytro and Grebenkova, Olga and Ommer, Bj{\"o}rn}, title = {EDGS: Eliminating Densification for Efficient Convergence of 3DGS}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41065-41076} }
BiGMINT: Biologically-guided Hierarchical Multimodal Integration for Modeling Multiple Compound Activities in Drug Discovery-
[pdf]
[supp]
[bibtex]@InProceedings{Pati_2026_CVPR, author = {Pati, Pushpak and Li, Bo and Khan, Abbas Rayabat and Albuquerque, Tom{\'e} and Jaensch, Steffen and Mollaysa, Amina and Abdelmoula, Walid M. and Allen, Samantha J. and Reumers, Joke and Mohammad, Helai P. and Oloff, Scott and Mansi, Tommaso and Liao, Rui and Lituiev, Dmytro S. and Xu, Zhoubing}, title = {BiGMINT: Biologically-guided Hierarchical Multimodal Integration for Modeling Multiple Compound Activities in Drug Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6982-6993} }
Beyond the Static-World: Lifelong Learning for All-in-One Medical Image Restoration-
[pdf]
[bibtex]@InProceedings{Shan_2026_CVPR, author = {Shan, Shihao and Liu, Hongying and Shang, Fanhua and Wan, Liang and Deng, Jingjing}, title = {Beyond the Static-World: Lifelong Learning for All-in-One Medical Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13702-13711} }
When Numbers Speak: Aligning Textual Numerals and Visual Instances in Text-to-Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Zhengyang and Chen, Yu and Zhou, Xin and Li, Xiaofan and Chen, Xiwu and Liang, Dingkang and Bai, Xiang}, title = {When Numbers Speak: Aligning Textual Numerals and Visual Instances in Text-to-Video Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24268-24278} }
Text-Image Conditioned 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cen_2026_CVPR, author = {Cen, Jiazhong and Fang, Jiemin and Li, Sikuang and Wu, Guanjun and Yang, Chen and Yi, Taoran and Zhou, Zanwei and Bao, Zhikuan and Xie, Lingxi and Shen, Wei and Tian, Qi}, title = {Text-Image Conditioned 3D Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {604-614} }
Soft Modality-Guided Expert Specialization in MoE-VLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bo_2026_CVPR, author = {Bo, Zi-Hao and Li, Yaqian and Hou, Anzhou and Takezoe, Rinyoichi and Zhao, Ertao and Pan, Tianxiang and Yan, Jiale and Guang, Mo and Long, Kaiwen}, title = {Soft Modality-Guided Expert Specialization in MoE-VLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24330-24340} }
Critical Patch-Aware Sparse Prompting with Decoupled Training for Continual Learning on the Edge-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lim_2026_CVPR, author = {Lim, Wonseon and Lee, Jaesung and Kim, Dae-Won}, title = {Critical Patch-Aware Sparse Prompting with Decoupled Training for Continual Learning on the Edge}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17989-17998} }
CubeComposer: Spatio-Temporal Autoregressive 4K 360° Video Generation from Perspective Video-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Lingen and Wang, Guangzhi and Li, Xiaoyu and Zhang, Zhaoyang and Dou, Qi and Gu, Jinwei and Xue, Tianfan and Shan, Ying}, title = {CubeComposer: Spatio-Temporal Autoregressive 4K {$360^\circ$} Video Generation from Perspective Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32625-32635} }
Failure Modes for Deep Learning-Based Online Mapping: How to Measure and Address Them-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hubbertz_2026_CVPR, author = {Hubbertz, Michael and Han, Qi and Meisen, Tobias}, title = {Failure Modes for Deep Learning-Based Online Mapping: How to Measure and Address Them}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39755-39764} }
SV-GS: Sparse View 4D Reconstruction with Skeleton-Driven Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chao_2026_CVPR, author = {Chao, Jun-Jee and Isler, Volkan}, title = {SV-GS: Sparse View 4D Reconstruction with Skeleton-Driven Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5027-5037} }
Streamlined Open-Vocabulary Human-Object Interaction Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Chang and Liao, Dongliang and Ding, Changxing}, title = {Streamlined Open-Vocabulary Human-Object Interaction Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20324-20333} }
Think 360°: Beyond Depth: Evaluating the Width-centric Reasoning Capability of MLLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Mingrui and Yang, Hexiong and Liu, Haogeng and Huang, Huaibo and He, Ran}, title = {Think {$360^\circ$}: Beyond Depth: Evaluating the Width-centric Reasoning Capability of MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5211-5220} }
Multi-Prototype Compactness and Boundary-Aware Synthesis for Unsupervised Anomaly Detection-
[pdf]
[bibtex]@InProceedings{Liao_2026_CVPR, author = {Liao, Kailun and Yang, Jianfeng and Tao, Tao and Wu, Wenfei and Jiang, Jiaming and Xiao, Jinsheng}, title = {Multi-Prototype Compactness and Boundary-Aware Synthesis for Unsupervised Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28524-28533} }
ORBIT: Benchmarking SfM in the Wild with 360° Video-
[pdf]
[supp]
[bibtex]@InProceedings{Sabour_2026_CVPR, author = {Sabour, Sara and Tucker, Richard and Brubaker, Marcus and Saxena, Saurabh and Hur, Junhwa and Tagliasacchi, Andrea and Sun, Deqing and Fleet, David J. and Szeliski, Richard and Snavely, Noah}, title = {ORBIT: Benchmarking SfM in the Wild with {$360^\circ$} Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6791-6801} }
TR2M: Transferring Monocular Relative Depth to Metric Depth with Language Descriptions and Dual-Level Scale-Oriented Contrast-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cui_2026_CVPR, author = {Cui, Beilei and Huang, Yiming and Bai, Long and Ren, Hongliang}, title = {TR2M: Transferring Monocular Relative Depth to Metric Depth with Language Descriptions and Dual-Level Scale-Oriented Contrast}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34181-34192} }
VideoNet: A Large-Scale Dataset for Domain-Specific Action Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yadav_2026_CVPR, author = {Yadav, Tanush and Salehi, Mohammadreza and Park, Jae Sung and Ramanujan, Vivek and Hajishirzi, Hannaneh and Choi, Yejin and Farhadi, Ali and Tripathi, Rohun and Krishna, Ranjay}, title = {VideoNet: A Large-Scale Dataset for Domain-Specific Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12881-12891} }
DynBridge: Bridging Imagination and Control through Interaction Dynamics for Robot Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Alex and Dong, Zhiwei and Bai, Qicheng and Zhang, Chenshi and Yi, Yujie and Dai, Guang and Liu, Yong and Wang, Mengmeng}, title = {DynBridge: Bridging Imagination and Control through Interaction Dynamics for Robot Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22423-22432} }
Deeper Thought, Weaker Aim: Understanding and Mitigating Perceptual Impairment during Reasoning in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Ruiying and Wu, Xueyu and Lei, Jing and Hou, Lu and Ma, Yuanzheng and Li, Xiao-Hui}, title = {Deeper Thought, Weaker Aim: Understanding and Mitigating Perceptual Impairment during Reasoning in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12064-12073} }
BulletTime: Decoupled Control of Time and Camera Pose for Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yiming and Zhang, Qihang and Cai, Shengqu and Wu, Tong and Ackermann, Jan and Kuang, Zhengfei and Zheng, Yang and Raji{\v{c}}, Frano and Tang, Siyu and Wetzstein, Gordon}, title = {BulletTime: Decoupled Control of Time and Camera Pose for Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18319-18330} }
Human Interaction-Aware 3D Reconstruction from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Gwanghyun and Kim, Junghun James and Jeon, Suh Yoon and Park, Jason and Chun, Se Young}, title = {Human Interaction-Aware 3D Reconstruction from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21252-21261} }
Adaptive Action Chunking at Inference-time for Vision-Language-Action Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Yuanchang and Wang, Xiaobo and Wang, Kai and Wang, Shuo and Peng, Xiaojiang and Chen, Haoyu and Chua, David Kim Huat and Vadakkepat, Prahlad}, title = {Adaptive Action Chunking at Inference-time for Vision-Language-Action Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20802-20811} }
ReManNet: A Riemannian Manifold Network for Monocular 3D Lane Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hong_2026_CVPR, author = {Hong, Chengzhi and Li, Bijun}, title = {ReManNet: A Riemannian Manifold Network for Monocular 3D Lane Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33046-33056} }
MA-Bench: Towards Fine-grained Micro-Action Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Kun and Gu, Jihao and Wang, Fei and Wu, Zhiliang and Fan, Hehe and Guo, Dan}, title = {MA-Bench: Towards Fine-grained Micro-Action Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20118-20128} }
Beyond Layer-Wise Merging: Chain-of-Merging for Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xinyu and Dong, Yuxuan and Zhang, Lingling and Jia, Chengyou and Dang, ZhuoHang and Yao, Yixing and Wu, Yaqiang and Fernando, Basura and Liu, Jun}, title = {Beyond Layer-Wise Merging: Chain-of-Merging for Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24279-24289} }
Synthetic Object Compositions for Scalable and Accurate Learning in Detection, Segmentation, and Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Weikai and Zhang, Jieyu and Jia, Taoyang and Zheng, Chenhao and Gao, Ziqi and Park, Jae Sung and Krishna, Ranjay}, title = {Synthetic Object Compositions for Scalable and Accurate Learning in Detection, Segmentation, and Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6388-6398} }
WeaveTime: Streaming from Earlier Frames into Emergent Memory in VideoLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yulin and Shi, Cheng and Yang, Sibei}, title = {WeaveTime: Streaming from Earlier Frames into Emergent Memory in VideoLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16920-16932} }
Towards Generalized Multimodal Homography Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{You_2026_CVPR, author = {You, Jinkun and Cheng, Jiaxin and Zhang, Jie and Zhou, Yicong}, title = {Towards Generalized Multimodal Homography Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8408-8417} }
Self-Attention Driven Tensor Representation for High-Order Data Recovery-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Zhi-Wei and Zheng, Yu-Bang and Li, Heng-Chao}, title = {Self-Attention Driven Tensor Representation for High-Order Data Recovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26399-26408} }
FLARE: A Failure-Aware Framework for Autonomous Correction and Recovery in Visual-Language Robotic Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Ganlong and Tang, Zijia and Chen, Xingping and Kuang, Zhanghui and Tian, Ye and Li, Guanbin}, title = {FLARE: A Failure-Aware Framework for Autonomous Correction and Recovery in Visual-Language Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22391-22401} }
SMV-EAR: Bring Spatiotemporal Multi-View Representation Learning into Efficient Event-Based Action Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Rui and Hao, Weidong and Guan, Juntao and Rui, Lai and Wu, Tong and Zeng, Fanhong and Gu, Lin}, title = {SMV-EAR: Bring Spatiotemporal Multi-View Representation Learning into Efficient Event-Based Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6043-6053} }
Render-to-Adapt: Unsupervised Personal Adaptation for Gaze Estimation-
[pdf]
[bibtex]@InProceedings{Ge_2026_CVPR, author = {Ge, Yangshi and Liu, Zheng and Lu, Feng}, title = {Render-to-Adapt: Unsupervised Personal Adaptation for Gaze Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3101-3110} }
VideoAuto-R1: Video Auto Reasoning via Thinking Once, Answering Twice-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Shuming and Zhuge, Mingchen and Zhao, Changsheng and Chen, Jun and Wu, Lemeng and Liu, Zechun and Zhu, Chenchen and Cai, Zhipeng and Zhou, Chong and Liu, Haozhe and Chang, Ernie and Suri, Saksham and Xu, Hongyu and Qian, Qi and Wen, Wei and Varadarajan, Balakrishnan and Liu, Zhuang and Xu, Hu and Bordes, Florian and Krishnamoorthi, Raghuraman and Ghanem, Bernard and Chandra, Vikas and Xiong, Yunyang}, title = {VideoAuto-R1: Video Auto Reasoning via Thinking Once, Answering Twice}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32829-32839} }
Taming the Long Tail: Rebalancing Adversarial Training via Adaptive Perturbation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Lilin and Guo, Yimo and Li, Yue and Shi, Jiancheng and Liu, Xianggen}, title = {Taming the Long Tail: Rebalancing Adversarial Training via Adaptive Perturbation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34900-34907} }
ReFAct: Empowering Multimodal Web Agents with Visual and Context Focusing-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Rui and Zhang, Shuo and Tang, Xiaoxuan and Zhang, Ruirui and Liu, Yi and Jiang, Tao and Xu, Wenhao and Li, Yong}, title = {ReFAct: Empowering Multimodal Web Agents with Visual and Context Focusing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14990-14999} }
Parallax to Align Them All: An OmniParallax Attention Mechanism for Distributed Multi-View Image Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Haotian and Long, Feiyue and Yu, Yixin and Xue, Jian and Tang, Haocheng and Xu, Tongda and Shi, Zhenning and Wang, Yan and Ma, Siwei and Zhang, Jiaqi}, title = {Parallax to Align Them All: An OmniParallax Attention Mechanism for Distributed Multi-View Image Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41322-41331} }
SharpTimeGS: Sharp and Stable Dynamic Gaussian Splatting via Lifespan Modulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2026_CVPR, author = {Liao, Zhanfeng and Zhang, Jiajun and Tu, Hanzhang and Wang, Zhixi and Gao, Yunqi and Zhang, Hongwen and Liu, Yebin}, title = {SharpTimeGS: Sharp and Stable Dynamic Gaussian Splatting via Lifespan Modulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11798-11807} }
Extending One-Step Image Generation from Class Labels to Text via Discriminative Text Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Chenxi and Zhu, Chen and Feng, Xiaokun and Hao, Aiming and Zhu, Jiashu and Lei, Jiachen and Wu, Jiahong and Chu, Xiangxiang and Yang, Jufeng}, title = {Extending One-Step Image Generation from Class Labels to Text via Discriminative Text Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36649-36659} }
VidPrism: Heterogeneous Mixture of Experts for Image-to-Video Transfer-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Rui and Wang, Chuanming and Ma, Huadong}, title = {VidPrism: Heterogeneous Mixture of Experts for Image-to-Video Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31230-31239} }
GeoAgent: Learning to Geolocate Everywhere with Reinforced Geographic Characteristics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2026_CVPR, author = {Jin, Modi and Zhang, Yiming and Sun, Boyuan and Zhang, Dingwen and Cheng, Ming-Ming and Hou, Qibin}, title = {GeoAgent: Learning to Geolocate Everywhere with Reinforced Geographic Characteristics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41352-41364} }
ViHOI: Human-Object Interaction Synthesis with Visual Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Songjin and Zhong, Linjie and Guo, Ling and Ding, Changxing}, title = {ViHOI: Human-Object Interaction Synthesis with Visual Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30686-30695} }
FILTR: Extracting Topological Features from Pretrained 3D Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Martinez_2026_CVPR, author = {Martinez, Louis and Ovsjanikov, Maks}, title = {FILTR: Extracting Topological Features from Pretrained 3D Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36179-36189} }
MSGNav: Unleashing the Power of Multi-modal 3D Scene Graph for Zero-Shot Embodied Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Xun and Zhao, Shijia and Wang, Yunxiang and Lu, Xin and Zhang, Wanfa and Qu, Rongsheng and Li, Weixin and Wang, Yunhong and Wen, Chenglu}, title = {MSGNav: Unleashing the Power of Multi-modal 3D Scene Graph for Zero-Shot Embodied Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37154-37163} }
Breaking the Scalability Limit of Multi-Projector Calibration with Embedded Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kawano_2026_CVPR, author = {Kawano, Takumi and Miura, Kohei and Iwai, Daisuke}, title = {Breaking the Scalability Limit of Multi-Projector Calibration with Embedded Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21573-21582} }
A Combination of Noise and Bilateral Filters Achieve Supralinear and Scalable Adversarial Robustness in CNNs-
[pdf]
[supp]
[bibtex]@InProceedings{Stalder_2026_CVPR, author = {Stalder, Nicolas and Grewe, Benjamin F. and Saponati, Matteo and Aceituno, Pau Vilimelis}, title = {A Combination of Noise and Bilateral Filters Achieve Supralinear and Scalable Adversarial Robustness in CNNs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6559-6568} }
Distilling Quasi-Conformal Mapping: A Generalizable and Efficient Solution for Wide-Angle Correction-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Chengyang and Lin, Zixuan and Han, Miaolin and Ng, Michael K. and Li, Huibin}, title = {Distilling Quasi-Conformal Mapping: A Generalizable and Efficient Solution for Wide-Angle Correction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19686-19695} }
UNICBench: UNIfied Counting Benchmark for MLLM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rong_2026_CVPR, author = {Rong, Chenggang and Han, Tao and Zhao, Zhiyuan and Fan, Yaowu and Wan, Jia and Guo, Song and Yuan, Yuan and Gao, Junyu}, title = {UNICBench: UNIfied Counting Benchmark for MLLM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23730-23740} }
Subspace Alignment for CLIP-based Continual Learning via Canonical Correlation Analysis-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Huan and Dong, Shuyu and Zheng, Yujin and Wang, Dingwen and Fan, Shenghua and Lyu, Fan}, title = {Subspace Alignment for CLIP-based Continual Learning via Canonical Correlation Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32257-32266} }
CGL: Advancing Continual GUI Learning via Reinforcement Fine-Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2026_CVPR, author = {Yao, Zhenquan and Huang, Zitong and Zeng, Yihan and Han, Jianhua and Xu, Hang and Feng, Chun-Mei and Ma, Jianwei and Zuo, Wangmeng}, title = {CGL: Advancing Continual GUI Learning via Reinforcement Fine-Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15212-15221} }
DVGT: Driving Visual Geometry Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zuo_2026_CVPR, author = {Zuo, Sicheng and Xie, Zixun and Zheng, Wenzhao and Xu, Shaoqing and Li, Fang and Jiang, Shengyin and Chen, Long and Yang, Zhi-Xin and Lu, Jiwen}, title = {DVGT: Driving Visual Geometry Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14658-14668} }
From Feature Learning to Spectral Basis Learning: A Unifying and Flexible Framework for Efficient and Robust Shape Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Feifan and Chen, Hongyang}, title = {From Feature Learning to Spectral Basis Learning: A Unifying and Flexible Framework for Efficient and Robust Shape Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31377-31388} }
Diffusion Forcing Planner: History-Annealed Planning with Time-Dependent Guidance for Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zehan and Li, Yaoyi and Zhang, Neng and Cai, Jia}, title = {Diffusion Forcing Planner: History-Annealed Planning with Time-Dependent Guidance for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39796-39805} }
TextFM: Robust Semi-dense Feature Matching with Language Guidance-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Zhihao and Feng, Jinglun and Savaliya, Nirav and Yeh, Zheng-Hang and Lang, Bo and Chuah, Mooi Choo}, title = {TextFM: Robust Semi-dense Feature Matching with Language Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16635-16644} }
TC-Pade: Trajectory-Consistent Pade Approximation for Diffusion Acceleration-
[pdf]
[supp]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Shaoxuan and Cui, Benlei and Huang, Bukun and Ye, Zhizeng and Sun, Yunyun and Huang, Longtao and Xue, Hui and Yang, Yang and Hong, Haiwen and Tang, Jingqun and Zhao, Zhou}, title = {TC-Pade: Trajectory-Consistent Pade Approximation for Diffusion Acceleration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35768-35778} }
Gau-Occ: Geometry-Completed Gaussians for Multi-Modal 3D Occupancy Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lv_2026_CVPR, author = {Lv, Chengxin and Li, Yihui and Yang, Hongyu and Wang, Yunhong}, title = {Gau-Occ: Geometry-Completed Gaussians for Multi-Modal 3D Occupancy Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14198-14207} }
Splatent: Splatting Diffusion Latents for Novel View Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Hirschorn_2026_CVPR, author = {Hirschorn, Or and Sela, Omer and Huberman-Spiegelglas, Inbar and Efrat, Netalee and Alshan, Eli and Ideses, Ianir and Devernay, Frederic and Zvik, Yochai and Fritz, Lior}, title = {Splatent: Splatting Diffusion Latents for Novel View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8319-8330} }
Franca: Nested Matryoshka Clustering for Scalable Visual Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Venkataramanan_2026_CVPR, author = {Venkataramanan, Shashanka and Pariza, Valentinos and Salehi, Mohammadreza and Knobel, Lukas and Ramzi, Elias and Gidaris, Spyros and Bursuc, Andrei and Asano, Yuki M.}, title = {Franca: Nested Matryoshka Clustering for Scalable Visual Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10533-10544} }
Towards Open Environments and Instructions: General Vision-Language Navigation via Fast-Slow Interactive Reasoning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yang and Wu, Aming and Zhang, Zihao and Han, Yahong}, title = {Towards Open Environments and Instructions: General Vision-Language Navigation via Fast-Slow Interactive Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25184-25192} }
Red-teaming Retrieval-Augmented Diffusion Models via Poisoning Knowledge Bases-
[pdf]
[supp]
[bibtex]@InProceedings{Lyu_2026_CVPR, author = {Lyu, Xinqi and Liu, Yihao and Wang, Dong and Xiao, Bin}, title = {Red-teaming Retrieval-Augmented Diffusion Models via Poisoning Knowledge Bases}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34961-34970} }
StreamReady: Learning What to Answer and When in Long Streaming Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Azad_2026_CVPR, author = {Azad, Shehreen and Vineet, Vibhav and Rawat, Yogesh S.}, title = {StreamReady: Learning What to Answer and When in Long Streaming Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40494-40504} }
ExpPortrait: Expressive Portrait Generation via Personalized Representation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Junyi and Guo, Yudong and Guo, Boyang and Yang, Shengming and Zhang, Juyong}, title = {ExpPortrait: Expressive Portrait Generation via Personalized Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18107-18117} }
Benchmarking Single-Factor Physical Video-to-Audio Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Tingle and Gururani, Siddharth and Shih, Kevin J. and Bhatt, Gantavya and Lee, Sang-gil and Kong, Zhifeng and Goel, Arushi and Anumanchipalli, Gopala and Liu, Ming-Yu}, title = {Benchmarking Single-Factor Physical Video-to-Audio Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1939-1949} }
UniSH: Unifying Scene and Human Reconstruction in a Feed-Forward Pass-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Mengfei and Li, Peng and Zhang, Zheng and Lu, Jiahao and Zhao, Chengfeng and Xue, Wei and Liu, Qifeng and Peng, Sida and Zhang, Wenxiao and Luo, Wenhan and Liu, Yuan and Guo, Yike}, title = {UniSH: Unifying Scene and Human Reconstruction in a Feed-Forward Pass}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14038-14049} }
V-Attack: Targeting Disentangled Value Features for Controllable Adversarial Attacks on LVLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nie_2026_CVPR, author = {Nie, Sen and Zhang, Jie and Yan, Jianxin and Shan, Shiguang and Chen, Xilin}, title = {V-Attack: Targeting Disentangled Value Features for Controllable Adversarial Attacks on LVLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42257-42267} }
SpaceMind: Camera-Guided Modality Fusion for Spatial Reasoning in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Ruosen and Zhang, Zhikang and Xu, Jialei and Chang, Jiahao and Chen, Dong and Li, Lingyun and Sun, Weijian and Wei, Zizhuang}, title = {SpaceMind: Camera-Guided Modality Fusion for Spatial Reasoning in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16811-16822} }
Concept-Guided Fine-Tuning: Steering ViTs away from Spurious Correlations to Improve Robustness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Elisha_2026_CVPR, author = {Elisha, Yehonatan and Barkan, Oren and Koenigstein, Noam}, title = {Concept-Guided Fine-Tuning: Steering ViTs away from Spurious Correlations to Improve Robustness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17035-17045} }
3D Space as a Scratchpad for Editable Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saha_2026_CVPR, author = {Saha, Oindrila and Krs, Vojtech and Mech, Radomir and Maji, Subhransu and Gadelha, Matheus and Blackburn-Matzen, Kevin}, title = {3D Space as a Scratchpad for Editable Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29233-29243} }
Confusion-Aware Spectral Regularizer for Long-Tailed Recognition-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Ziquan and Jin, Gaojie and Zhu, Hanruo and Lu, Si-Yuan and Zhang, Yunxiao and Fu, Zeyu and Mu, Ronghui and Zhang, Guoqiang and Sun, Zhao and Xia, Yuhang and Shang, Jiaxing and Li, Xiang and Liu, Lu and Huang, Tianjin}, title = {Confusion-Aware Spectral Regularizer for Long-Tailed Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28712-28722} }
SRPO: Self-Referential Policy Optimization for Vision-Language-Action Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fei_2026_CVPR, author = {Fei, Senyu and Wang, Siyin and Ji, Li and Li, Ao and Zhang, Shiduo and Liu, Liming and Hou, Jinlong and Gong, Jingjing and Zhao, Xianzhong and Qiu, Xipeng}, title = {SRPO: Self-Referential Policy Optimization for Vision-Language-Action Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6718-6728} }
FlashVGGT: Efficient and Scalable Visual Geometry Transformers with Compressed Descriptor Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zipeng and Xu, Dan}, title = {FlashVGGT: Efficient and Scalable Visual Geometry Transformers with Compressed Descriptor Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21826-21835} }
Differences That Matter: Auditing Models for Capability Gap Discovery and Rectification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Qihao and Mao, Chengzhi and Liu, Yaojie and Yuille, Alan and Chu, Wen-Sheng}, title = {Differences That Matter: Auditing Models for Capability Gap Discovery and Rectification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1639-1650} }
Semantic Noise Reduction via Teacher-Guided Dual-Path Audio-Visual Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Linge and Chen, Yingying and Zhu, Bingke and Zhou, Lu and Wang, Jinqiao}, title = {Semantic Noise Reduction via Teacher-Guided Dual-Path Audio-Visual Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32005-32014} }
The Geometry of Robustness: Optimizing Loss Landscape Curvature and Feature Manifold Alignment for Robust Finetuning of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chopra_2026_CVPR, author = {Chopra, Shivang and Halbe, Shaunak and Huang, Chengyue and Maneechotesuwan, Brisa and Kira, Zsolt}, title = {The Geometry of Robustness: Optimizing Loss Landscape Curvature and Feature Manifold Alignment for Robust Finetuning of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22133-22142} }
PolarGuide-GSDR: 3D Gaussian Splatting Driven by Polarization Priors and Deferred Reflection for Real-World Reflective Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shan_2026_CVPR, author = {Shan, Derui and Qiao, Qian and Lu, Hao and Du, Tao and Lu, Peng}, title = {PolarGuide-GSDR: 3D Gaussian Splatting Driven by Polarization Priors and Deferred Reflection for Real-World Reflective Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26000-26009} }
Parallelised Differentiable Straightest Geodesics for 3D Meshes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Verninas_2026_CVPR, author = {Verninas, Hippolyte and Korkmaz, Caner and Zafeiriou, Stefanos and Birdal, Tolga and Foti, Simone}, title = {Parallelised Differentiable Straightest Geodesics for 3D Meshes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14637-14647} }
PhysInOne: Visual Physics Learning and Reasoning in One Suite-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Siyuan and Wang, Hejun and Cheng, Hu and Li, Jinxi and Wang, Dongsheng and Jiang, Junwei and Jin, Yixiao and Huang, Jiayue and Mao, Shiwei and Liu, Shangjia and Yang, Yafei and Song, Hongkang and Wei, Shenxing and Zhang, Zihui and Wang, Bing and Wang, Zhihua and Zou, Chuhang and Yang, Bo}, title = {PhysInOne: Visual Physics Learning and Reasoning in One Suite}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33131-33142} }
Fusion of Depth and Semantics for Probabilistic Floorplan Localization-
[pdf]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Kecheng and Chen, Mao and Zhang, Xiangkai and Yang, Xu}, title = {Fusion of Depth and Semantics for Probabilistic Floorplan Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19413-19422} }
FVBench: Benchmarking Deepfake Video Detection Capability of Large Multimodal Models-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Jiarui and Duan, Huiyu and Wang, Juntong and Min, Xiongkuo}, title = {FVBench: Benchmarking Deepfake Video Detection Capability of Large Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4425-4437} }
FFP-300K: Scaling First-Frame Propagation for Generalizable Video Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Xijie and Xu, Chengming and Luo, Donghao and Hu, Xiaobin and Tang, Peng and Peng, Xu and Zhang, Jiangning and Wang, Chengjie and Fu, Yanwei}, title = {FFP-300K: Scaling First-Frame Propagation for Generalizable Video Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23172-23181} }
OpenMarcie: Dataset for Multimodal Action Recognition in Industrial Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bello_2026_CVPR, author = {Bello, Hymalai and Ray, Lala and Sorysz, Joanna and Suh, Sungho and Lukowicz, Paul}, title = {OpenMarcie: Dataset for Multimodal Action Recognition in Industrial Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20129-20138} }
MTA: Multimodal Task Alignment for BEV Perception and Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Yunsheng and Yaman, Burhaneddin and Ye, Xin and Luo, Jingru and Tao, Feng and Mallik, Abhirup and Wang, Ziran and Ren, Liu}, title = {MTA: Multimodal Task Alignment for BEV Perception and Captioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {670-679} }
GeoSAM2: Unleashing the Power of SAM2 for 3D Part Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Ken and Yang, Yunhan and Sun, Jingxiang and Liu, Xihui and Liu, Yebin and Liang, Ding and Cao, Yan-Pei}, title = {GeoSAM2: Unleashing the Power of SAM2 for 3D Part Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6367-6376} }
Avatar Forcing: Real-Time Interactive Head Avatar Generation for Natural Conversation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ki_2026_CVPR, author = {Ki, Taekyung and Jang, Sangwon and Jo, Jaehyeong and Yoon, Jaehong and Hwang, Sung Ju}, title = {Avatar Forcing: Real-Time Interactive Head Avatar Generation for Natural Conversation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18074-18084} }
Omni-Attack: Adversarial Attacks on Open-Ended VQA in Black-Box Multimodal LLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Kai and Yu, Weichen and Zhang, Li and Robey, Alexander and Zou, Andy and Hu, Haoqi and Xu, Chengming and Fredrikson, Matt}, title = {Omni-Attack: Adversarial Attacks on Open-Ended VQA in Black-Box Multimodal LLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42341-42351} }
Time Without Time: Pseudo-Temporal Representation for Space-Time Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, Hee Min and Kang, Hyoa and Kim, Suji and Oh, Dokwan and Cho, Nam Ik}, title = {Time Without Time: Pseudo-Temporal Representation for Space-Time Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6812-6822} }
Unsafe2Safe: Controllable Image Anonymization for Downstream Utility-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dinh_2026_CVPR, author = {Dinh, Minh and Jin, SouYoung}, title = {Unsafe2Safe: Controllable Image Anonymization for Downstream Utility}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3326-3336} }
No Way To Steal My Face: Proactive Defense Against Identity-Preserving Personalized Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Xiong_2026_CVPR, author = {Xiong, Lizhi and Li, Jun and Li, Ziqiang and Jiang, Weiwei and Fu, Zhangjie}, title = {No Way To Steal My Face: Proactive Defense Against Identity-Preserving Personalized Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20680-20690} }
From Intuition to Investigation: A Tool-Augmented Reasoning MLLM Framework for Generalizable Face Anti-Spoofing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Haoyuan and Wang, Keyao and Zhang, Guosheng and Yue, Haixiao and Tan, Zhiwen and Peng, Siran and Zhang, Tianshuo and Tan, Xiao and Chen, Kunbin and He, Wei and Wang, Jingdong and Liu, Ajian and Zhu, Xiangyu and Lei, Zhen}, title = {From Intuition to Investigation: A Tool-Augmented Reasoning MLLM Framework for Generalizable Face Anti-Spoofing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40855-40865} }
GaussianVision: Vision-Language Alignment from Compressed Image Representations using 2D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Omri_2026_CVPR, author = {Omri, Yasmine and Ding, Connor and Weissman, Tsachy and Tambe, Thierry}, title = {GaussianVision: Vision-Language Alignment from Compressed Image Representations using 2D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14926-14935} }
Learning 3D Reconstruction with Priors in Test Time-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Lei and Wu, Haoyu and Dave, Akshat and Samaras, Dimitris}, title = {Learning 3D Reconstruction with Priors in Test Time}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36550-36560} }
Domain Sensitive Federated Learning with Fisher-Informed Pruning-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Chenchen and Yuan, Wenhao and Xu, Zhengji and Wang, Xuehe}, title = {Domain Sensitive Federated Learning with Fisher-Informed Pruning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17535-17544} }
EgoXtreme: A Dataset for Robust Object Pose Estimation in Egocentric Views under Extreme Conditions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yoon_2026_CVPR, author = {Yoon, Taegyoon and Han, Yegyu and Ji, Seojin and Park, Jaewoo and Kim, Sojeong and Kwon, Taein and Kim, Hyung-Sin}, title = {EgoXtreme: A Dataset for Robust Object Pose Estimation in Egocentric Views under Extreme Conditions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40696-40706} }
Hyper-PCN: Hypergraph-Based Point Cloud Completion via High-Order Correlation Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Linfei and Tan, Pei and Li, Siqi and Zou, Changqing and Gao, Yue}, title = {Hyper-PCN: Hypergraph-Based Point Cloud Completion via High-Order Correlation Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39121-39130} }
PAVAS: Physics-Aware Video-to-Audio Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Hyun-Bin_2026_CVPR, author = {Oh, Hyun-Bin and Takida, Yuhta and Uesaka, Toshimitsu and Oh, Tae-Hyun and Mitsufuji, Yuki}, title = {PAVAS: Physics-Aware Video-to-Audio Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14481-14491} }
From 3D Pose to Prose: Biomechanics-Grounded Vision-Language Coaching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2026_CVPR, author = {Ji, Yuyang and Shen, Yixuan and Zhu, Shengjie and Kong, Yu and Liu, Feng}, title = {From 3D Pose to Prose: Biomechanics-Grounded Vision-Language Coaching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23506-23515} }
FISHuman: Fine-grained Single-image 3D Human Reconstruction via Multi-view 4D Remeshing-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Hanxi and Men, Yifang and Lian, Zhouhui}, title = {FISHuman: Fine-grained Single-image 3D Human Reconstruction via Multi-view 4D Remeshing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42765-42776} }
Exploring Adaptive Masked Reconstruction for Self-Supervised Skeleton-Based Action Recognition-
[pdf]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Shengkai and Cheng, Zhiyong and Zhang, Zefan and Dong, Jianfeng and Li, Zhihui and Wang, Meng}, title = {Exploring Adaptive Masked Reconstruction for Self-Supervised Skeleton-Based Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13974-13983} }
Global Prior Meets Local Consistency: Dual-Memory Augmented Vision-Language-Action Model for Efficient Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zaijing and Hu, Bing and Shao, Rui and Chen, Gongwei and Jiang, Dongmei and Xie, Pengwei and Hao, Jianye and Nie, Liqiang}, title = {Global Prior Meets Local Consistency: Dual-Memory Augmented Vision-Language-Action Model for Efficient Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35135-35145} }
VGGT-ohm-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Jianyuan and Chen, Minghao and Zhang, Shangzhan and Karaev, Nikita and Sch{\"o}nberger, Johannes and Labatut, Patrick and Bojanowski, Piotr and Novotny, David and Vedaldi, Andrea and Rupprecht, Christian}, title = {VGGT-ohm}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21486-21499} }
3D-IDE: 3D Implicit Depth Emergent-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Chushan and Lu, Ruihan and Tong, Jinguang and Wang, Yikai and Li, Hongdong}, title = {3D-IDE: 3D Implicit Depth Emergent}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23837-23847} }
CoRiM: Conflict-driven Risk Minimization for Dynamic Multimodal Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Zou_2026_CVPR, author = {Zou, Shihao and Wei, Wei}, title = {CoRiM: Conflict-driven Risk Minimization for Dynamic Multimodal Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37821-37830} }
Sculpt4D: Generating 4D Shapes via Sparse-Attention Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Minghao and Hu, Wenbo and Xu, Jiale and Shan, Ying and Han, Kai}, title = {Sculpt4D: Generating 4D Shapes via Sparse-Attention Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27188-27198} }
StreamVLO: Streaming Visual-LiDAR Odometry with Cumulative Drift Compensation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Mengmeng and Liu, Jiuming and Yang, Michael Ying and Jiang, Chaokang and Li, Jiangtao and Zhang, Yunpeng and Wang, Hesheng and Nex, Francesco and Cheng, Hao}, title = {StreamVLO: Streaming Visual-LiDAR Odometry with Cumulative Drift Compensation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39086-39097} }
FedARA: Resource-adaptive Low-rank Personalized Federated Learning via Anchor-driven Representation Alignment on Heterogeneous Edge Devices-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Ruonan and Wang, Zheng and Liu, Debin and Lv, Shijie and Yang, Laurence Tianruo}, title = {FedARA: Resource-adaptive Low-rank Personalized Federated Learning via Anchor-driven Representation Alignment on Heterogeneous Edge Devices}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10357-10366} }
Seeing Through the Noise: Improving Infrared Small Target Detection and Segmentation from Noise Suppression Perspective-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yuan_2026_CVPR, author = {Yuan, Maoxun and Meng, Duanni and Xi, Ziteng and Zhao, Tianyi and Zhao, Shiji and Dai, Yimian and Wei, Xingxing}, title = {Seeing Through the Noise: Improving Infrared Small Target Detection and Segmentation from Noise Suppression Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27783-27792} }
MV2UV: Generating High-quality UV Texture Maps with Multiview Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zheng and Zhang, Qinchuan and Ye, Yuteng and Chen, Zhi and Ji, Penglei and Li, Mengfei and Zhang, Wenxiao and Liu, Yuan}, title = {MV2UV: Generating High-quality UV Texture Maps with Multiview Prompts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12684-12694} }
WhisperNet: A Scalable Solution for Bandwidth-Efficient Collaboration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Gong and Zhang, Chaokun and Zhao, Xinyan}, title = {WhisperNet: A Scalable Solution for Bandwidth-Efficient Collaboration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32154-32163} }
One-to-All Animation: Alignment-Free Character Animation and Image Pose Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Shijun and Xu, Jing and Li, Zhihang and Peng, Chunli and Yang, Xiaoda and Lu, Lijing and Hu, Kai and Zhang, Jiangning}, title = {One-to-All Animation: Alignment-Free Character Animation and Image Pose Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4011-4021} }
SegCompass: Exploring Interpretable Alignment with Sparse Autoencoders for Enhanced Reasoning Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Zhenyu and Li, Liupeng and Wang, Jinpeng and Kang, Haoqian and Feng, Yan and Chen, Ke and Wang, Yaowei}, title = {SegCompass: Exploring Interpretable Alignment with Sparse Autoencoders for Enhanced Reasoning Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19132-19142} }
DeX-Portrait: Disentangled and Expressive Portrait Animation via Explicit and Latent Motion Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Yuxiang and Li, Zhe and Wang, Yanwen and Zhu, Hao and Cao, Xun and Liu, Ligang}, title = {DeX-Portrait: Disentangled and Expressive Portrait Animation via Explicit and Latent Motion Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40009-40019} }
AdapAction: Adaptive Target Action Backdoor Attack against GUI Agents-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Baicheng and Zhang, Mingda and Zhang, Min and Li, Haizhou and Wu, Baoyuan}, title = {AdapAction: Adaptive Target Action Backdoor Attack against GUI Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27894-27905} }
MultiModalPFN: Extending Prior-Data Fitted Networks for Multimodal Tabular Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Wall and Song, Chaeyoung and Kim, Hanul}, title = {MultiModalPFN: Extending Prior-Data Fitted Networks for Multimodal Tabular Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30357-30367} }
VideoFusion: A Spatio-Temporal Collaborative Network for Multi-modal Video Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Linfeng and Wang, Yeda and Gong, Meiqi and Li, Zizhuo and Deng, Yuxin and Yi, Xunpeng and Li, Chunyu and Xu, Han and Zhang, Hao and Ma, Jiayi}, title = {VideoFusion: A Spatio-Temporal Collaborative Network for Multi-modal Video Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19559-19569} }
Multi-Hierarchical Contrastive Spectral Fusion for Multi-View Clustering-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Bing and Wang, Xiaoli and Lu, Gui-Fu and Li, Zechao}, title = {Multi-Hierarchical Contrastive Spectral Fusion for Multi-View Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39617-39626} }
Causal Motion Diffusion Models for Autoregressive Motion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Qing and Watanabe, Akihisa and Fujiwara, Kent}, title = {Causal Motion Diffusion Models for Autoregressive Motion Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38366-38375} }
GeoSemba: Reconstructing State Space Model for Cross Paradigm Representation in Medical Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Xutao and Li, Jiarui and Liu, Junwen and Ren, Yonggong}, title = {GeoSemba: Reconstructing State Space Model for Cross Paradigm Representation in Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29990-29999} }
Rethinking UMM Visual Generation: Masked Modeling for Efficient Image-Only Pre-training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Peng and Xie, Jun and Lin, Tao}, title = {Rethinking UMM Visual Generation: Masked Modeling for Efficient Image-Only Pre-training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2047-2057} }
GeCo-SRT: Geometry-aware Continual Adaptation for Cross-Task Sim-to-Real Transfer-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Wenbo and Xia, Wenke and Zhang, Weitao and Hu, Di}, title = {GeCo-SRT: Geometry-aware Continual Adaptation for Cross-Task Sim-to-Real Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42408-42417} }
Dynamics: Language-Based Representation for Inferring Rigid-Body Dynamics From Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Kao_2026_CVPR, author = {Kao, Chia-Hsiang and Huynh, Cong Phuoc and Wang, Chien-Yi and Vesdapunt, Noranart and Stojanov, Stefan and Hariharan, Bharath and Obiednikov, Oleksandr and Zhou, Ning}, title = {Dynamics: Language-Based Representation for Inferring Rigid-Body Dynamics From Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42364-42374} }
E$^2$-SCI: Elastic Edge-Cloud Speculative Decoding via Credit Inertia-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Senyao and Wang, Haozhao and Jiang, Zhaobai and Jin, Zhanbo and Fan, Hao and Li, Ruixuan}, title = {E{$^2$}-SCI: Elastic Edge-Cloud Speculative Decoding via Credit Inertia}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12944-12954} }
EgoAVU: Egocentric Audio-Visual Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seth_2026_CVPR, author = {Seth, Ashish and Mei, Xinhao and Zhao, Changsheng and Nagaraja, Varun and Chang, Ernie and Meyer, Gregory P. and Le Lan, Gael and Xiong, Yunyang and Chandra, Vikas and Shi, Yangyang and Manocha, Dinesh and Cai, Zhipeng}, title = {EgoAVU: Egocentric Audio-Visual Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15805-15814} }
CARD: Correlation Aware Restoration with Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Nezakati_2026_CVPR, author = {Nezakati, Niki and Ghosh, Arnab and Roy-Chowdhury, Amit and Saragadam, Vishwanath}, title = {CARD: Correlation Aware Restoration with Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16531-16540} }
3D Gaussian Splatting at Arbitrary Resolutions with Compact Proxy Anchors-
[pdf]
[supp]
[bibtex]@InProceedings{Jeong_2026_CVPR, author = {Jeong, Mingyun and Yoon, Seongro and Bremond, Francois and Cho, Donghyeon}, title = {3D Gaussian Splatting at Arbitrary Resolutions with Compact Proxy Anchors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18991-19000} }
MMVIP: A Visible-infrared Paired Dataset for Multi-weather Marine Vision-
[pdf]
[supp]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Yunpeng and Wang, Lihan and He, Zhaoshen and He, Xinqiang and Liao, Xingming and Wang, Zhuowei and Cheng, Lianglun}, title = {MMVIP: A Visible-infrared Paired Dataset for Multi-weather Marine Vision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6432-6442} }
Specificity-aware reinforcement learning for fine-grained open-world classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Angheben_2026_CVPR, author = {Angheben, Samuele and Berasi, Davide and Conti, Alessandro and Ricci, Elisa and Wang, Yiming}, title = {Specificity-aware reinforcement learning for fine-grained open-world classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41467-41477} }
Forensic-Friendly Image Manipulation via Controllable Latent Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Hanyu and Wu, Haiwei and Tian, Jinyu and Li, Jianqing and Zhou, Jiantao}, title = {Forensic-Friendly Image Manipulation via Controllable Latent Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35394-35404} }
TopoHR: Hierarchical Centerline Representation for Cyclic Topology Reasoning in Driving Scenes with Point-to-Instance Relations-
[pdf]
[supp]
[bibtex]@InProceedings{Bai_2026_CVPR, author = {Bai, Yifeng and Chen, Zhirong and Song, Bo and Cheng, Erkang and Ling, Haibin}, title = {TopoHR: Hierarchical Centerline Representation for Cyclic Topology Reasoning in Driving Scenes with Point-to-Instance Relations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18161-18170} }
Thinking with Programming Vision: Towards a Unified View for Thinking with Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Zirun and Hong, Minjie and Zhang, Feng and Jia, Kai and Jin, Tao}, title = {Thinking with Programming Vision: Towards a Unified View for Thinking with Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33467-33476} }
Language Does Matter for Cross-Domain Few-Shot Visual Feature Enhancement-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Fei and Zhang, Xiwen and Qiu, Qingqing and Zhang, Lei and Wei, Wei and Ding, Chen and Zhang, Yi and Li, Liang and Yue, Xiangyu and Zhang, Yanning}, title = {Language Does Matter for Cross-Domain Few-Shot Visual Feature Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7946-7956} }
Pip-Stereo: Progressive Iterations Pruner for Iterative Optimization based Stereo Matching-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Jintu and Liu, Qizhe and Xu, Huangxin and Chen, Zhuojie}, title = {Pip-Stereo: Progressive Iterations Pruner for Iterative Optimization based Stereo Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7503-7512} }
Fast Reasoning Segmentation for Images and Videos-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shen_2026_CVPR, author = {Shen, Yiqing and Unberath, Mathias}, title = {Fast Reasoning Segmentation for Images and Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34765-34774} }
Blink: Dynamic Visual Token Resolution for Enhanced Multimodal Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Yuchen and Zhang, Zhenyu and Gu, Naibin and Chen, Yilong and Fu, Peng and Lin, Zheng and Wang, Shuohuan and Sun, Yu and Wu, Hua and Wang, Weiping and Wang, Haifeng}, title = {Blink: Dynamic Visual Token Resolution for Enhanced Multimodal Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3511-3521} }
Depth Any Panoramas: A Foundation Model for Panoramic Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Xin and Song, Meixi and Zhang, Dizhe and Lu, Wenxuan and Li, Haodong and Du, Bo and Yang, Ming-Hsuan and Nguyen, Truong and Qi, Lu}, title = {Depth Any Panoramas: A Foundation Model for Panoramic Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26835-26844} }
Towards Fine-Grained Attribution: Instance-Aware Preference Optimization for Aligning Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Jiayang and Wang, Pin and Wang, Hongbo and Liu, Xinyue and Huang, Huaibo and He, Ran}, title = {Towards Fine-Grained Attribution: Instance-Aware Preference Optimization for Aligning Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43155-43164} }
Identity-Preserving Image-to-Video Generation via Reward-Guided Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2026_CVPR, author = {Shen, Liao and Jiang, Wentao and Zhu, Yiran and Li, Jiahe and Ge, Tiezheng and Cao, Zhiguo and Zheng, Bo}, title = {Identity-Preserving Image-to-Video Generation via Reward-Guided Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27281-27290} }
MoRGS: Efficient Per-Gaussian Motion Reasoning for Streamable Dynamic 3D Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Wonjoon and Woo, Sungmin and Kim, Donghyeong and Lee, Jungho and Park, Sangheon and Lee, Sangyoun}, title = {MoRGS: Efficient Per-Gaussian Motion Reasoning for Streamable Dynamic 3D Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41044-41053} }
CREward: A Type-Specific Creativity Reward Model-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Jiyeon and Mahdavi-Amiri, Ali and Zhang, Hao and Jeong, Haedong}, title = {CREward: A Type-Specific Creativity Reward Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21932-21941} }
TimeRipples: Accelerating vDiTs by Understanding the Spatio-Temporal Correlations in Latent Space-
[pdf]
[supp]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Wenxuan and Sun, Yulin and Chen, Aiyue and Lin, Jing and Yao, Yiwu and Gan, Yiming and Zhao, Jieru and Leng, Jingwen and Guo, Minyi and Feng, Yu}, title = {TimeRipples: Accelerating vDiTs by Understanding the Spatio-Temporal Correlations in Latent Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25688-25698} }
PromptMoE: A Segmentation Refinement Framework Leveraging Mixture of Experts for Improved Prompting-
[pdf]
[supp]
[bibtex]@InProceedings{Price_2026_CVPR, author = {Price, Stephen and Cote, Danielle L. and Rundensteiner, Elke A.}, title = {PromptMoE: A Segmentation Refinement Framework Leveraging Mixture of Experts for Improved Prompting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6325-6335} }
Towards Reasoning-Preserving Unlearning in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Hongji and Yu, Manjiang and Yao, Junchi and Singh, Priyanka and Li, Xue and Wang, Di and Hu, Lijie}, title = {Towards Reasoning-Preserving Unlearning in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10251-10261} }
GUI-CEval: A Hierarchical and Comprehensive Chinese Benchmark for Mobile GUI Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yang and Liu, Yuchen and Lu, Haoyu and Xia, Zhiqiang and Wang, Hongzhen and Han, Kaiyang and Yang, Changpeng and Wu, Jinyang and Xu, Jiaming and Shi, Runyu and Huang, Ying}, title = {GUI-CEval: A Hierarchical and Comprehensive Chinese Benchmark for Mobile GUI Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20303-20312} }
TAR: Token-Aware Refinement for Fine-grained Generalized Category Discovery-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Xingyu and Zhang, Yu and Mi, Siya and Wei, Xiu-Shen}, title = {TAR: Token-Aware Refinement for Fine-grained Generalized Category Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31995-32004} }
SpaceTimePilot: Generative Rendering of Dynamic Scenes Across Space and Time-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Zhening and Jeong, Hyeonho and Chen, Xuelin and Gryaditskaya, Yulia and Wang, Tuanfeng Y. and Lasenby, Joan and Huang, Chun-Hao}, title = {SpaceTimePilot: Generative Rendering of Dynamic Scenes Across Space and Time}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11218-11228} }
TAS-LoRA: Transformer Architecture Search with Mixture-of-LoRA Experts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeon_2026_CVPR, author = {Jeon, Jeimin and Lee, Hyunju and Ham, Bumsub}, title = {TAS-LoRA: Transformer Architecture Search with Mixture-of-LoRA Experts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20243-20252} }
Towards Cross-Modal Preservation, Consistency and Alignment for Privacy-Preserving Visible-Infrared Person Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Yudi and Zhou, Zhongao and Yang, Bin and Chen, Zhenghan and Ye, Mang}, title = {Towards Cross-Modal Preservation, Consistency and Alignment for Privacy-Preserving Visible-Infrared Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11273-11282} }
Geometrically-Constrained Agent for Spatial Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zeren and Lu, Xiaoya and Zheng, Zhijie and Li, Pengrui and He, Lehan and Zhou, Yijin and Shao, Jing and Zhuang, Bohan and Sheng, Lu}, title = {Geometrically-Constrained Agent for Spatial Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38689-38699} }
Beyond Reassembly: Fractured Object Recovery with Missing Parts-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Qun-Ce and Li, Jiahui and Cao, Yan-Pei and Cheng, Weihao and Mu, Tai-Jiang and Shan, Ying and Li, Chuan and Chen, Da and Yang, Yong-Liang and Hu, Shi-min}, title = {Beyond Reassembly: Fractured Object Recovery with Missing Parts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20488-20498} }
Learning Hierarchical Hyperbolic Mixture Model for Part-aware 3D Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Qitong and Feng, Mingtao and Wu, Zijie and Zhu, Huixin and Dong, Weisheng and Wang, Yaonan and Mian, Ajmal}, title = {Learning Hierarchical Hyperbolic Mixture Model for Part-aware 3D Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12695-12705} }
Frame2Freq: Spectral Adapters for Fine-Grained Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ponbagavathi_2026_CVPR, author = {Ponbagavathi, Thinesh Thiyakesan and Seibold, Constantin and Roitberg, Alina}, title = {Frame2Freq: Spectral Adapters for Fine-Grained Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24073-24083} }
More Than Meets the Eye: A Unified Image Fusion Framework via Semantic-Pixel Entropy Trade-off for Zero-Shot Generalization-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xiaowen and Li, Jing and Huo, Hongtao and Cao, Haozhe and Wang, Renhua and Dong, Xu}, title = {More Than Meets the Eye: A Unified Image Fusion Framework via Semantic-Pixel Entropy Trade-off for Zero-Shot Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41510-41520} }
From Measurement to Mitigation: Quantifying and Reducing Identity Leakage in Image Representation Encoders with Linear Subspace Removal-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{George_2026_CVPR, author = {George, Daniel and Yeh, Charles and Lee, Daniel and Zhang, Yifei}, title = {From Measurement to Mitigation: Quantifying and Reducing Identity Leakage in Image Representation Encoders with Linear Subspace Removal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3284-3293} }
CaricHarmony: Contrastive Diffusion Paths for Identity-Preserving Caricature Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Dongyu and Chen, Dar-Yen and Song, Yi-Zhe}, title = {CaricHarmony: Contrastive Diffusion Paths for Identity-Preserving Caricature Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36126-36135} }
PhyCritic: Multimodal Critic Models for Physical AI-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiong_2026_CVPR, author = {Xiong, Tianyi and Wang, Shihao and Liu, Guilin and Dong, Yi and Li, Ming and Huang, Heng and Kautz, Jan and Yu, Zhiding}, title = {PhyCritic: Multimodal Critic Models for Physical AI}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36881-36892} }
UI-Lens: Assessing General MLLMs' Potential to Automate UI Display Quality Assurance-
[pdf]
[supp]
[bibtex]@InProceedings{Xiang_2026_CVPR, author = {Xiang, Wei and Wu, Yexinrui and Chen, Xinli and Li, Xinran and Chen, Shi}, title = {UI-Lens: Assessing General MLLMs' Potential to Automate UI Display Quality Assurance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25882-25892} }
SegEarth-R2: Towards Comprehensive Language-guided Segmentation for Remote Sensing Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xin_2026_CVPR, author = {Xin, Zepeng and Li, Kaiyu and Chen, Luodi and Li, Wanchen and Yuchen, Xiao and Qiao, Hui and Zhang, Weizhan and Meng, Deyu and Cao, Xiangyong}, title = {SegEarth-R2: Towards Comprehensive Language-guided Segmentation for Remote Sensing Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13199-13210} }
Scaling Parallel Sequence Models to Vision Foundation Models-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Yitong and McCarthy, Collin and Wang, Hongjun and Ye, Hanrong and Dou, Qi and Xue, Tianfan and Gu, Jinwei and Kautz, Jan and Yin, Hongxu and Molchanov, Pavlo and Liu, Sifei}, title = {Scaling Parallel Sequence Models to Vision Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41332-41341} }
DA-VAE: Plug-in Latent Compression for Diffusion via Detail Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Xin and You, Zhiyuan and Zhang, Zhoutong and Xue, Tianfan}, title = {DA-VAE: Plug-in Latent Compression for Diffusion via Detail Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18703-18713} }
AvatarPointillist: AutoRegressive 4D Gaussian Avatarization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Hongyu and Wang, Xuan and Wu, Zijian and Wang, Yating and Wan, Ziyu and Ma, Yue and Liu, Runtao and Zhou, Boyao and Shen, Yujun and Chen, Qifeng}, title = {AvatarPointillist: AutoRegressive 4D Gaussian Avatarization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11039-11050} }
IPR-1: Interactive Physical Reasoner-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Mingyu and Zhuo, Lifeng and Tan, Tianxi and Xie, Guocan and Nie, Xian and Li, Yan and Zhao, Renjie and He, Zizhu and Wang, Ziyu and Cai, Jiting and Li, Yong-Lu}, title = {IPR-1: Interactive Physical Reasoner}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33415-33425} }
Out of Sight, Out of Track: Adversarial Attacks on Propagation-based Multi-Object Trackers via Query State Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bouzidi_2026_CVPR, author = {Bouzidi, Halima and Liu, Haoyu and Achamyeleh, Yonatan and Iddamsetty, Praneetsai and Al Faruque, Mohammad}, title = {Out of Sight, Out of Track: Adversarial Attacks on Propagation-based Multi-Object Trackers via Query State Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13326-13335} }
Making Training-Free Diffusion Segmentors Scale with the Generative Power-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Meng_2026_CVPR, author = {Meng, Benyuan and Xu, Qianqian and Wang, Zitai and Cao, Xiaochun and Huang, Longtao and Huang, Qingming}, title = {Making Training-Free Diffusion Segmentors Scale with the Generative Power}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35861-35871} }
InternVideo-Next: Towards World-Understanding Video Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Chenting and Zhu, Yuhan and Xu, Yicheng and Yang, Jiange and Yan, Ziang and Wang, Yali and Wang, Yi and Wang, Limin}, title = {InternVideo-Next: Towards World-Understanding Video Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16966-16976} }
VMD-FACT: A New Video Dataset and MLLM-based method for Detecting Realistic AI-Generated Video Misinformation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yongkang and She, Dongyu and Ji, Baiyu and Geng, Qichuan and Zhou, Zhong and Wang, Yan}, title = {VMD-FACT: A New Video Dataset and MLLM-based method for Detecting Realistic AI-Generated Video Misinformation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21284-21294} }
GS-CLIP: Zero-shot 3D Anomaly Detection by Geometry-Aware Prompt and Synergistic View Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Zehao and Liu, An and Wang, Yan}, title = {GS-CLIP: Zero-shot 3D Anomaly Detection by Geometry-Aware Prompt and Synergistic View Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35587-35596} }
VLIC: Vision-Language Models As Perceptual Judges for Human-Aligned Image Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sargent_2026_CVPR, author = {Sargent, Kyle and Gao, Ruiqi and Henzler, Philipp and Herrmann, Charles and Holynski, Aleksander and Fei-Fei, Li and Wu, Jiajun and Zhang, Jason Y.}, title = {VLIC: Vision-Language Models As Perceptual Judges for Human-Aligned Image Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10462-10471} }
ReMatch: Boosting Representation through Matching for Multimodal Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Qianying and Liang, Xiao and Zhang, Zhiqiang and Chen, Yibo and Tang, Xu and Qing, Zhongfei and Zhou, Fengfan and Hu, Yao and Henderson, Paul}, title = {ReMatch: Boosting Representation through Matching for Multimodal Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16823-16833} }
VGG-T$^3$: Offline Feed-Forward 3D Reconstruction at Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Elflein_2026_CVPR, author = {Elflein, Sven and Li, Ruilong and Agostinho, S{\'e}rgio and Gojcic, Zan and Leal-Taix{\'e}, Laura and Zhou, Qunjie and Osep, Aljosa}, title = {VGG-T{$^3$}: Offline Feed-Forward 3D Reconstruction at Scale}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36464-36474} }
SketchRevive: Fine-Grained Pixel-to-Vector Sketch Completion with Diffusion-Prior-Guided Multimodal LLMs-
[pdf]
[bibtex]@InProceedings{Zuo_2026_CVPR, author = {Zuo, Ran and Hu, Haoxiang and Pei, Chenxi and Liu, Yanxuan and Qiang, Wenwen and Liu, Fang and Deng, Xiaoming and Ma, Cuixia and Liu, Yong-Jin}, title = {SketchRevive: Fine-Grained Pixel-to-Vector Sketch Completion with Diffusion-Prior-Guided Multimodal LLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43165-43174} }
Same Attention, Different Truths: Put Logit-Lens over Visual Attention to Detect and Mitigate LVLM Object Hallucination-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zichuan and Yang, Songlin and Peng, Bo and Tang, Zhenchen and Li, Yang and Dong, Beibei and Dong, Jing}, title = {Same Attention, Different Truths: Put Logit-Lens over Visual Attention to Detect and Mitigate LVLM Object Hallucination}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25315-25325} }
Emergent Extreme-View Geometry in 3D Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yiwen and Tung, Joseph and Cai, Ruojin and Fouhey, David and Averbuch-Elor, Hadar}, title = {Emergent Extreme-View Geometry in 3D Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36411-36421} }
HiconAgent: History Context-aware Policy Optimization for GUI Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Xurui and Chen, Gongwei and Xie, Yuquan and Li, Zaijing and Zhou, Kaiwen and Wang, Shuai and Yang, Shuo and Tian, Zhuotao and Shao, Rui}, title = {HiconAgent: History Context-aware Policy Optimization for GUI Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13028-13038} }
CLaD: Planning with Grounded Foresight via Cross-Modal Latent Dynamics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2026_CVPR, author = {Jeong, Andrew and Kim, Jaemin and Lee, Sebin and Yoon, Sung-Eui}, title = {CLaD: Planning with Grounded Foresight via Cross-Modal Latent Dynamics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {966-975} }
Rethinking BCE Loss for Multi-Label Image Recognition with Fine-Tuning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Ao and Jiang, Zhiwei and Cheng, Zifeng and Wang, Cong and Yin, Yafeng and Yang, Shufan and Gu, Qing}, title = {Rethinking BCE Loss for Multi-Label Image Recognition with Fine-Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38784-38793} }
SE(3)-Equivariance with Geometric and Topological Guidance for Category-Level Object Pose Estimation-
[pdf]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Sheng and Zhai, Di-Hua and Xia, Yuanqing}, title = {SE(3)-Equivariance with Geometric and Topological Guidance for Category-Level Object Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35114-35123} }
Unblur-SLAM: Dense Neural SLAM for Blurry Inputs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Qi and Rozumny, Denis and Girlanda, Francesco and Karaoglu, Sezer and Pollefeys, Marc and Gevers, Theo and Oswald, Martin R.}, title = {{Unblur-SLAM}: Dense Neural {SLAM} for Blurry Inputs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {352--362} }
InstructMix2Mix: Consistent Sparse-View Editing Through Multi-View Model Personalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gilo_2026_CVPR, author = {Gilo, Daniel and Litany, Or}, title = {{InstructMix2Mix}: Consistent Sparse-View Editing Through Multi-View Model Personalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29000--29011} }
GDPO-SR: Group Direct Preference Optimization for One-Step Generative Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yi_2026_CVPR, author = {Yi, Qiaosi and Li, Shuai and Wu, Rongyuan and Sun, Lingchen and Zhang, Zhengqiang and Zhang, Lei}, title = {{GDPO-SR}: Group Direct Preference Optimization for One-Step Generative Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2177--2187} }
SEA: Evaluating Sketch Abstraction Efficiency via Element-level Commonsense Visual Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Jiho and Choi, Sieun and Seo, Jaeyoon and Sohn, Minho and Kim, Yeana and Kim, Jihie}, title = {{SEA}: Evaluating Sketch Abstraction Efficiency via Element-level Commonsense Visual Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {31652--31661} }
POCA: Pareto-Optimal Curriculum Alignment for Visual Text Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Yaohou and Wang, Qingzhong and Huang, Yongsong and Liu, Junyi and Miyazaki, Tomo and Omachi, Shinichiro}, title = {{POCA}: {Pareto}-Optimal Curriculum Alignment for Visual Text Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {21891--21900} }
SceneScribe-1M: A Large-Scale Video Dataset with Comprehensive Geometric and Semantic Annotations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yunnan and Zheng, Kecheng and Wang, Jianyuan and Chen, Minghao and Novotny, David and Rupprecht, Christian and Xu, Yinghao and Zhu, Xing and Zeng, Wenjun and Jin, Xin and Shen, Yujun}, title = {{SceneScribe-1M}: A Large-Scale Video Dataset with Comprehensive Geometric and Semantic Annotations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12628--12639} }
AdaSpark: Adaptive Sparsity for Efficient Long-Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Handong and Liu, Zikang and Guo, Longteng and Yue, Tongtian and Tang, Yepeng and Zhu, Xinxin and Zheng, Chuanyang and Wang, Ziming and Wang, Zhibin and Song, Jun and Yu, Cheng and Zheng, Bo and Liu, Jing}, title = {{AdaSpark}: Adaptive Sparsity for Efficient Long-Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40548--40558} }
Solvability of the Viewing Graph Under the Affine Camera Model-
[pdf]
[supp]
[bibtex]@InProceedings{Pedroni_2026_CVPR, author = {Pedroni, Gabriele and Madhavan, Rakshith and Arrigoni, Federica}, title = {Solvability of the Viewing Graph Under the Affine Camera Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {26731--26740} }
Seeing Beyond 8bits: Subjective and Objective Quality Assessment of HDR-UGC Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saini_2026_CVPR, author = {Saini, Shreshth and Chen, Bowen and Wang, Yilin and Birkbeck, Neil and Adsumilli, Balu and Bovik, Alan C.}, title = {Seeing Beyond 8bits: Subjective and Objective Quality Assessment of {HDR-UGC} Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {15538--15549} }
Direction-aware 3D Large Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Quan and Xuan, Weihao and Wang, Junjue and Yokoya, Naoto and Shao, Ling and Lu, Shijian}, title = {Direction-aware {3D} Large Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {9668--9678} }
DreamingComics: A Story Visualization Pipeline via Subject and Layout Customized Generation using Video Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kwon_2026_CVPR, author = {Kwon, Patrick and Chen, Chen}, title = {{DreamingComics}: A Story Visualization Pipeline via Subject and Layout Customized Generation using Video Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {36769--36780} }
DeAR: Fine-Grained VLM Adaptation by Decomposing Attention Head Roles-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Yiming and Yang, Hongkun and Wang, Lionel Z. and Chen, Bin and Xian, Weizhi and Teng, Jianzhi}, title = {{DeAR}: Fine-Grained {VLM} Adaptation by Decomposing Attention Head Roles}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {31514--31523} }
Thinking in Uncertainty: Mitigating Hallucinations in MLRMs with Latent Entropy-Aware Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Zhongxing and Wang, Zhonghua and Qian, Zhe and Shi, Dachuan and Tang, Feilong and Hu, Ming and Su, Shiyan and Zou, Xiaocheng and Feng, Wei and Mahapatra, Dwarikanath and Peng, Yifan and Lin, Minquan and Ge, Zongyuan}, title = {Thinking in Uncertainty: Mitigating Hallucinations in {MLRMs} with Latent Entropy-Aware Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {11064--11075} }
Hierarchical Action Learning for Weakly-Supervised Action Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Junxian and Cai, Ruichu and Fang, Juntao and Zhu, Hao and Xu, Boyan and Chen, Weilin and Li, Zijian and Gao, Shenghua}, title = {Hierarchical Action Learning for Weakly-Supervised Action Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6054--6064} }
Seeing Both Sides: Towards Bidirectional Semantic Alignment for Open-Vocabulary Camouflaged Object Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Guohui and Sun, Fuming and Zhao, Yu and Kong, Yuqiu and Sun, Jing and Wang, Fasheng}, title = {Seeing Both Sides: Towards Bidirectional Semantic Alignment for Open-Vocabulary Camouflaged Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {27655--27664} }
ELITE: Efficient Gaussian Head Avatar from a Monocular Video via Learned Initialization and Test-time Generative Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Youwang_2026_CVPR, author = {Youwang, Kim and Hyoseok, Lee and Subin, Park and Pons-Moll, Gerard and Oh, Tae-Hyun}, title = {{ELITE}: Efficient {Gaussian} Head Avatar from a Monocular Video via Learned Initialization and Test-time Generative Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40152--40162} }
ParaUni: Enhance Generation in Unified Multimodal Model with Reinforcement-driven Hierarchical Parallel Information Interaction-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Jiangtong and Liu, Lin and Huang, Jie and Zhang, Xiaopeng and Tian, Qi and Zhao, Feng}, title = {{ParaUni}: Enhance Generation in Unified Multimodal Model with Reinforcement-driven Hierarchical Parallel Information Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {41836--41846} }
MuCo: Multi-turn Contrastive Learning for Multimodal Embedding Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2026_CVPR, author = {Gu, Geonmo and Heo, Byeongho and Yu, Jaemyung and Hwang, Jaehui and Kim, Taekyung and Lee, Sangmin and Jun, HeeJae and Kang, Yoohoon and Yun, Sangdoo and Han, Dongyoon}, title = {{MuCo}: Multi-turn Contrastive Learning for Multimodal Embedding Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1749--1758} }
Multi-view Crowd Tracking Transformer with View-Ground Interactions Under Large Real-World Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Qi and Chen, Jixuan and Zhang, Kaiyi and Yu, Xinquan and Chan, Antoni B. and Huang, Hui}, title = {Multi-view Crowd Tracking Transformer with View-Ground Interactions Under Large Real-World Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {13626--13635} }
Geo2: Geometry-Guided Cross-view Geo-Localization and Image Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yancheng and Zhang, Xiaohan and Sun, Guangyu and Lyu, Zonglin and Wshah, Safwan and Chen, Chen}, title = {Geo2: Geometry-Guided Cross-view Geo-Localization and Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {19432--19442} }
Instruction-Guided Lesion Segmentation for Chest X-rays with Automatically Generated Large-Scale Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, Geon and Yoon, Hangyul and Shin, Hyunju and Park, Hyunki and Seo, Sang Hoon and Yang, Eunho and Choi, Edward}, title = {Instruction-Guided Lesion Segmentation for Chest {X-rays} with Automatically Generated Large-Scale Dataset}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1482--1492} }
Efficiency Follows Global-Local Decoupling-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zhenyu and Pei, Gensheng and Chen, Tao and Zhou, Yichao and Zhou, Tianfei and Yao, Yazhou and Shen, Fumin}, title = {Efficiency Follows Global-Local Decoupling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {25524--25535} }
Decoding 3D Perception via BrainSSD: Synergistic Fusion of EEG Representations from Static and Dynamic Visual Streams-
[pdf]
[supp]
[bibtex]@InProceedings{Yao_2026_CVPR, author = {Yao, Yincheng and Shi, Enze and Zhang, Shu}, title = {Decoding {3D} Perception via {BrainSSD}: Synergistic Fusion of {EEG} Representations from Static and Dynamic Visual Streams}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42711--42721} }
Lifting Unlabeled Internet-level Data for 3D Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yixin and Zhang, Yaowei and Yu, Huangyue and He, Junchao and Wang, Yan and Huang, Jiangyong and Shen, Hongyu and Ni, Junfeng and Wang, Shaofei and Jia, Baoxiong and Zhu, Song-Chun and Huang, Siyuan}, title = {Lifting Unlabeled Internet-level Data for {3D} Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5814--5827} }
AudioAvatar: Personalized Audio-driven Whole-body Talking Avatars-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Seungeun and Moon, SeungJun and Lew, Hah Min and Kang, Ji-Su and Park, Gyeong-Moon}, title = {{AudioAvatar}: Personalized Audio-driven Whole-body Talking Avatars}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3998--4010} }
GUIDE: A Benchmark for Understanding and Assisting Users in Open-Ended GUI Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Saelyne and Yu, Jaesang and Peng, Yi-Hao and Lin, Kevin Qinghong and Cho, Jae Won and Song, Yale and Kim, Juho}, title = {{GUIDE}: A Benchmark for Understanding and Assisting Users in Open-Ended {GUI} Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {13017--13027} }
Ov3R: Open-Vocabulary Semantic 3D Reconstruction from RGB Videos-
[pdf]
[arXiv]
[bibtex]@InProceedings{Gong_2026_CVPR, author = {Gong, Ziren and Li, Xiaohan and Tosi, Fabio and Han, Jiawei and Mattoccia, Stefano and Cai, Jianfei and Poggi, Matteo}, title = {{Ov3R}: Open-Vocabulary Semantic {3D} Reconstruction from {RGB} Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {34206--34216} }
GeoRelight: Learning Joint Geometrical Relighting and Reconstruction with Flexible Multi-Modal Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2026_CVPR, author = {Xue, Yuxuan and Liang, Ruofan and Zakharov, Egor and Bagautdinov, Timur and Cao, Chen and Nam, Giljoo and Saito, Shunsuke and Pons-Moll, Gerard and Romero, Javier}, title = {{GeoRelight}: Learning Joint Geometrical Relighting and Reconstruction with Flexible Multi-Modal Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29771--29780} }
Yume1.5: A Text-Controlled Interactive World Generation Model-
[pdf]
[supp]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Xiaofeng and Li, Zhen and Li, Chuanhao and Xu, Xiaojie and Ying, Kaining and Zhang, Kaipeng}, title = {Yume1.5: A Text-Controlled Interactive World Generation Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7752--7761} }
UST-Hand: An Uncertainty-aware Spatiotemporal Point Cloud Interaction Network for 3D Self-supervised Hand Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Tianhao and Zhang, Haoyang and Xie, Liang and Chang, Haochen and Gao, Kun and Cheng, Yuan and Ren, Pengfei and Yin, Erwei}, title = {{UST-Hand}: An Uncertainty-aware Spatiotemporal Point Cloud Interaction Network for {3D} Self-supervised Hand Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8857--8867} }
MotionCrafter: Dense Geometry and Motion Reconstruction with a 4D VAE-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Ruijie and Lu, Jiahao and Hu, Wenbo and Han, Xiaoguang and Cai, Jianfei and Shan, Ying and Zheng, Chuanxia}, title = {{MotionCrafter}: Dense Geometry and Motion Reconstruction with a {4D} {VAE}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40303--40315} }
cryoSENSE: Compressive Sensing Enables High-throughput Microscopy with Sparse and Generative Priors on the Protein Cryo-EM Image Manifold-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shabeeb_2026_CVPR, author = {Shabeeb, Zain and Saeedi, Daniel and Tsui, Darin and Jamali, Vida and Aghazadeh, Amirali}, title = {{cryoSENSE}: Compressive Sensing Enables High-throughput Microscopy with Sparse and Generative Priors on the Protein {Cryo-EM} Image Manifold}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {34072--34083} }
SMRABooth: Subject and Motion Representation Alignment for Customized Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Xuancheng and Li, Yaning and You, Sisi and Bao, Bing-Kun}, title = {{SMRABooth}: Subject and Motion Representation Alignment for Customized Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {16130--16141} }
4DEquine: Disentangling Motion and Appearance for 4D Equine Reconstruction from Monocular Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lyu_2026_CVPR, author = {Lyu, Jin and An, Liang and Cheng, Pujin and Liu, Yebin and Tang, Xiaoying}, title = {{4DEquine}: Disentangling Motion and Appearance for {4D} Equine Reconstruction from Monocular Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {32496--32506} }
PowerCLIP: Powerset Alignment for Contrastive Pre-Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kawamura_2026_CVPR, author = {Kawamura, Masaki and Inoue, Nakamasa and Yanagi, Rintaro and Kataoka, Hirokatsu and Yokota, Rio}, title = {{PowerCLIP}: Powerset Alignment for Contrastive Pre-Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {22111--22122} }
Monet: Reasoning in Latent Visual Space Beyond Image and Language-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Qixun and Shi, Yang and Wang, Yifei and Zhang, Yuanxing and Wan, Pengfei and Gai, Kun and Ying, Xianghua and Wang, Yisen}, title = {Monet: Reasoning in Latent Visual Space Beyond Image and Language}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12030--12040} }
GLINT: Modeling Scene-Scale Transparency via Gaussian Radiance Transport-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Na_2026_CVPR, author = {Na, Youngju and Yun, Jaeseong and Ryu, Soohyun and Kim, Hyunsu and Yoon, Sung-Eui and Yeon, Suyong}, title = {{GLINT}: Modeling Scene-Scale Transparency via {Gaussian} Radiance Transport}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7256--7265} }
CoCoVideo: The High-Quality Commercial-Model-Based Contrastive Benchmark for AI-Generated Video Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Huidong and Chen, Wentao and Chen, Jie and Cai, Xinqi and Ma, Ruolong and Zheng, Yinglin and Lin, Yuxin and Zeng, Ming}, title = {{CoCoVideo}: The High-Quality Commercial-Model-Based Contrastive Benchmark for {AI}-Generated Video Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {11346--11356} }
Expanding mmWave Datasets for Human Pose Estimation with Unlabeled Data and LiDAR Datasets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Zhuoxuan and Zhu, Boan and Zhang, Xingjian and Li, Wenying and Chan, S.-H. Gary}, title = {Expanding {mmWave} Datasets for Human Pose Estimation with Unlabeled Data and {LiDAR} Datasets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {21221--21230} }
Spectral Super-Resolution via Adversarial Unfolding and Data-Driven Spectrum Regularization: From Multispectral Satellite Data to NASA Hyperspectral Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Young_2026_CVPR, author = {Young, Si-Sheng and Lin, Chia-Hsiang}, title = {Spectral Super-Resolution via Adversarial Unfolding and Data-Driven Spectrum Regularization: From Multispectral Satellite Data to {NASA} Hyperspectral Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {27827--27837} }
Copy-Transform-Paste: Zero-Shot Object-Object Alignment Guided by Vision-Language and Geometric Constraints-
[pdf]
[supp]
[bibtex]@InProceedings{Gatenyo_2026_CVPR, author = {Gatenyo, Rotem and Fried, Ohad}, title = {Copy-Transform-Paste: Zero-Shot Object-Object Alignment Guided by Vision-Language and Geometric Constraints}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {14936--14945} }
When to Think and When to Look: Uncertainty-Guided Lookback-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bi_2026_CVPR, author = {Bi, Jing and Bellos, Filippos and Guo, Junjia and Li, Yayuan and Huang, Chao and Tang, Yunlong and Song, Luchuan and Liang, Susan and Zhang, Zhongfei and Corso, Jason J. and Xu, Chenliang}, title = {When to Think and When to Look: Uncertainty-Guided Lookback}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5104--5113} }
Generative Adversarial Perturbations with Cross-paradigm Transferability on Localized Crowd Counting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Anisha_2026_CVPR, author = {Anisha, Alabi Mehzabin and Wang, Guangjing and Chellappan, Sriram}, title = {Generative Adversarial Perturbations with Cross-paradigm Transferability on Localized Crowd Counting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {20629--20638} }
Multi-Metric Representation Learning Strategy Based on Clustering for Fine-Grained Multimodal Sentiment Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yidan and Wang, Zongheng and Xing, Hongjie and Li, Chunguo and Liu, Xiaoxiao}, title = {Multi-Metric Representation Learning Strategy Based on Clustering for Fine-Grained Multimodal Sentiment Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {37864--37873} }
MangoBench: A Benchmark for Multi-Agent Goal-Conditioned Offline Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yi and Zhong, Ningze and Fu, Zhiheng and Wang, Longguang and Zhang, Ye and Guo, Yulan}, title = {{MangoBench}: A Benchmark for Multi-Agent Goal-Conditioned Offline Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6219--6228} }
VectorArk: Learning Practical Image Vectorization with Rounded Polygon Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Gehlaut_2026_CVPR, author = {Gehlaut, Tarun and Liu, Difan and Bansal, Charu and Malani, Krutik and Chakraborty, Souymodip and Phogat, Ankit and Fisher, Matthew and Batra, Vineet}, title = {{VectorArk}: Learning Practical Image Vectorization with Rounded Polygon Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {31619--31627} }
ProxyFL: A Proxy-Guided Framework for Federated Semi-Supervised Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Duowen and Wang, Yan}, title = {{ProxyFL}: A Proxy-Guided Framework for Federated Semi-Supervised Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {17525--17534} }
FedRG: Unleashing the Representation Geometry for Federated Learning with Noisy Clients-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2026_CVPR, author = {Wen, Tian and Yang, Zhiqin and Zhang, Yonggang and Jiang, Xuefeng and Peng, Hao and Wang, Yuwei and Han, Bo}, title = {{FedRG}: Unleashing the Representation Geometry for Federated Learning with Noisy Clients}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {24545--24556} }
MUFASA: A Multi-Layer Framework for Slot Attention-
[pdf]
[supp]
[bibtex]@InProceedings{Bock_2026_CVPR, author = {Bock, Sebastian and Sch{\"u}{\ss}ler, Leonie and Singh, Krishnakant and Schaub-Meyer, Simone and Roth, Stefan}, title = {{MUFASA}: A Multi-Layer Framework for Slot Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {27750--27760} }
RAMEN: Resolution-Adjustable Multimodal Encoder for Earth Observation-
[pdf]
[supp]
[bibtex]@InProceedings{Houdre_2026_CVPR, author = {Houdr{\'e}, Nicolas and Marcos, Diego and de Turckheim, Hugo Riffaud and Ienco, Dino and Wendling, Laurent and Kurtz, Camille and Lobry, Sylvain}, title = {{RAMEN}: Resolution-Adjustable Multimodal Encoder for {Earth} Observation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {27838--27848} }
Designing to Forget: Deep Semi-parametric Models for Unlearning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Amber Yijia and Tai, Yu-Shan and Yeh, Raymond A.}, title = {Designing to Forget: Deep Semi-parametric Models for Unlearning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {17409--17419} }
DriveCombo: Benchmarking Compositional Traffic Rule Reasoning in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Enhui and Zhang, Jiahuan and Zheng, Guantian and Tang, Tao and Li, Shengbo Eben and Lu, Yuhang and Zhou, Xia and Zhang, Xueyang and Zhan, Yifei and Zhan, Kun and Hao, Zhihui and Lang, Xianpeng and Yu, Kaicheng}, title = {{DriveCombo}: Benchmarking Compositional Traffic Rule Reasoning in Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {32113--32123} }
SAVE: Speech-Aware Video Representation Learning for Video-Text Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Ruixiang and Xu, Zhihao and Lan, Bangxiang and Xin, Zijie and Liu, Jingyu and Li, Xirong}, title = {{SAVE}: Speech-Aware Video Representation Learning for Video-Text Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {31166--31175} }
Forecast the Principal, Stabilize the Residual: Subspace-Aware Feature Caching for Diffusion Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Guantao and Zheng, Shikang and Lin, Yuqi and Zhang, Linfeng}, title = {Forecast the Principal, Stabilize the Residual: Subspace-Aware Feature Caching for Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {23632--23641} }
ViLoMem: Agentic Learner with Grow-and-Refine Multimodal Semantic Memory-
[pdf]
[supp]
[bibtex]@InProceedings{Bo_2026_CVPR, author = {Bo, Weihao and Zhang, Shan and Sun, Yanpeng and Wu, Jingjing and Xie, Qunyi and Tan, Xiao and Chen, Kunbin and He, Wei and Li, Xiaofan and Zhao, Na and Wang, Jingdong and Li, Zechao}, title = {{ViLoMem}: Agentic Learner with Grow-and-Refine Multimodal Semantic Memory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5476--5486} }
Time Blindness: Why Video-Language Models Can't See What Humans Can?-
[pdf]
[supp]
[bibtex]@InProceedings{Upadhyay_2026_CVPR, author = {Upadhyay, Ujjwal and Ranjan, Mukul and Shen, Zhiqiang and Elhoseiny, Mohamed}, title = {Time Blindness: Why Video-Language Models Can't See What Humans Can?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {30906--30918} }
VENI: Variational Encoder for Natural Illumination-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Walker_2026_CVPR, author = {Walker, Paul and Gardner, James A. D. and Ardelean, Andreea and Smith, William A. P. and Egger, Bernhard}, title = {{VENI}: Variational Encoder for Natural Illumination}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {16248--16257} }
Learnability-Driven Submodular Optimization for Active Roadside 3D Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Ruiyu and Zhang, Baoming and Ruozzi, Nicholas and Guo, Yunhui}, title = {Learnability-Driven Submodular Optimization for Active Roadside {3D} Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {11579--11588} }
Stepwise Credit Assignment for GRPO on Flow-Matching Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Savani_2026_CVPR, author = {Savani, Yash and Kveton, Branislav and Liu, Yuchen and Wang, Yilin and Shi, Jing and Mukherjee, Subhojyoti and Vlassis, Nikos and Singh, Krishna Kumar}, title = {Stepwise Credit Assignment for {GRPO} on Flow-Matching Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42007--42017} }
MVGGT: Multimodal Visual Geometry Grounded Transformer for Multiview 3D Referring Expression Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Changli and Wang, Haodong and Ji, Jiayi and Yao, Yutian and Du, Chunsai and Kang, Jihua and Fu, Yanwei and Cao, Liujuan}, title = {{MVGGT}: Multimodal Visual Geometry Grounded Transformer for Multiview {3D} Referring Expression Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {16800--16810} }
Next-Scale Autoregressive Models for Text-to-Motion Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Zhiwei and Jin, Shibo and Liu, Lingjie and Zhao, Mingmin}, title = {Next-Scale Autoregressive Models for Text-to-Motion Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {16376--16386} }
Tea-Adapter: Teacher Adapter for Efficient Conditional Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yinhan and Ma, Yue and Yi, Fangqiu and Qi, Chenyang and Zhang, Chi and Feng, Kunyu and Wang, Zeyu}, title = {Tea-Adapter: Teacher Adapter for Efficient Conditional Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4805--4815} }
Enhancing Visual Representation with Textual Semantics: Textual Semantics-Powered Prototypes for Heterogeneous Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Xinghao and Niu, Jianwei and Liu, Xuefeng and Zhu, Guogang and Zhang, Jiayuan and Tang, Shaojie and Chen, Wei},
  title     = {Enhancing Visual Representation with Textual Semantics: Textual Semantics-Powered Prototypes for Heterogeneous Federated Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10313--10323},
}
PointWorld: Scaling 3D World Models for In-The-Wild Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Wenlong and Chao, Yu-Wei and Mousavian, Arsalan and Liu, Ming-Yu and Fox, Dieter and Mo, Kaichun and Li, Fei-Fei},
  title     = {{PointWorld}: Scaling {3D} World Models for In-The-Wild Robotic Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20765--20779},
}
BAgger: Backwards Aggregation for Mitigating Drift in Autoregressive Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Po_2026_CVPR,
  author    = {Po, Ryan and Chan, Eric Ryan and Chen, Changan and Wetzstein, Gordon},
  title     = {{BAgger}: Backwards Aggregation for Mitigating Drift in Autoregressive Video Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43727--43739},
}
TAG-MoE: Task-Aware Gating for Unified Generative Mixture-of-Experts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Yu and Yan, Hongbin and Cao, Juan and Cheng, Yiji and Hang, Tiankai and He, Runze and Yin, Zijin and Zhang, Shiyi and Zhang, Yuxin and Li, Jintao and Wang, Chunyu and Lu, Qinglin and Lee, Tong-Yee and Tang, Fan},
  title     = {{TAG-MoE}: Task-Aware Gating for Unified Generative Mixture-of-Experts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27270--27280},
}
Image-to-Point Cloud Feature Back-Projection for Multimodal Training of 3D Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR,
  author    = {Han, Jiawei and Poggi, Matteo and Huan, Li and Wang, Changshuo and Liu, Kaiqi and Li, Wei},
  title     = {Image-to-Point Cloud Feature Back-Projection for Multimodal Training of {3D} Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42049--42060},
}
MDCS-MoAME: Multi-directional Composite Scanning with Mixture of Attention and Mamba Experts for Cancer Survival Prediction-
[pdf]
[bibtex]@InProceedings{Qu_2026_CVPR,
  author    = {Qu, Linjie and Xiao, Jin and Liu, Xiangrong and Sun, Changming and Cui, Hui and Fang, Yuqi and Su, Ran and Jin, Qiangguo and Wei, Leyi},
  title     = {{MDCS-MoAME}: Multi-directional Composite Scanning with Mixture of Attention and {Mamba} Experts for Cancer Survival Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {14461--14470},
}
GeoGuide: Hierarchical Geometric Guidance for Open-Vocabulary 3D Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tao_2026_CVPR,
  author    = {Tao, Xujing and Wang, Chuxin and Ai, Yubo and Cheng, Zhixin and Li, Zhuoyuan and Liu, Liangsheng and Chen, Yujia and Li, Xinjun and Li, Qiao and Yang, Wenfei and Zhang, Tianzhu},
  title     = {{GeoGuide}: Hierarchical Geometric Guidance for Open-Vocabulary {3D} Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26855--26866},
}
Memory Matters: Boosting Training-Free Zero-Shot Temporal Action Localization with a Learnable Lookup Table-
[pdf]
[bibtex]@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Han and Tang, Haoyu and Mu, Xiaoxuan and Li, Chen and Zhu, Jihua},
  title     = {Memory Matters: Boosting Training-Free Zero-Shot Temporal Action Localization with a Learnable Lookup Table},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9762--9772},
}
TRCoRSurg: Temporal-Relational Co-Reasoning for Surgical Video Triplet Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Fang and Zou, Shihao and Si, Weixin and Gao, Yang and Li, Shuai and Hao, Aimin},
  title     = {{TRCoRSurg}: Temporal-Relational Co-Reasoning for Surgical Video Triplet Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2811--2820},
}
Volumetric Functional Maps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Maggioli_2026_CVPR,
  author    = {Maggioli, Filippo and Melzi, Simone and Livesu, Marco},
  title     = {Volumetric Functional Maps},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20443--20454},
}
Focus-to-Perceive Representation Learning: A Cognition-Inspired Hierarchical Framework for Endoscopic Video Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yuan and Dou, Sihao and Hu, Kai and Deng, Shuhua and Cao, Chunhong and Xiao, Fen and Gao, Xieping},
  title     = {Focus-to-Perceive Representation Learning: A Cognition-Inspired Hierarchical Framework for Endoscopic Video Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28264--28274},
}
Improved Mean Flows: On the Challenges of Fastforward Generative Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Geng_2026_CVPR,
  author    = {Geng, Zhengyang and Lu, Yiyang and Wu, Zongze and Shechtman, Eli and Kolter, J. Zico and He, Kaiming},
  title     = {Improved Mean Flows: On the Challenges of Fastforward Generative Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {30467--30476},
}
Don't Show Pixels, Show Cues: Unlocking Visual Tool Reasoning in Language Models via Perception Programs-
[pdf]
[supp]
[bibtex]@InProceedings{Janjua_2026_CVPR,
  author    = {Janjua, Muhammad Kamran and Silva, Hugo and Niu, Di and Rashidi, Bahador},
  title     = {Don't Show Pixels, Show Cues: Unlocking Visual Tool Reasoning in Language Models via Perception Programs},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5165--5174},
}
Prime Once, then Reprogram Locally: An Efficient Alternative to Black-Box Service Model Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yunbei and Cai, Chengyi and Liu, Feng and Hamm, Jihun},
  title     = {Prime Once, then Reprogram Locally: An Efficient Alternative to Black-Box Service Model Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6176--6187},
}
Clothe and Pose-
[pdf]
[supp]
[bibtex]@InProceedings{Sharma_2026_CVPR,
  author    = {Sharma, Nakul and Bansal, Aayush and Vo, Minh},
  title     = {Clothe and Pose},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2015--2024},
}
FAVE: A Structured Benchmark for Fine-Grained Audio-Visual Temporal Evaluation in Multimodal LLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Weiheng and Yu, An and Li, Jian and Zhang, Zhenfei and Ye, Felix X.-F. and Chang, Ming-Ching},
  title     = {{FAVE}: A Structured Benchmark for Fine-Grained Audio-Visual Temporal Evaluation in Multimodal {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1651--1660},
}
Bootstrap Dynamic-Aware 3D Visual Representation for Scalable Robot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Qiwei and Cai, Boyang and Lai, Minghao and Zhuang, Sitong and Lin, Tao and Qin, Yan and Ye, Yixuan and Liang, Jiaming and Xu, Renjing},
  title     = {Bootstrap Dynamic-Aware {3D} Visual Representation for Scalable Robot Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13419--13429},
}
DAGE: Dual-Stream Architecture for Efficient and Fine-Grained Geometry Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ngo_2026_CVPR,
  author    = {Ngo, Tuan Duc and Huang, Jiahui and Oh, Seoung Wug and Blackburn-Matzen, Kevin and Kalogerakis, Evangelos and Gan, Chuang and Lee, Joon-Young},
  title     = {{DAGE}: Dual-Stream Architecture for Efficient and Fine-Grained Geometry Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21701--21712},
}
Proof-of-Perception: Certified Tool-Using Multimodal Reasoning with Compositional Conformal Guarantees-
[pdf]
[arXiv]
[bibtex]@InProceedings{Fayyazi_2026_CVPR,
  author    = {Fayyazi, Arya and Akrami, Haleh},
  title     = {{Proof-of-Perception}: Certified Tool-Using Multimodal Reasoning with Compositional Conformal Guarantees},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5144--5153},
}
Generating Humanless Environment Walkthroughs from Egocentric Walking Tour Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ham_2026_CVPR,
  author    = {Ham, Yujin and Kim, Junho and Boominathan, Vivek and Balakrishnan, Guha},
  title     = {Generating Humanless Environment Walkthroughs from Egocentric Walking Tour Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4322--4331},
}
POLAR: A Portrait OLAT Dataset and Generative Framework for Illumination-Aware Face Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zhuo and Yang, Chengqun and Su, Zhuo and Lv, Zheng and Gao, Jingnan and Zhang, Xiaoyuan and Yang, Xiaokang and Yan, Yichao},
  title     = {{POLAR}: A Portrait {OLAT} Dataset and Generative Framework for Illumination-Aware Face Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28871--28881},
}
ORIC: Benchmarking Object Recognition under Contextual Incongruity in Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhaoyang and Ling, Zhan and Zhou, Yuchen and Gong, Litian and Biyik, Erdem and Su, Hao},
  title     = {{ORIC}: Benchmarking Object Recognition under Contextual Incongruity in Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23673--23684},
}
2D-LFM: Lifting Foundation Model without 3D Supervision-
[pdf]
[supp]
[bibtex]@InProceedings{Dabhi_2026_CVPR,
  author    = {Dabhi, Mosam and Gill, Irhas and Jeni, L{\'a}szl{\'o} A. and Lucey, Simon},
  title     = {{2D-LFM}: Lifting Foundation Model without {3D} Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34303--34311},
}
QD-PCQA: Quality-Aware Domain Adaptation for Point Cloud Quality Assessment-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Guohua and Jin, Jian and Liu, Meiqin and Yao, Chao and Lin, Weisi},
  title     = {{QD-PCQA}: Quality-Aware Domain Adaptation for Point Cloud Quality Assessment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17143--17152},
}
Bi-Bridge: Bidirectional Diffusion Bridges for Low-Light Image Enhancement-
[pdf]
[supp]
[bibtex]@InProceedings{Hua_2026_CVPR,
  author    = {Hua, Zeyu and Li, Hui and Wang, Yu and Wang, Song and Zhu, Congchao and Zheng, Caixia},
  title     = {{Bi-Bridge}: Bidirectional Diffusion Bridges for Low-Light Image Enhancement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37455--37464},
}
Gradient Knows Best: Mixed-Precision Quantization via Gradient-Guided Bit Allocation for Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jun Young and Jeon, Joo Hyeon and Ahn, Sangyeon and Park, Yoonseo and Oh, Yong Seok and Kim, Bogyeong and Cho, Sung In},
  title     = {Gradient Knows Best: Mixed-Precision Quantization via Gradient-Guided Bit Allocation for Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16355--16364},
}
StaMo: Unsupervised Learning of Generalizable Robot Motion from Compact State Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Mingyu and Shu, Jiuhe and Chen, Hui and Li, Zeju and Zhao, Canyu and Yang, Jiange and Gao, Shenyuan and Chen, Hao and Shen, Chunhua},
  title     = {{StaMo}: Unsupervised Learning of Generalizable Robot Motion from Compact State Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35014--35024},
}
Mind the Way You Select Negative Texts: Pursuing the Distance Consistency in OOD Detection with VLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zhikang and Xu, Qianqian and Wang, Zitai and Hua, Cong and Li, Sicong and Yang, Zhiyong and Huang, Qingming},
  title     = {Mind the Way You Select Negative Texts: Pursuing the Distance Consistency in {OOD} Detection with {VLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34650--34661},
}
Pixels Don't Lie (But Your Detector Might): Bootstrapping MLLM-as-a-Judge for Trustworthy Deepfake Detection and Reasoning Supervision-
[pdf]
[supp]
[bibtex]@InProceedings{Kuckreja_2026_CVPR,
  author    = {Kuckreja, Kartik and Gupta, Parul and Khan, Muhammad Haris and Dhall, Abhinav},
  title     = {Pixels Don't Lie (But Your Detector Might): Bootstrapping {MLLM}-as-a-Judge for Trustworthy Deepfake Detection and Reasoning Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25871--25881},
}
DualPrim: Compact 3D Reconstruction with Positive and Negative Primitives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Meng_2026_CVPR,
  author    = {Meng, Xiaoxu and Chen, Zhongmin and Yang, Bo and Chen, Weikai and Liu, Weixiao and Gao, Lin},
  title     = {{DualPrim}: Compact {3D} Reconstruction with Positive and Negative Primitives},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29081--29091},
}
TagSplat: Topology-Aware Gaussian Splatting for Dynamic Mesh Modeling and Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Hanzhi and Weng, Dongdong and Su, Mo and Chen, Yixiao and Dongye, Xiaonuo and Xu, Chenyu},
  title     = {{TagSplat}: Topology-Aware {Gaussian} Splatting for Dynamic Mesh Modeling and Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40971--40980},
}
RHCNet: Residual-Guided Hierarchical Calibration Network for Robust Underwater Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yueying and Guo, Yiteng and Zhang, Weidong and Wen, Jie and Shen, Liquan and Yan, Huaicheng and Xu, Xin},
  title     = {{RHCNet}: Residual-Guided Hierarchical Calibration Network for Robust Underwater Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4393--4402},
}
VS-Bench: Evaluating VLMs for Strategic Abilities in Multi-Agent Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zelai and Xu, Zhexuan and Yi, Xiangmin and Yuan, Huining and Guang, Mo and Long, Kaiwen and Chen, Xinlei and Wu, Yi and Yu, Chao and Wang, Yu},
  title     = {{VS-Bench}: Evaluating {VLMs} for Strategic Abilities in Multi-Agent Environments},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {21561--21572},
}
Machine Unlearning via Adaptive Gradient Reweighting and Multi-stage Objective Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Juxin and Shi, Haoyu and Wang, Mengyao and Zhang, Huaiwen},
  title     = {Machine Unlearning via Adaptive Gradient Reweighting and Multi-stage Objective Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39403--39413},
}
CURVE: A Benchmark for Cultural and Multilingual Long Video Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Singh_2026_CVPR,
  author    = {Singh, Darshan and Nagrani, Arsha and Manikantan, Kawshik and Singh, Harman and Tewari, Dinesh and Weyand, Tobias and Schmid, Cordelia and Angelova, Anelia and Dave, Shachi},
  title     = {{CURVE}: A Benchmark for Cultural and Multilingual Long Video Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32860--32871},
}
Diffusion-Based Native Adversarial Synthesis for Enhanced Medical Segmentation Generalization-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Hongyu and Chen, Haipeng and Xu, Zhimin and Yang, Chengxin and Lyu, Yingda},
  title     = {Diffusion-Based Native Adversarial Synthesis for Enhanced Medical Segmentation Generalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {1461--1471},
}
Proxy-Tuning: Tailoring Multimodal Autoregressive Models for Subject-Driven Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yi and Qian, Shengju and Zhu, Lingting and Liu, Lei and Qiao, Wandi and Li, Ziqiang and Yu, Lequan and Li, Bin},
  title     = {{Proxy-Tuning}: Tailoring Multimodal Autoregressive Models for Subject-Driven Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43331--43340},
}
CLIPoint3D: Language-Grounded Few-Shot Unsupervised 3D Point Cloud Domain Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Singha_2026_CVPR,
  author    = {Singha, Mainak and Mehrotra, Sarthak and Casari, Paolo and Chaudhuri, Subhasis and Ricci, Elisa and Banerjee, Biplab},
  title     = {{CLIPoint3D}: Language-Grounded Few-Shot Unsupervised {3D} Point Cloud Domain Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9582--9592},
}
Missing No More: Dictionary-Guided Cross-Modal Image Fusion under Missing Infrared-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yafei and Ma, Meng and Li, Huafeng and Liu, Yu},
  title     = {Missing No More: Dictionary-Guided Cross-Modal Image Fusion under Missing Infrared},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19549--19558},
}
Hyperbolic Defect Feature Synthesis for Few-Shot Defect Classification-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Huimin and Hu, Boxuan and Zhang, Yulin and Zhou, Xiuzhuang and Hu, Junlin},
  title     = {Hyperbolic Defect Feature Synthesis for Few-Shot Defect Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {19602--19612},
}
TimeLens: Rethinking Video Temporal Grounding with Multimodal LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jun and Wang, Teng and Ge, Yuying and Ge, Yixiao and Li, Xinhao and Wang, Limin},
  title     = {{TimeLens}: Rethinking Video Temporal Grounding with Multimodal {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10419--10429},
}
HVG-3D: Bridging Real and Simulation Domains for 3D-Conditional Hand-Object Interaction Video Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Mingjin and Chen, Junhao and Fan, Zhaoxin and Lee, Yujian and Dang, Zichen and Wang, Lili and Cui, Yawen and Chau, Lap-Pui and Wang, Yi},
  title     = {{HVG-3D}: Bridging Real and Simulation Domains for {3D}-Conditional Hand-Object Interaction Video Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15986--15997},
}
PropFly: Learning to Propagate via On-the-Fly Supervision from Pre-trained Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seo_2026_CVPR,
  author    = {Seo, Wonyong and Moon, Jaeho and Lee, Jaehyup and Kim, Soo Ye and Kim, Munchurl},
  title     = {{PropFly}: Learning to Propagate via On-the-Fly Supervision from Pre-trained Video Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43228--43238},
}
Qwen-Image-Layered: Towards Inherent Editability via Layer Decomposition-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Shengming and Zhang, Zekai and Tang, Zecheng and Gao, Kaiyuan and Xu, Xiao and Yan, Kun and Li, Jiahao and Chen, Yilei and Chen, Yuxiang and Shum, Heung-Yeung and Ni, Lionel M. and Lin, Junyang and Wu, Chenfei},
  title     = {{Qwen-Image-Layered}: Towards Inherent Editability via Layer Decomposition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16196--16205},
}
LocateAnything3D: Vision-Language 3D Detection with Chain-of-Sight-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Man_2026_CVPR,
  author    = {Man, Yunze and Wang, Shihao and Zhang, Guowen and Bjorck, Johan and Gui, Liang-Yan and Fan, Jim and Kautz, Jan and Wang, Yu-Xiong and Yu, Zhiding},
  title     = {{LocateAnything3D}: Vision-Language {3D} Detection with Chain-of-Sight},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {31089--31102},
}
Dictionary-Aligned Concept Control for Safeguarding Multimodal LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Jinqi and Yang, Jinyu and Neiman, Tal and Fan, Lei and Yin, Bing and Tran, Son and Shah, Mubarak and Vidal, Ren{\'e}},
  title     = {Dictionary-Aligned Concept Control for Safeguarding Multimodal {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15815--15828},
}
ViBES: A Conversational Agent with Behaviorally-Intelligent 3D Virtual Body-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Juze and Chen, Changan and Chen, Xin and Yu, Heng and Xiang, Tiange and Khan, Ali Sartaz and Lakshmikanth, Shrinidhi K. and Adeli, Ehsan},
  title     = {{ViBES}: A Conversational Agent with Behaviorally-Intelligent {3D} Virtual Body},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39994--40008},
}
Bridging Facial Understanding and Animation via Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR,
  author    = {Song, Luchuan and Liu, Pinxin and Liu, Haiyang and Jin, Zhenchao and Tang, Yolo Yunlong and Xu, Zichong and Liang, Susan and Bi, Jing and Corso, Jason J. and Xu, Chenliang},
  title     = {Bridging Facial Understanding and Animation via Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17557--17567},
}
HypeVPR: Exploring Hyperbolic Space for Perspective to Equirectangular Visual Place Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Woo_2026_CVPR,
  author    = {Woo, Suhan and Lee, Seongwon and Jang, Jinwoo and Kim, Euntai},
  title     = {{HypeVPR}: Exploring Hyperbolic Space for Perspective to Equirectangular Visual Place Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12183--12192},
}
Image-based Outlier Synthesis With Training Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Regmi_2026_CVPR,
  author    = {Regmi, Sudarshan},
  title     = {Image-based Outlier Synthesis With Training Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39338--39350},
}
VGA: Empowering Aerial-Ground Localization by Visual Geometry Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Tao Jun and Shi, Yujiao and Li, Hongdong},
  title     = {{VGA}: Empowering Aerial-Ground Localization by Visual Geometry Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5409--5420},
}
ProactiveMobile: A Comprehensive Benchmark for Boosting Proactive Intelligence On Mobile Devices-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kong_2026_CVPR,
  author    = {Kong, Dezhi and Feng, Zhengzhao and Liang, Qiliang and Wang, Hao and Sun, Haofei and Yang, Changpeng and Li, Yang and Zhou, Peng and Nie, Shuai and Wang, Hongzhen and Zhou, Linfeng and Jia, Hao and Xu, Jiaming and Shi, Runyu and Huang, Ying},
  title     = {{ProactiveMobile}: A Comprehensive Benchmark for Boosting Proactive Intelligence On Mobile Devices},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27503--27513},
}
AwareVLN: Reasoning with Self-awareness for Vision-Language Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Wenxuan and Xu, Xiuwei and Liu, Yichen and Li, Xiangyu and Yin, Hang and Chen, Huangxing and Zheng, Wenzhao and Feng, Jianjiang and Zhou, Jie and Lu, Jiwen},
  title     = {{AwareVLN}: Reasoning with Self-awareness for Vision-Language Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4065--4075},
}
Thermal-Det: Language-Guided Cross-Modal Distillation for Open-Vocabulary Thermal Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ranasinghe_2026_CVPR,
  author    = {Ranasinghe, Yasiru and Schenck, Elim and Yellin, Florence and Hu, Shuowen and Funk, Christopher and Patel, Vishal M.},
  title     = {{Thermal-Det}: Language-Guided Cross-Modal Distillation for Open-Vocabulary Thermal Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {34628--34637},
}
Thinking-while-Generating: Interleaving Textual Reasoning throughout Visual Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Ziyu and Zhang, Renrui and Li, Hongyu and Zhang, Manyuan and Chen, Xinyan and Wang, Sifan and Feng, Yan and Pei, Peng and Heng, Pheng-Ann},
  title     = {{Thinking-while-Generating}: Interleaving Textual Reasoning throughout Visual Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26295--26305},
}
Adaptive Spatial-Temporal Window: Unlocking the Potential of Event Cameras in Heterogeneous Velocity Scenarios-
[pdf]
[supp]
[bibtex]@InProceedings{Sui_2026_CVPR,
  author    = {Sui, Zhipeng and Hao, Haiqing and He, Weihua and Lee, Seng-Hong and Wang, Wenhui},
  title     = {Adaptive Spatial-Temporal Window: Unlocking the Potential of Event Cameras in Heterogeneous Velocity Scenarios},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {946--955},
}
DLWM: Dual Latent World Models enable Holistic Gaussian-centric Pre-training in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Yiyao and Xue, Ying and Zhang, Haiming and Jiang, Guangfeng and Zhou, Wending and Yan, Xu and Gao, Jiantao and Cai, Yingjie and Liu, Bingbing and Li, Zhen and Shen, Shaojie},
  title     = {{DLWM}: Dual Latent World Models enable Holistic {Gaussian}-centric Pre-training in Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39713--39723},
}
Dual-Granularity Memory for Efficient Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Hongjun and Liu, Lin and Li, Jianguo and Lin, Tao},
  title     = {Dual-Granularity Memory for Efficient Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38016--38026},
}
RS-SSM: Refining Forgotten Specifics in State Space Model for Video Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Kai and Cui, Zhenyu and Zang, Zehua and Zhou, Jiahuan},
  title     = {{RS-SSM}: Refining Forgotten Specifics in State Space Model for Video Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10741--10752},
}
VinQA: Visual Elements Interleaved Long-form Answer Generation for Real-World Multimodal Document QA-
[pdf]
[supp]
[bibtex]@InProceedings{Jang_2026_CVPR,
  author    = {Jang, Young Rok and Kong, Hyesoo and An, Kyunghwan and Huh, Jae Sub and KIM, Gyeonghun and Choi, Stanley Jungkyu},
  title     = {{VinQA}: Visual Elements Interleaved Long-form Answer Generation for Real-World Multimodal Document {QA}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {41130--41139},
}
RAG-TP: A General Framework for Vehicle Trajectory Prediction via Retrieval-Augmented Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ziyi and Zhang, Yang and Tang, Guijian and Zhang, Chao and Zhang, Shibo and Li, Xueqiong and Yang, Shaowu},
  title     = {{RAG-TP}: A General Framework for Vehicle Trajectory Prediction via Retrieval-Augmented Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24865--24874},
}
Re-Align: Structured Reasoning-guided Alignment for In-Context Image Generation and Editing-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR,
  author    = {He, Runze and Cheng, Yiji and Hang, Tiankai and Li, Zhimin and Xu, Yu and Yin, Zijin and Zhang, Shiyi and Dai, Wenxun and Du, Penghui and Ma, Ao and Wang, Chunyu and Lu, Qinglin and Han, Jizhong and Dai, Jiao},
  title     = {{Re-Align}: Structured Reasoning-guided Alignment for In-Context Image Generation and Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9051--9062},
}
A Bit is All You Need! Efficient Video Capture via Single Bit Imaging-
[pdf]
[supp]
[bibtex]@InProceedings{Gandikota_2026_CVPR, author = {Gandikota, Kanchana Vaishnavi and Moeller, Michael and Kolb, Andreas and Choubey, Bhaskar and Chandramouli, Paramanand}, title = {A Bit is All You Need! Efficient Video Capture via Single Bit Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34016-34027} }
BuildingGPT: Auto-Regressive Building Wireframe Reconstruction Model with Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yuzhou and Zhu, Lingjie and Ye, Hanqiao and Liu, Yujun and Huang, Shangfeng and Gao, Xiang and Wang, Ruisheng and Shen, Shuhan}, title = {BuildingGPT: Auto-Regressive Building Wireframe Reconstruction Model with Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36400-36410} }
PositionIC: Unified Position and Identity Consistency for Image Customization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Junjie and Han, Tianyang and Ma, Kai and Gao, Jialin and Song, Yang and He, Xianhua and Luo, Junfeng and Wei, Xiaoming and Zhang, Wenqiang}, title = {PositionIC: Unified Position and Identity Consistency for Image Customization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9139-9148} }
Beyond Appearance: Camouflaged Object Detection via Geometric Structure-
[pdf]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Jinyu and Wu, Changguang and Sun, Fuming and Tang, Jinhui}, title = {Beyond Appearance: Camouflaged Object Detection via Geometric Structure}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25830-25840} }
CLIP Is Shortsighted: Paying Attention Beyond the First Sentence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lavoie_2026_CVPR, author = {Lavoie, Marc-Antoine and Mahmoud, Anas and Zaimi, Aldo and Tchango, Arsene Fansi and Waslander, Steven L.}, title = {CLIP Is Shortsighted: Paying Attention Beyond the First Sentence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9524-9534} }
Do Vision-Language Models Leak What They Learn? Adaptive Token-Weighted Model Inversion Attacks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Ngoc-Bao and Ho, Sy-Tuyen and Hao, Koh Jun and Cheung, Ngai-Man}, title = {Do Vision-Language Models Leak What They Learn? Adaptive Token-Weighted Model Inversion Attacks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10283-10292} }
Unlocking the Power of Critical Factors for 3D Visual Geometry Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Guangkai and Geng, Hua and Zheng, Huanyi and Yin, Songyi and Sun, Yanlong and Chen, Hao and Shen, Chunhua}, title = {Unlocking the Power of Critical Factors for 3D Visual Geometry Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28979-28989} }
No Need For Real Anomaly: MLLM Empowered Zero-Shot Video Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dai_2026_CVPR, author = {Dai, Zunkai and Li, Ke and Liu, Jiajia and Yang, Jie and Qiao, Yuanyuan}, title = {No Need For Real Anomaly: MLLM Empowered Zero-Shot Video Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35648-35658} }
LottieGPT: Tokenizing Vector Animation for Autoregressive Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Junhao and Gao, Kejun and Cui, Yuehan and Sun, Mingze and Chen, Mingjin and Wang, Shaohui and Long, Xiaoxiao and Ma, Fei and Tian, Qi and Zhao, Hao and Huang, Ruqi}, title = {LottieGPT: Tokenizing Vector Animation for Autoregressive Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31639-31651} }
Revisiting Geometric Obfuscation with Dual Convergent Lines for Privacy-Preserving Image Queries in Visual Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Jeonggon and Moon, Heejoon and Hong, Je Hyeong}, title = {Revisiting Geometric Obfuscation with Dual Convergent Lines for Privacy-Preserving Image Queries in Visual Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {113-123} }
Matching Every Pair to Track Every Point: PairFormer for All-Pairs Tracking and Video Trajectory Fields-
[pdf]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Guangyang and Ding, Youran and Che, Xinyu and Sun, Benyuan and Yang, Yi and Liu, Xiaohong}, title = {Matching Every Pair to Track Every Point: PairFormer for All-Pairs Tracking and Video Trajectory Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35187-35196} }
Upsample Anything: A Simple and Hard to Beat Baseline for Feature Upsampling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seo_2026_CVPR, author = {Seo, Minseok and Hamilton, Mark and Kim, Changick}, title = {Upsample Anything: A Simple and Hard to Beat Baseline for Feature Upsampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29707-29716} }
Sparsity-Aware Voxel Attention and Foreground Modulation for 3D Semantic Scene Completion-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xue_2026_CVPR, author = {Xue, Yu and Gao, Longjun and Su, Yuanqi and Lu, HaoAng and Zhang, Xiaoning}, title = {Sparsity-Aware Voxel Attention and Foreground Modulation for 3D Semantic Scene Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5751-5761} }
ShapeR: Robust Conditional 3D Shape Generation from Casual Captures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Siddiqui_2026_CVPR, author = {Siddiqui, Yawar and Frost, Duncan and Aroudj, Samir and Avetisyan, Armen and Howard-Jenkins, Henry and DeTone, Daniel and Moulon, Pierre and Wu, Qirui and Li, Zhengqin and Straub, Julian and Newcombe, Richard and Engel, Jakob}, title = {ShapeR: Robust Conditional 3D Shape Generation from Casual Captures}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27157-27168} }
Exploring Conditions for Diffusion Models in Robotic Control-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shin_2026_CVPR, author = {Shin, Heeseong and Heo, Byeongho and Han, Dongyoon and Kim, Seungryong and Kim, Taekyung}, title = {Exploring Conditions for Diffusion Models in Robotic Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27967-27977} }
TV2TV: A Unified Framework for Interleaved Language and Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Xiaochuang and Emad, Youssef and Hall, Melissa and Nguyen, John and Padthe, Karthik and Robbins, Liam and Bar, Amir and Chen, Delong and Drozdzal, Michal and Elbayad, Maha and Hu, Yushi and Li, Shang-Wen and Verbeek, Jakob and Wang, XuDong and Ghazvininejad, Marjan and Zettlemoyer, Luke and Dinan, Emily}, title = {TV2TV: A Unified Framework for Interleaved Language and Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7695-7706} }
Pose-Free Omnidirectional Gaussian Splatting for 360-Degree Videos with Consistent Depth Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhuang_2026_CVPR, author = {Zhuang, Chuanqing and Lu, Xin and Deng, Zehui and Lu, Zhengda and Wang, Yiqun and Diao, Junqi and Xiao, Jun}, title = {Pose-Free Omnidirectional Gaussian Splatting for 360-Degree Videos with Consistent Depth Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4881-4890} }
LESA: Learnable Stage-Aware Predictors for Diffusion Model Acceleration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Peiliang and Liu, Jiacheng and Xu, Haowen and Wang, Xinyu and Zou, Chang and Zhang, Linfeng}, title = {LESA: Learnable Stage-Aware Predictors for Diffusion Model Acceleration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43300-43309} }
ObjectMorpher: 3D-Aware Image Editing via Deformable 3DGS-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Yuhuan and Pan, Aoxuan and Huang, Yi-Hua and Chang, Chirui and Dai, Peng and Yu, Xin and Qi, Xiaojuan}, title = {ObjectMorpher: 3D-Aware Image Editing via Deformable 3DGS}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5828-5838} }
Residual Connections Harm Generative Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xiao and Jiang, Ruoxi and Gao, William and Willet, Rebecca and Maire, Michael}, title = {Residual Connections Harm Generative Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39669-39679} }
Learning What Matters: Prioritized Concept Learning via Relative Error-driven Sample Selection-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Qian and Chandhok, Shivam and Ma\~nas, Oscar and Jain, Kanishk and Agrawal, Aishwarya and Sigal, Leonid}, title = {Learning What Matters: Prioritized Concept Learning via Relative Error-driven Sample Selection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15794-15804} }
SANER: Switchable Adapter with Non-parametric Enhanced Routing for Person De-Reidentification-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yimin and Pu, Nan and Yang, Fengxiang and Li, Wenjing and Li, Zhihui and Zhong, Zhun}, title = {SANER: Switchable Adapter with Non-parametric Enhanced Routing for Person De-Reidentification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40376-40385} }
Learning to Focus and Precise Cropping: A Reinforcement Learning Framework with Information Gaps and Grounding Loss for MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Xuanpu and Tan, Zhentao and Sheng, Dianmo and Chen, Tianxiang and Liu, Yao and Wu, Yue and Gong, Tao and Chu, Qi and Yu, Nenghai}, title = {Learning to Focus and Precise Cropping: A Reinforcement Learning Framework with Information Gaps and Grounding Loss for MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25938-25947} }
SeaCache: Spectral-Evolution-Aware Cache for Accelerating Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chung_2026_CVPR, author = {Chung, Jiwoo and Hyun, Sangeek and Lee, MinKyu and Han, Byeongju and Cha, Geonho and Wee, Dongyoon and Hong, Youngjun and Heo, Jae-Pil}, title = {SeaCache: Spectral-Evolution-Aware Cache for Accelerating Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14283-14294} }
SafeRoPE: Risk-specific Head-wise Embedding Rotation for Safe Generation in Rectified Flow Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Xiang and Li, Feifei and Zhang, Mi and Hong, Geng and You, Xiaoyu and Yang, Min}, title = {SafeRoPE: Risk-specific Head-wise Embedding Rotation for Safe Generation in Rectified Flow Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {690-700} }
Self-Paced and Self-Corrective Masked Prediction for Movie Trailer Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Sidan and Xu, Hongteng and Luo, Dixin}, title = {Self-Paced and Self-Corrective Masked Prediction for Movie Trailer Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7684-7694} }
LangRef3DGS: Natural Language-Guided 3D Referential Segmentation from Partial Observations via 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Xulun and Zhang, Qin and Zhou, Kun}, title = {LangRef3DGS: Natural Language-Guided 3D Referential Segmentation from Partial Observations via 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38595-38605} }
GenErase: Generalizable and Semantically-Aware Concept Erasure in Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Vardhana_2026_CVPR, author = {Vardhana, Korada Sri and Biswas, Soma}, title = {GenErase: Generalizable and Semantically-Aware Concept Erasure in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2327-2335} }
RawMetaDiff: Unlocking Extreme Darkness from Dual-Exposure RAW with Meta-Guided Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Panjun and Xia, Jiyuan and Guan, Yuanshen and Li, Yong and Lang, Zhiqiang and Xu, Ruikang and Chen, Chang and Song, Dehua and Song, Fenglong and Xiong, Zhiwei}, title = {RawMetaDiff: Unlocking Extreme Darkness from Dual-Exposure RAW with Meta-Guided Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5617-5626} }
VideoRealBench: A Chain-of-Thought Realism Evaluation Benchmark for Generated Human-Centric Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Min and Zhang, Xinwen and Tang, Jialei and Zhou, Xin and Li, Kehan and Huang, Zeyi and Wang, Limin}, title = {VideoRealBench: A Chain-of-Thought Realism Evaluation Benchmark for Generated Human-Centric Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18565-18575} }
Premier: Personalized Preference Modulation with Learnable User Embedding in Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zihao and Wei, Yuxiang and Zhou, Xinpeng and Zhang, Tianyu and Liang, Tao and Bai, Yalong and Zhang, Hongzhi and Zuo, Wangmeng}, title = {Premier: Personalized Preference Modulation with Learnable User Embedding in Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29146-29156} }
NimbusGS: Unified 3D Scene Reconstruction under Hybrid Weather-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yanying and Li, Jinyang and He, Shengfeng and Xu, Yangyang and Dong, Junyu and Du, Yong}, title = {NimbusGS: Unified 3D Scene Reconstruction under Hybrid Weather}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5038-5048} }
Computer Vision with a Superpixelation Camera-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mahalingam_2026_CVPR, author = {Mahalingam, Sasidharan and Brown, Rachel and Ingle, Atul}, title = {Computer Vision with a Superpixelation Camera}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41773-41783} }
Elastic3D: Controllable Stereo Video Conversion with Guided Latent Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Metzger_2026_CVPR, author = {Metzger, Nando and Truong, Prune and Bhat, Goutam and Schindler, Konrad and Tombari, Federico}, title = {Elastic3D: Controllable Stereo Video Conversion with Guided Latent Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32693-32703} }
gQIR: Generative Quanta Image Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Garg_2026_CVPR, author = {Garg, Aryan and Ma, Sizhuo and Gupta, Mohit}, title = {gQIR: Generative Quanta Image Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19759-19770} }
DuetMerging: Synergizing Dynamic and Static Strategies for Mitigating Task Interference in Model Merging-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yan and Cao, Guiping and Song, Yaguang and Tao, Ming and Gong, Haoran and Liu, Junhui and Wang, Yaowei and Jiang, Dongmei}, title = {DuetMerging: Synergizing Dynamic and Static Strategies for Mitigating Task Interference in Model Merging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41954-41963} }
MatSpray: Fusing 2D Material World Knowledge on 3D Geometry-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Langsteiner_2026_CVPR, author = {Langsteiner, Philipp and Dihlmann, Jan-Niklas and Lensch, Hendrik}, title = {MatSpray: Fusing 2D Material World Knowledge on 3D Geometry}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22615-22625} }
ViRC: Enhancing Visual Interleaved Mathematical CoT with Reason Chunking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Lihong and Li, Liangqi and Feng, Weiwei and Wu, Jiamin and Miao, Changtao and Wu, Tieru and Ma, Rui and Zhang, Bo and Li, Zhe}, title = {ViRC: Enhancing Visual Interleaved Mathematical CoT with Reason Chunking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26144-26153} }
FinPercep-RM: A Fine-grained Reward Model and Co-evolutionary Curriculum for RL-based Real-world Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yidi and Fan, Zihao and Huang, Jie and Xiao, Jie and Li, Dong and Zhang, Wenlong and Bai, Lei and Fu, Xueyang and Zha, Zheng-jun}, title = {FinPercep-RM: A Fine-grained Reward Model and Co-evolutionary Curriculum for RL-based Real-world Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4839-4849} }
UniTEX: Universal High Fidelity Generative Texturing for 3D Shapes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Yixun and Luo, Kunming and Chen, Xiao and Chen, Rui and Yan, Hongyu and Li, Weiyu and Liu, Jiarui and Tian, Fei-Peng and Tan, Ping}, title = {UniTEX: Universal High Fidelity Generative Texturing for 3D Shapes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19917-19927} }
ARM-Thinker: Reinforcing Multimodal Generative Reward Models with Agentic Tool Use and Visual Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2026_CVPR, author = {Ding, Shengyuan and Fang, Xinyu and Liu, Ziyu and Zang, Yuhang and Cao, Yuhang and Zhao, Xiangyu and Duan, Haodong and Dong, Xiaoyi and Liang, Jianze and Wang, Bin and He, Conghui and Lin, Dahua and Wang, Jiaqi}, title = {ARM-Thinker: Reinforcing Multimodal Generative Reward Models with Agentic Tool Use and Visual Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22195-22205} }
Denoising as Path Planning: Training-Free Acceleration of Diffusion Models with DPCache-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cui_2026_CVPR, author = {Cui, Bowen and Wang, Yuanbin and Xu, Huajiang and Chen, Biaolong and Zhang, Aixi and Jiang, Hao and Jin, Zhengzheng and Liu, Xu and Huang, Pipei}, title = {Denoising as Path Planning: Training-Free Acceleration of Diffusion Models with DPCache}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43632-43642} }
GPFlow: Gaussian Prototype Probability Flow for Unsupervised Multi-Modal Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yiting and Yang, Xulei and Liao, Jingyi and Zhang, Jing and Liu, Fayao}, title = {GPFlow: Gaussian Prototype Probability Flow for Unsupervised Multi-Modal Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43103-43112} }
MVLM: Template-Free Tracking via Vision-Language Margin Confidence and Memory-Gated Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Dae-Hyeon and Baek, Mina and Ha, Jeong-Hun and Park, Chan-Seop and Ganiev, Jamshidjon and Bae, Seung-Hwan}, title = {MVLM: Template-Free Tracking via Vision-Language Margin Confidence and Memory-Gated Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35156-35165} }
Machine Mental Imagery: Empower Multimodal Reasoning with Latent Visual Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zeyuan and Yu, Xueyang and Chen, Delin and Shen, Maohao and Gan, Chuang}, title = {Machine Mental Imagery: Empower Multimodal Reasoning with Latent Visual Tokens}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33510-33520} }
InTrain: Intrinsic Trainability for Zero-Cost Neural Architecture Search-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Qinqin and Chen, Fuhai and Wu, Jipeng and Chen, Zhiwei and Hu, Zhikai and Cai, Weiwei}, title = {InTrain: Intrinsic Trainability for Zero-Cost Neural Architecture Search}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20181-20190} }
Flow Map Distillation Without Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tong_2026_CVPR, author = {Tong, Shangyuan and Ma, Nanye and Xie, Saining and Jaakkola, Tommi}, title = {Flow Map Distillation Without Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33973-33984} }
Unified Vector Floorplan Generation via Markup Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shiohara_2026_CVPR, author = {Shiohara, Kaede and Yamasaki, Toshihiko}, title = {Unified Vector Floorplan Generation via Markup Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39262-39271} }
Contrastive Cross-Bag Augmentation for Multiple Instance Learning-based Whole Slide Image Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Bo and Xu, Xinan and Yan, Shuo and Bai, Yu and Zhang, Zheng and Wang, Wufan and Gao, Hui and Wang, Wendong}, title = {Contrastive Cross-Bag Augmentation for Multiple Instance Learning-based Whole Slide Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21089-21098} }
Cluster-aware Anchor Learning for Multi-View Clustering-
[pdf]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zhe and Meng, Fanhui and Xu, Tianyang and Wu, Xiao-Jun}, title = {Cluster-aware Anchor Learning for Multi-View Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17714-17723} }
Learning from Synthetic Data via Provenance-Based Input Gradient Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nagano_2026_CVPR, author = {Nagano, Koshiro and Fujii, Ryo and Hachiuma, Ryo and Sato, Fumiaki and Sekii, Taiki and Saito, Hideo}, title = {Learning from Synthetic Data via Provenance-Based Input Gradient Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18796-18805} }
Discover, Segment, and Select: A Progressive Mechanism for Zero-shot Camouflaged Object Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yilong and Tian, Jianxin and Zhang, Shengchuan and Cao, Liujuan}, title = {Discover, Segment, and Select: A Progressive Mechanism for Zero-shot Camouflaged Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34745-34754} }
MAGICIAN: Efficient Long-Term Planning with Imagined Gaussians for Active Mapping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Shiyao and Gu\'edon, Antoine and Chen, Shizhe and Lepetit, Vincent}, title = {MAGICIAN: Efficient Long-Term Planning with Imagined Gaussians for Active Mapping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21606-21615} }
Fast3Dcache: Training-free 3D Geometry Synthesis Acceleration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Mengyu and Yang, Yanming and Xu, Chenyi and Song, Chenxi and Zuo, Yufan and Zhao, Tong and Li, Ruibo and Zhang, Chi}, title = {Fast3Dcache: Training-free 3D Geometry Synthesis Acceleration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27030-27040} }
PrivateEyes: Gaze-Preserving Anonymization for Data Sharing-
[pdf]
[supp]
[bibtex]@InProceedings{Gupta_2026_CVPR, author = {Gupta, Surabhi and Muthumariappan, Dinesh Prabhu and Das, Biplab and Rajagopal, Anoop Kolar and Iyer, Kiran Nanjunda and Seo, Donghwan}, title = {PrivateEyes: Gaze-Preserving Anonymization for Data Sharing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3274-3283} }
ForeAct: Steering Your VLA with Efficient Visual Foresight Planning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zhuoyang and Yang, Shang and Hu, Qinghao and Huang, Luke J. and Hou, James and Sun, Yufei and Lu, Yao and Han, Song}, title = {ForeAct: Steering Your VLA with Efficient Visual Foresight Planning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37195-37205} }
RAPID: Reusing Attention Sparsity with Inter-step Adaptation for Efficient Video Diffusion-
[pdf]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Shangran and Lu, Lu and Chen, Jian and Liu, Qiang}, title = {RAPID: Reusing Attention Sparsity with Inter-step Adaptation for Efficient Video Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36147-36156} }
All Vehicles Can Lie: Efficient Adversarial Defense in Fully Untrusted-Vehicle Collaborative Perception via Pseudo-Random Bayesian Inference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Yi and Wu, Libing and Zhang, Zhuangzhuang and Qiu, Jing and Huo, Lijuan and Feng, Jiaqi}, title = {All Vehicles Can Lie: Efficient Adversarial Defense in Fully Untrusted-Vehicle Collaborative Perception via Pseudo-Random Bayesian Inference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6549-6558} }
C-LaV: Conditional Latent Velocity Field Denoising for Weather-Robust LiDAR Place Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Xuewei and Yang, Jiayue and Zeng, Zhiwen and Zhang, Yanyong and Xia, Yan}, title = {C-LaV: Conditional Latent Velocity Field Denoising for Weather-Robust LiDAR Place Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2928-2937} }
Uncertainty-Aware Knowledge Distillation for Multimodal Large Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Jingchen and Han, Shaobo and Patel, Deep and Kohno, Wataru and Jin, Can and Chen, Changyou}, title = {Uncertainty-Aware Knowledge Distillation for Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5585-5595} }
Adaptive Auxiliary Prompt Blending for Target-Faithful Diffusion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Kwanyoung and Cha, SeungJu and Ahn, Yebin and Oh, Hyunwoo and Koh, Sungho and Kim, Dong-Jin}, title = {Adaptive Auxiliary Prompt Blending for Target-Faithful Diffusion Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43707-43716} }
Time-Aware One Step Diffusion Network for Real-World Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Tianyi and Duan, Zheng-Peng and Guo, Chun-Le and Jiang, Peng-Tao and Li, Bo and Cheng, Ming-Ming and Li, Chongyi}, title = {Time-Aware One Step Diffusion Network for Real-World Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30543-30552} }
Fast Markov Random Field Optimisation for Topologically Noisy 3D Shape Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Roetzer_2026_CVPR, author = {Roetzer, Paul and Thunberg, Johan and L\"ahner, Zorah and Bernard, Florian}, title = {Fast Markov Random Field Optimisation for Topologically Noisy 3D Shape Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24162-24172} }
Medic-AD: Towards Medical Vision-Language Model's Clinical Intelligence-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Woohyeon and Kim, Jaeik and Cho, Sunghwan Steve and Hong, Pa and Jeong, Wookyoung and Nam, Yoojin and Kim, Namjoon and Wong, Ginny Y. and Cheung, Ka Chun and Do, Jaeyoung}, title = {Medic-AD: Towards Medical Vision-Language Model's Clinical Intelligence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36321-36331} }
MorphSeek: Fine-grained Latent Representation-Level Policy Optimization for Deformable Image Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Runxun and Liu, Yizhou and Li, Dongrui and Xu, Bo and Wei, Jingwei}, title = {MorphSeek: Fine-grained Latent Representation-Level Policy Optimization for Deformable Image Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27460-27470} }
DVAR: Dynamic Visual Autoregressive Modeling for Image Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Yu and Zhang, Kai and Zhu, Wei and Liu, Qingguo and Hu, Xiantao and Li, Jun and Yang, Jian}, title = {DVAR: Dynamic Visual Autoregressive Modeling for Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23378-23387} }
GUI-SAGE: Enhancing GUI Automation with Self-Explanatory Learning-
[pdf]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Fei and Gu, Zhangxuan and Lu, Zhengxi and Zhang, Shangzhan and Zeng, Zhengwen and Shen, Shuheng and Meng, Changhua and Yan, Yuchen and Zhang, Wenqi and Shen, Yongliang and Lu, Weiming and Zhuang, Yueting}, title = {GUI-SAGE: Enhancing GUI Automation with Self-Explanatory Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13007-13016} }
3DrawAgent: Teaching LLM to Draw in 3D with Early Contrastive Experience-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Hongcan and Xiao, Xinyue and Wang, Yilin and Zhang, Yue and Qi, Yonggang}, title = {3DrawAgent: Teaching LLM to Draw in 3D with Early Contrastive Experience}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27179-27187} }
RAM: Recover Any 3D Human Motion in-the-Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2026_CVPR, author = {Jia, Sen and Zhu, Ning and Zhong, Jinqin and Zhou, Jiale and Zhang, Huaping and Hwang, Jenq-Neng and Li, Lei}, title = {RAM: Recover Any 3D Human Motion in-the-Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42789-42799} }
GaussianGrow: Geometry-aware Gaussian Growing from 3D Point Clouds with Text Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Weiqi and Zhou, Junsheng and Geng, Haotian and Shi, Kanle and Xu, Shenkun and Fang, Yi and Liu, Yu-Shen}, title = {GaussianGrow: Geometry-aware Gaussian Growing from 3D Point Clouds with Text Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18968-18979} }
REVIVE 3D: Refinement via Encoded Voluminous Inflated prior for Volume Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Hankyeol and Baek, Wooyeol and Kim, Seongdo and Kim, Jongyoo}, title = {REVIVE 3D: Refinement via Encoded Voluminous Inflated prior for Volume Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26984-26994} }
UniFusion: A Unified Image Fusion Framework with Robust Representation and Source-Aware Preservation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xingyuan and Du, Songcheng and Zou, Yang and Xu, Haoyuan and Jiang, Zhiying and Liu, Jinyuan}, title = {UniFusion: A Unified Image Fusion Framework with Robust Representation and Source-Aware Preservation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33869-33880} }
GazeShift: Unsupervised Gaze Estimation and Dataset for VR-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shapira_2026_CVPR, author = {Shapira, Gil and Goldin, Ishay and Artyomov, Evgeny and Kim, Donghoon and Keller, Yosi and Zehngut, Niv}, title = {GazeShift: Unsupervised Gaze Estimation and Dataset for VR}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24290-24299} }
A3: Towards Advertising Aesthetic Assessment-
[pdf]
[supp]
[bibtex]@InProceedings{Ji_2026_CVPR, author = {Ji, Kaiyuan and Gao, Yixuan and Sun, Lu and Zheng, Yushuo and Chen, Zijian and Zhang, Jianbo and Zhu, Xiangyang and Tian, Yuan and Zhang, Zicheng and Zhai, Guangtao}, title = {A3: Towards Advertising Aesthetic Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9478-9490} }
Layered 4D-Rotor Gaussian Splatting: A Compressed Representation for Long Dynamic Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Hanjie and Duan, Yuanxing and Dai, Qiyu and Li, Ge and Chen, Baoquan and Wang, He}, title = {Layered 4D-Rotor Gaussian Splatting: A Compressed Representation for Long Dynamic Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18958-18967} }
Just-in-Time: Training-Free Spatial Acceleration for Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Wenhao and Li, Ji and Liu, Zhaoqiang}, title = {Just-in-Time: Training-Free Spatial Acceleration for Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40643-40652} }
SigLino: Efficient Multi-Teacher Distillation for Agglomerative Vision Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chaybouti_2026_CVPR, author = {Chaybouti, Sofian and Narayan, Sanath and Dahou, Yasser and Khắc, Ph{\'u}c H. L{\^e} and Singh, Ankit and Huynh, Ngoc and Para, Wamiq Reyaz and Kuehne, Hilde and Hacid, Hakim}, title = {SigLino: Efficient Multi-Teacher Distillation for Agglomerative Vision Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10071-10081} }
First Logit Boosting: Visual Grounding Method to Mitigate Object Hallucination in Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ha_2026_CVPR, author = {Ha, Jiwoo and Baek, Jongwoo and So, Jinhyun}, title = {First Logit Boosting: Visual Grounding Method to Mitigate Object Hallucination in Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18241-18250} }
Gravitation-Driven Semantic Alignment for Text Video Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yi and Wang, Zheng and Xu, Xing and Song, Jingkuan and Shen, Heng Tao}, title = {Gravitation-Driven Semantic Alignment for Text Video Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14946-14956} }
Beyond Pixel Simulation: Pathology Image Generation via Diagnostic Semantic Tokens and Prototype Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Minghao and Liu, Yichen and Liu, Yizhou and Chen, Zizhi and Tang, Jingqun and Wu, Xuecheng and Yang, Dingkang and Zhang, Lihua}, title = {Beyond Pixel Simulation: Pathology Image Generation via Diagnostic Semantic Tokens and Prototype Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42744-42754} }
OmniGround: A Comprehensive Spatio-Temporal Grounding Benchmark for Real-World Complex Scenarios-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Hong and Wu, Jingyu and Xu, Xiangkai and Xie, Kangni and Zhang, Yunchen and Zhong, Bin and Gao, Xurui and Zhang, Min-Ling}, title = {OmniGround: A Comprehensive Spatio-Temporal Grounding Benchmark for Real-World Complex Scenarios}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17588-17597} }
GeoMMBench and GeoMMAgent: Toward Expert-Level Multimodal Intelligence in Geoscience and Remote Sensing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Aoran and Cheng, Shihao and Xu, Yonghao and Ren, Yexian and Chen, Hongruixuan and Yokoya, Naoto}, title = {GeoMMBench and GeoMMAgent: Toward Expert-Level Multimodal Intelligence in Geoscience and Remote Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34843-34853} }
FlexTraj: Image-to-Video Generation with Flexible Point Trajectory Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zhiyuan and Wang, Can and Chen, Dongdong and Liao, Jing}, title = {FlexTraj: Image-to-Video Generation with Flexible Point Trajectory Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4221-4231} }
Enhancing Accuracy of Uncertainty Estimation in Appearance-based Gaze Tracking with Probabilistic Evaluation and Calibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Qiaojie and Zhang, Jiucai and Zhang, Amy and Zhang, Xiaoli}, title = {Enhancing Accuracy of Uncertainty Estimation in Appearance-based Gaze Tracking with Probabilistic Evaluation and Calibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13793-13801} }
Draft and Refine with Visual Experts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2026_CVPR, author = {Jeong, Sungheon and Masukawa, Ryozo and Park, Jihong and Yun, Sanggeon and Huang, Wenjun and Chen, Hanning and Imani, Mahdi and Imani, Mohsen}, title = {Draft and Refine with Visual Experts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18816-18826} }
Seeing is Improving: Visual Feedback for Iterative Text Layout Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Junrong and Fang, Shancheng and Qu, Yadong and Xie, Hongtao}, title = {Seeing is Improving: Visual Feedback for Iterative Text Layout Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25893-25903} }
Hier-COS: Making Deep Features Hierarchy-aware via Composition of Orthogonal Subspaces-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sani_2026_CVPR, author = {Sani, Depanshu and Anand, Saket}, title = {Hier-COS: Making Deep Features Hierarchy-aware via Composition of Orthogonal Subspaces}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11263-11272} }
Prototype-Guided Concept Erasure in Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Yuze and Lu, Jiahao and Shi, Hongxiang and Zhou, Yichao and Lu, Hong}, title = {Prototype-Guided Concept Erasure in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16509-16519} }
DBMSolver: A Training-free Diffusion Bridge Sampler for High-Quality Image-to-Image Translation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Venugopal_2026_CVPR, author = {Venugopal, Sankarshana and Mostafavi, Mohammad and Choi, Jonghyun}, title = {DBMSolver: A Training-free Diffusion Bridge Sampler for High-Quality Image-to-Image Translation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36062-36071} }
UETrack: A Unified and Efficient Framework for Single Object Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2026_CVPR, author = {Kang, Ben and Zhao, Jie and Chen, Xin and Geng, Wanting and Zhang, Bin and Zhang, Lu and Wang, Dong and Lu, Huchuan}, title = {UETrack: A Unified and Efficient Framework for Single Object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20890-20901} }
CoLC: Communication-Efficient Collaborative Perception with LiDAR Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Yushan and Zhang, Hui and Xia, Qiming and Jin, Yi and Li, Yidong}, title = {CoLC: Communication-Efficient Collaborative Perception with LiDAR Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2983-2992} }
Mixture of Style Experts for Diverse Image Stylization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Shihao and Ouyang, Ziheng and Kang, Yijia and Wang, Qilong and Zhou, Mi and Li, Bo and Cheng, Ming-Ming and Hou, Qibin}, title = {Mixture of Style Experts for Diverse Image Stylization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30500-30510} }
Consistent Instance Field for Dynamic Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Junyi and Nguyen, Van Nguyen and Planche, Benjamin and Tao, Jiachen and Sun, Changchang and Gao, Zhongpai and Zhao, Zhenghao and Choudhuri, Anwesa and Zhang, Gengyu and Zheng, Meng and Wang, Feiran and Chen, Terrence and Yan, Yan and Wu, Ziyan}, title = {Consistent Instance Field for Dynamic Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3783-3793} }
Multinex: Lightweight Low-light Image Enhancement via Multi-prior Retinex-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Brateanu_2026_CVPR, author = {Brateanu, Alexandru and Mu, Tingting and Ancuti, Codruta O. and Ancuti, Cosmin}, title = {Multinex: Lightweight Low-light Image Enhancement via Multi-prior Retinex}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29887-29896} }
More Natural, More Real: Object-aware Gaussian Splatting for 3D Visual Decoding from Human Brain-
[pdf]
[bibtex]@InProceedings{Jing_2026_CVPR, author = {Jing, Haodong and Jiang, Dongyao and Wang, Jixin and Jia, Junhao and Li, Yanshu and Ma, Yongqiang and Zheng, Nanning}, title = {More Natural, More Real: Object-aware Gaussian Splatting for 3D Visual Decoding from Human Brain}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19033-19044} }
FoleyDesigner: Immersive Stereo Foley Generation with Precise Spatio-Temporal Alignment for Film Clips-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Mengtian and Dai, Kunyan and Ding, Yi and Ni, Ruobing and Zhang, Ying and Wang, Wenwu and Xie, Zhifeng}, title = {FoleyDesigner: Immersive Stereo Foley Generation with Precise Spatio-Temporal Alignment for Film Clips}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4291-4300} }
HSI-GPT2: A Dual-Granularity Large Motion Reasoning Model with Diffusion Refinement for Human-Scene Interaction-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yuan and Li, Xiang and Li, Yali and Hou, Xuege and Wang, Shengjin}, title = {HSI-GPT2: A Dual-Granularity Large Motion Reasoning Model with Diffusion Refinement for Human-Scene Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16432-16442} }
Streamlined Knowledge Distillation-
[pdf]
[bibtex]@InProceedings{Jeong_2026_CVPR, author = {Jeong, Hyeon-Jin and Lee, Han-Jin and Choi, Seok-Hwan}, title = {Streamlined Knowledge Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26646-26655} }
Closed-Form Concept Erasure via Double Projections-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Chi and Cheng, Jingpu and Wang, Zhixian and Liu, Ping}, title = {Closed-Form Concept Erasure via Double Projections}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24503-24513} }
Intra-class Distribution-guided Generative Hashing with Neighbor Refinement for Cross-modal Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Hao and Huo, Yadong and Qin, Qibing and Zhang, Wenfeng and Huang, Lei}, title = {Intra-class Distribution-guided Generative Hashing with Neighbor Refinement for Cross-modal Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2671-2681} }
Prototype-based Causal Intervention for Multi-Label Image Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yanmin and Mao, Zhilong and Wang, Mao and Liu, Lihua and Wu, Jibing and Bao, Weidong}, title = {Prototype-based Causal Intervention for Multi-Label Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24738-24747} }
BriMA: Bridged Modality Adaptation for Multi-Modal Continual Action Quality Assessment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Kanglei and Li, Chang and Pan, Qingyi and Wang, Liyuan}, title = {BriMA: Bridged Modality Adaptation for Multi-Modal Continual Action Quality Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38904-38914} }
Decoupling Bias, Aligning Distributions: Synergistic Fairness Optimization for Deepfake Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2026_CVPR, author = {Ding, Feng and Yi, Wenhui and Zhou, Yunpeng and He, Xinan and Rao, Hong and Hu, Shu}, title = {Decoupling Bias, Aligning Distributions: Synergistic Fairness Optimization for Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32704-32713} }
Real2Sim2Real: RetinalDepth-64K for Depth Estimation in Posterior Segment Ophthalmic Surgery-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2026_CVPR, author = {Dong, Bingwen and Liu, Gan and Lu, Xiaoxi and Chen, Guangcheng and Zhang, Jialu and Hu, Yan and Zhang, Xiaoqing and Liu, Jiang}, title = {Real2Sim2Real: RetinalDepth-64K for Depth Estimation in Posterior Segment Ophthalmic Surgery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26899-26908} }
BioVITA: Biological Dataset, Model, and Benchmark for Visual-Textual-Acoustic Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shinoda_2026_CVPR, author = {Shinoda, Risa and Shiohara, Kaede and Inoue, Nakamasa and Saito, Kuniaki and Santo, Hiroaki and Okura, Fumio}, title = {BioVITA: Biological Dataset, Model, and Benchmark for Visual-Textual-Acoustic Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29336-29346} }
Anti-I2V: Safeguarding your Photos from Malicious Image-to-video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vu_2026_CVPR, author = {Vu, Duc and Nguyen, Anh and Tran, Chi and Tran, Anh}, title = {Anti-I2V: Safeguarding your Photos from Malicious Image-to-video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37621-37631} }
HFedATM: Hierarchical Federated Domain Generalization via Optimal Transport and Regularized Mean Aggregation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Thinh and Phan, Trung and Nguyen, Binh and Doan, Khoa D and Wong, Kok-Seng}, title = {HFedATM: Hierarchical Federated Domain Generalization via Optimal Transport and Regularized Mean Aggregation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39435-39444} }
Scaling the Long Video Understanding of Multimodal Large Language Models via Visual Memory Mechanism-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Tao and Zhang, Kun and Wu, Qiong and Chen, Xiao and Chang, Chao and Sun, Xiaoshuai and Zhou, Yiyi and Ji, Rongrong}, title = {Scaling the Long Video Understanding of Multimodal Large Language Models via Visual Memory Mechanism}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31877-31888} }
Learning 3D Shape Fidelity Metric from Real-world Distortions-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Xuelu and Luan, Tianyu and Zhu, Zixin and Sharma, Akshobhya and Nuney, Phani and Yuan, Junsong and Qiao, Chunming}, title = {Learning 3D Shape Fidelity Metric from Real-world Distortions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28391-28401} }
UDAPose: Unsupervised Domain Adaptation for Low-Light Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Haopeng and Ai, Yihao and Kim, Kabeen and Tan, Robby T. and Chen, Yixin and Wang, Bo}, title = {UDAPose: Unsupervised Domain Adaptation for Low-Light Human Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13781-13792} }
Hyperbolic Prototype Learning with Uncertainty-Aware Consistency for Continual Test-Time Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Gole_2026_CVPR, author = {Gole, Siddhant and Pal, Akash and More, Amit and Bhat, S Divakar and Chaudhuri, Subhasis and Banerjee, Biplab}, title = {Hyperbolic Prototype Learning with Uncertainty-Aware Consistency for Continual Test-Time Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34694-34703} }
Learning to Assist: Physics-Grounded Human-Human Control via Multi-Agent Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shibata_2026_CVPR, author = {Shibata, Yuto and Yamazaki, Kashu and Jayanti, Lalit and Aoki, Yoshimitsu and Isogawa, Mariko and Fragkiadaki, Katerina}, title = {Learning to Assist: Physics-Grounded Human-Human Control via Multi-Agent Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38345-38354} }
WHU-MARS: A Multispectral Aerial-Ground Benchmark Towards Any-Scenario Person Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yuxuan and Zhou, Zhongao and Yang, Bin and Li, He and Liang, Jian and Chen, Jun and Du, Bo and Ye, Mang}, title = {WHU-MARS: A Multispectral Aerial-Ground Benchmark Towards Any-Scenario Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25461-25471} }
SAGE: Scalable Agentic 3D Scene Generation for Embodied AI-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2026_CVPR, author = {Xia, Hongchi and Li, Xuan and Li, Zhaoshuo and Ma, Qianli and Xu, Jiashu and Liu, Ming-Yu and Cui, Yin and Lin, Tsung-Yi and Ma, Wei-Chiu and Wang, Shenlong and Song, Shuran and Wei, Fangyin}, title = {SAGE: Scalable Agentic 3D Scene Generation for Embodied AI}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22358-22368} }
GVIS: Generative Vector Image Steganography-
[pdf]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Zihao and Xu, Dawei and Li, Zihan and Zheng, Xixi and Zhang, Chuan}, title = {GVIS: Generative Vector Image Steganography}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9384-9393} }
Photo-Guided Tooth Segmentation on 3D Oral Scan Model-
[pdf]
[bibtex]@InProceedings{Zhuang_2026_CVPR, author = {Zhuang, Shaojie and Wei, Guangshun and He, Jiangxin and Zhou, Yuanfeng}, title = {Photo-Guided Tooth Segmentation on 3D Oral Scan Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37558-37567} }
OMGTex: One-stage Multi-style Facial Texture Reconstruction without Geometry Guidance-
[pdf]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Zitong and Qiu, Yuda and Ye, Zisheng and Han, Xiaoguang}, title = {OMGTex: One-stage Multi-style Facial Texture Reconstruction without Geometry Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21242-21251} }
Multi-Scale Gaussian-Language Map for Zero-shot Embodied Navigation and Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Sixian and Wang, Yiyao and Song, Xinhang and Zhang, Keming and Xu, Zijian and Jiang, Shuqiang}, title = {Multi-Scale Gaussian-Language Map for Zero-shot Embodied Navigation and Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37086-37097} }
Can You Learn to See Without Images? Procedural Warm-Up for Vision Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shinnick_2026_CVPR, author = {Shinnick, Zachary and Jiang, Liangze and Saratchandran, Hemanth and Teney, Damien and van den Hengel, Anton}, title = {Can You Learn to See Without Images? Procedural Warm-Up for Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27439-27448} }
LagerNVS: Latent Geometry for Fully Neural Real-time Novel View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Szymanowicz_2026_CVPR, author = {Szymanowicz, Stanislaw and Chen, Minghao and Wang, Jianyuan and Rupprecht, Christian and Vedaldi, Andrea}, title = {LagerNVS: Latent Geometry for Fully Neural Real-time Novel View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15443-15453} }
Structure-Aware Representation Distillation for Tiny-Dense Object Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xuesong and Xu, Anke and Cao, Wenbo and Ientilucci, Emmett}, title = {Structure-Aware Representation Distillation for Tiny-Dense Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34775-34783} }
Linear Image Generation by Synthesizing Exposure Brackets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dai_2026_CVPR, author = {Dai, Yuekun and Zhang, Zhoutong and Zhou, Shangchen and Zhao, Nanxuan}, title = {Linear Image Generation by Synthesizing Exposure Brackets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16206-16215} }
Quota-Calibrated Fine-Grained Alignment with Context-Aware Marginals for Text-based Person Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Dongsheng and Guo, Xinyuan and Zhang, Huijie and Hao, Pingting and Xia, Qiushi}, title = {Quota-Calibrated Fine-Grained Alignment with Context-Aware Marginals for Text-based Person Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31103-31112} }
SpaceTools: Tool-Augmented Spatial Reasoning via Double Interactive RL-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Siyi and Uy, Mikaela Angelina and Song, Chan Hee and Ladhak, Faisal and Murali, Adithyavairavan and Qu, Qing and Birchfield, Stan and Blukis, Valts and Tremblay, Jonathan}, title = {SpaceTools: Tool-Augmented Spatial Reasoning via Double Interactive RL}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37109-37120} }
VLM-Pruner: Buffering for Spatial Sparsity in an Efficient VLM Centrifugal Token Pruning Paradigm-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Zhenkai and Ma, Xiaowen and Ni, Zhenliang and Zhang, Dengming and Shu, Han and Jiang, Xin and Chen, Xinghao}, title = {VLM-Pruner: Buffering for Spatial Sparsity in an Efficient VLM Centrifugal Token Pruning Paradigm}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31952-31961} }
Adapting Point Cloud Analysis via Multimodal Bayesian Distribution Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Xingyu and Yi, Liang and Wang, Shuo and Zhu, Wenbo and Wu, Yongliang and Zhu, Beier and Zhang, Hanwang}, title = {Adapting Point Cloud Analysis via Multimodal Bayesian Distribution Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9976-9985} }
Part$^{2}$GS: Part-aware Modeling of Articulated Objects using 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Tianjiao and Shah, Vedant and Wahed, Muntasir and Shen, Ying and Nguyen, Kiet A. and Lourentzou, Ismini}, title = {Part{$^{2}$}GS: Part-aware Modeling of Articulated Objects using 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18913-18923} }
Anchor-Guided Gradient Alignment for Incomplete Multimodal Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Guan_2026_CVPR, author = {Guan, Zhi-Hao and Huang, Longfei and Yang, Yang}, title = {Anchor-Guided Gradient Alignment for Incomplete Multimodal Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37896-37905} }
Stake the Points: Structure-Faithful Instance Unlearning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2026_CVPR, author = {Hong, Kiseong and Shin, JungKyoo and Kim, Eunwoo}, title = {Stake the Points: Structure-Faithful Instance Unlearning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24524-24533} }
MOSAIC-GS: Monocular Scene Reconstruction via Advanced Initialization for Complex Dynamic Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Morkva_2026_CVPR, author = {Morkva, Svitlana and Patil, Vaishakh and Tonioni, Alessio and Oechsle, Michael and Wilder-Smith, Maximum and Hutter, Marco}, title = {MOSAIC-GS: Monocular Scene Reconstruction via Advanced Initialization for Complex Dynamic Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1167-1176} }
Physically Inspired Gaussian Splatting for HDR Novel View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Huimin and Bai, Yue and Wang, Hailing and Fu, Yun}, title = {Physically Inspired Gaussian Splatting for HDR Novel View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11808-11817} }
Understanding and Enforcing Weight Disentanglement in Task Arithmetic-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Shangge and Yin, Yuehan and Wang, Lei and Fan, Qi and Shi, Yinghuan and Li, Wenbin and Gao, Yang and Tao, Dacheng}, title = {Understanding and Enforcing Weight Disentanglement in Task Arithmetic}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28744-28753} }
Back to Source: Open-Set Continual Test-Time Adaptation via Domain Compensation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yingkai and Chen, Chaoqi and Huang, Hui}, title = {Back to Source: Open-Set Continual Test-Time Adaptation via Domain Compensation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7957-7966} }
HAD: Hallucination-Aware Diffusion Priors for 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xi and Sun, Weiwei and Ren, Zhou and Broaddus, Chris and Huang, Siyu and Guigues, Laurent}, title = {HAD: Hallucination-Aware Diffusion Priors for 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29781-29791} }
Phantom: Physical Object Interactions as Dynamic Triggers for NMS-Exploited Backdoors-
[pdf]
[supp]
[bibtex]@InProceedings{Huo_2026_CVPR, author = {Huo, Tianlin and Ran, Dongchuan and Duan, Ranjie and Zhu, Yao and Du, Peilun and Yao, Ningbo and Yan, Huanqian and Han, Xu and Yun, Qiang and Tan, Yuzheng and Bao, Yang and He, Yuan}, title = {Phantom: Physical Object Interactions as Dynamic Triggers for NMS-Exploited Backdoors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27906-27915} }
Evolutionary Multimodal Reasoning via Hierarchical Semantic Representation for Intent Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Qianrui and Xu, Hua and Gu, Yunjin and Wang, Yifan and Li, Songze and Zhang, Hanlei}, title = {Evolutionary Multimodal Reasoning via Hierarchical Semantic Representation for Intent Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14979-14989} }
Degradation-Consistent Test-Time Adaptation for All-in-One Image Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Ni and Nie, Shenghao and Luo, Xiaotong and Xie, Yuan and Qu, Yanyun}, title = {Degradation-Consistent Test-Time Adaptation for All-in-One Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15476-15485} }
ManifoldNeuS: Manifold-aware View Optimizability for Pose-Free Neural Surface Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xinxin and Wang, Xue and Zhou, Guoqing and Wang, Qing}, title = {ManifoldNeuS: Manifold-aware View Optimizability for Pose-Free Neural Surface Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {261-271} }
Towards Stealthy and Effective Backdoor Attacks on Lane Detection: A Naturalistic Data Poisoning Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2026_CVPR, author = {Liao, Yifan and Cao, Yuxin and Zhang, Yedi and He, Wentao and Xiao, Yan and Du, Xianglong and Huang, Zhiyong and Dong, Jin Song}, title = {Towards Stealthy and Effective Backdoor Attacks on Lane Detection: A Naturalistic Data Poisoning Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34950-34960} }
One-to-More: High-Fidelity Training-Free Anomaly Generation with Attention Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rao_2026_CVPR, author = {Rao, Haoxiang and Wang, Zhao and Si, Chenyang and Lyu, Yan and Duan, Yuanyi and Zhao, Fang and Shan, Caifeng}, title = {One-to-More: High-Fidelity Training-Free Anomaly Generation with Attention Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28492-28501} }
MARIS: Marine Open-Vocabulary Instance Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Bingyu and Wang, Feiyu and Zhang, Da and Zhao, Zhiyuan and Gao, Junyu and Li, Xuelong}, title = {MARIS: Marine Open-Vocabulary Instance Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24939-24949} }
Small Object, Great Challenge: A Benchmark for Small Object Visual Grounding-
[pdf]
[bibtex]@InProceedings{Jia_2026_CVPR, author = {Jia, Wenqi and Li, Ruifan and Lin, Pengyue and Feng, Fangxiang and Ma, Zhanyu and Wang, Xiaojie}, title = {Small Object, Great Challenge: A Benchmark for Small Object Visual Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31822-31832} }
Align While Search: Belief-Guided Exploratory Inference for World-Grounded Embodied Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bae_2026_CVPR, author = {Bae, Seohui and Kim, Jeonghye and Sung, Youngchul and Lim, Woohyung}, title = {Align While Search: Belief-Guided Exploratory Inference for World-Grounded Embodied Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29642-29651} }
Unlearning without Forgetting: Securely Removing Targeted Concepts from Large-Scale Vision-Language Open-Vocabulary Detectors-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Zhongze and Su, Xiu and Yang, Feng and Niu, Dan and You, Shan and Luo, Yueyi and Long, Jun}, title = {Unlearning without Forgetting: Securely Removing Targeted Concepts from Large-Scale Vision-Language Open-Vocabulary Detectors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6271-6281} }
rPPG-VQA: A Video Quality Assessment Framework for Unsupervised rPPG Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dai_2026_CVPR, author = {Dai, Tianyang and Chang, Ming and Chen, Yan and Hu, Yang}, title = {rPPG-VQA: A Video Quality Assessment Framework for Unsupervised rPPG Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1365-1375} }
PE3R: Perception-Efficient 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Jie and Wang, Shizun and Wang, Xinchao}, title = {PE3R: Perception-Efficient 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26877-26887} }
VKG-QA: Visual Knowledge Graph-based Question Answer for Large Multimodal Models-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2026_CVPR, author = {Du, Yuntao and Wang, Yiming and Yuan, Renshuo and Yue, Jincheng and Chen, Yijing and Fan, Yue and Zhang, Bo and Li, Qian and Cui, Lizhen}, title = {VKG-QA: Visual Knowledge Graph-based Question Answer for Large Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41213-41223} }
AdaCluster: Adaptive Query-Key Clustering for Sparse Attention in Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Haoyue and Wang, Shengnan and Qiao, Yulin and Zhang, Juncheng and Bai, Youhui and Gong, Ping and Jin, Zewen and Li, Cheng}, title = {AdaCluster: Adaptive Query-Key Clustering for Sparse Attention in Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43249-43259} }
InstantViR: Real-Time Video Inverse Problem Solver with Distilled Diffusion Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2026_CVPR, author = {Bai, Weimin and Xu, Suzhe and Ren, Yiwei and Hao, Jinhua and Sun, Ming and Chen, Wenzheng and Sun, He}, title = {InstantViR: Real-Time Video Inverse Problem Solver with Distilled Diffusion Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16583-16592} }
HyperGait: Unleashing the Power of Parsing for Gait Recognition in the Wild via Hypergraph-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Jinkai and Wei, Jiaqing and Jin, Xinxiang and Sun, Yaoqi and Sheng, Xichun and Li, Ming and Qu, Liangqiong and Liu, Xinchen and Liu, Wu}, title = {HyperGait: Unleashing the Power of Parsing for Gait Recognition in the Wild via Hypergraph}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18513-18522} }
TWINGS: Thin Plate Splines Warp-aligned Initialization for Sparse-View Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Hyeseong and Son, Geonhui and Lee, Deukhee and Hwang, Dosik}, title = {TWINGS: Thin Plate Splines Warp-aligned Initialization for Sparse-View Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26062-26071} }
CUE: Concept-Aware Multi-Label Expansion to Mitigate Concept Confusion in Long-Tailed Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Ruichi and Shang, Chikai and Yang, Jiacheng and Li, Mengke and Zhou, Yang and Gao, Junlong and Lu, Yang}, title = {CUE: Concept-Aware Multi-Label Expansion to Mitigate Concept Confusion in Long-Tailed Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15030-15039} }
LOREAL: Mitigating Low-Resolution Challenges in Vision-Language Models with Attribute-driven Prompt Self-Distillation-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xucong and Wang, Pengkun and Zhao, Zhe and Yu, Liheng and Mao, Rui and Wang, Yang}, title = {LOREAL: Mitigating Low-Resolution Challenges in Vision-Language Models with Attribute-driven Prompt Self-Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39152-39163} }
VOSR: A Vision-Only Generative Model for Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Rongyuan and Sun, Lingchen and Zhang, Zhengqiang and Kong, Xiangtao and Zhao, Jixin and Wang, Shihao and Zhang, Lei}, title = {VOSR: A Vision-Only Generative Model for Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16311-16321} }
Cov2Pose: Leveraging Spatial Covariance for Direct Manifold-aware 6-DoF Object Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Ousalah_2026_CVPR, author = {Ousalah, Nassim Ali and Rostami, Peyman and Gaudilli\`ere, Vincent and Koumandakis, Emmanuel and Kacem, Anis and Ghorbel, Enjie and Aouada, Djamila}, title = {Cov2Pose: Leveraging Spatial Covariance for Direct Manifold-aware 6-DoF Object Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40727-40738} }
DriveLaW: Unifying Planning and Video Generation in a Latent Driving World-
[pdf]
[supp]
[bibtex]@InProceedings{Xia_2026_CVPR, author = {Xia, Tianze and Li, Yongkang and Zhou, Lijun and Yao, Jingfeng and Xiong, Kaixin and Sun, Haiyang and Wang, Bing and Ma, Kun and Chen, Guang and Ye, Hangjun and Liu, Wenyu and Wang, Xinggang}, title = {DriveLaW: Unifying Planning and Video Generation in a Latent Driving World}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39701-39712} }
Mixture of Prototypes for Test-time Adaptive Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Guangrui and Zhu, Zhengyu and Ge, Yongxin}, title = {Mixture of Prototypes for Test-time Adaptive Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24990-25000} }
OrthoFuse: Training-free Riemannian Fusion of Orthogonal Style-Concept Adapters for Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Aliev_2026_CVPR, author = {Aliev, Ali and Garifullin, Kamil and Yudin, Nikolay and Soboleva, Vera and Molozhavenko, Alexander and Oseledets, Ivan and Alanov, Aibek and Rakhuba, Maxim}, title = {OrthoFuse: Training-free Riemannian Fusion of Orthogonal Style-Concept Adapters for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36009-36018} }
MatMart: Material Reconstruction of 3D Objects via Diffusion-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Xiuchao and Zhu, Pengfei and Lyu, Jiangjing and Liu, Xinguo and Guo, Jie and Guo, Yanwen and Xu, Weiwei and Lyu, Chengfei}, title = {MatMart: Material Reconstruction of 3D Objects via Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2336-2345} }
No Labels, No Look-Ahead: Unsupervised Online Video Stabilization with Classical Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Tao and Ren, Kan and Wan, Gang and Wen, Shibo}, title = {No Labels, No Look-Ahead: Unsupervised Online Video Stabilization with Classical Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6868-6877} }
Generative Neural Video Compression via Video Diffusion Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Qi and Cheng, Hao and Yang, Tinghan and Jin, Libiao and Ma, Siwei}, title = {Generative Neural Video Compression via Video Diffusion Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43239-43248} }
LLaMo: Scaling Pretrained Language Models for Unified Motion Understanding and Generation with Continuous Autoregressive Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zekun and An, Sizhe and Tang, Chengcheng and Guo, Chuan and Shugurov, Ivan and Zhang, Linguang and Zhao, Amy and Sridhar, Srinath and Tao, Lingling and Mittal, Abhay}, title = {LLaMo: Scaling Pretrained Language Models for Unified Motion Understanding and Generation with Continuous Autoregressive Tokens}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2209-2220} }
Conditional Factuality Controlled LLMs with Generalization Certificates via Conformal Sampling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Kai and Pan, Qingtao and Li, Shuo}, title = {Conditional Factuality Controlled LLMs with Generalization Certificates via Conformal Sampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3627-3635} }
Stable and Efficient Single-Rollout RL for Multimodal Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Rui and Yu, Dian and Ke, Lei and Liu, Haolin and Zhou, Yujun and Liang, Zhenwen and Mi, Haitao and Tokekar, Pratap and Yu, Dong}, title = {Stable and Efficient Single-Rollout RL for Multimodal Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12009-12018} }
DPGF-Net: Dual-Prior Guided Fusion Network for Joint Assessment of Perceptual Quality and Semantic Consistency in AI-Generated Images-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Tao and Liao, Xingran and Zhou, Mingliang}, title = {DPGF-Net: Dual-Prior Guided Fusion Network for Joint Assessment of Perceptual Quality and Semantic Consistency in AI-Generated Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19529-19538} }
All-in-One Slider for Attribute Manipulation in Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Weixin and Zhu, Hongguang and Wang, Wei and Liu, Yahui and Wang, Mengyu and Nie, Xuecheng}, title = {All-in-One Slider for Attribute Manipulation in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18693-18702} }
Understanding and Mitigating Hallucinations in Multimodal Chain-of-Thought Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Ji and Suo, Wei and Wang, Peng and Zhang, Yanning}, title = {Understanding and Mitigating Hallucinations in Multimodal Chain-of-Thought Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40224-40234} }
BlackMirror: Black-Box Backdoor Detection for Text-to-Image Models via Instruction-Response Deviation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Feiran and Xu, Qianqian and Bao, Shilong and Yang, Zhiyong and Zhao, Xilin and Cao, Xiaochun and Huang, Qingming}, title = {BlackMirror: Black-Box Backdoor Detection for Text-to-Image Models via Instruction-Response Deviation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30098-30109} }
Envision, Attend, Then Respond: Counterfactual Hallucination Mitigation in Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Yuxuan and Shi, Fan and Zhu, Rui and Li, Xu and Chen, Xiaolei and Liu, Zhe and Li, Bin and Xue, Xiangyang}, title = {Envision, Attend, Then Respond: Counterfactual Hallucination Mitigation in Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18261-18272} }
From Where Things Are to What They Are For: Benchmarking Spatial-Functional Intelligence in Multimodal LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Le and Yang, Jihan and Krishnan, Soundarya and Majmudar, Jimit and Ge, Xiou and Puri, Prasoon and Saraf, Prathamesh and Bhargava, Shruti and Piraviperumal, Dhivya and Ling, Yinan and Pan, Cindy and Yu, Hong and Agrawal, Aishwarya and Tseng, Bo-Hsiang}, title = {From Where Things Are to What They Are For: Benchmarking Spatial-Functional Intelligence in Multimodal LLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12052-12063} }
Phrase-Grounding-Aware Supervised Fine-Tuning for Chart Recognition via Side-Masked Attention-
[pdf]
[supp]
[bibtex]@InProceedings{Ito_2026_CVPR, author = {Ito, Koichiro}, title = {Phrase-Grounding-Aware Supervised Fine-Tuning for Chart Recognition via Side-Masked Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9501-9511} }
Intrinsic Image Fusion for Multi-View 3D Material Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Kocsis_2026_CVPR, author = {Kocsis, Peter and H\"ollein, Lukas and Nie{\ss}ner, Matthias}, title = {Intrinsic Image Fusion for Multi-View 3D Material Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22571-22580} }
SparseSplat: Towards Applicable Feed-Forward 3D Gaussian Splatting with Pixel-Unaligned Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zicheng and Meng, Xiangting and Wu, Ke and Ding, Wenchao}, title = {SparseSplat: Towards Applicable Feed-Forward 3D Gaussian Splatting with Pixel-Unaligned Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5049-5058} }
Taming Video Models for 3D and 4D Generation via Zero-Shot Camera Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Chenxi and Yang, Yanming and Zhao, Tong and Li, Ruibo and Zhang, Chi}, title = {Taming Video Models for 3D and 4D Generation via Zero-Shot Camera Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40352-40363} }
Learning Eigenstructures of Unstructured Data Manifolds-
[pdf]
[supp]
[bibtex]@InProceedings{Velich_2026_CVPR, author = {Velich, Roy and Piven, Arkadi and Bensa{\"\i}d, David and Cremers, Daniel and Dag\`es, Thomas and Kimmel, Ron}, title = {Learning Eigenstructures of Unstructured Data Manifolds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36201-36214} }
E2EGS: Event-to-Edge Gaussian Splatting for Pose-Free 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Yunsoo and Sung, Changki and Hong, Dasol and Myung, Hyun}, title = {E2EGS: Event-to-Edge Gaussian Splatting for Pose-Free 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4922-4931} }
Revisiting Optimal Coding for I-ToF under Practical Sensor Constraints-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Wenbin and Iwaguchi, Takafumi and Sagawa, Ryusuke and Kawasaki, Hiroshi}, title = {Revisiting Optimal Coding for I-ToF under Practical Sensor Constraints}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12501-12510} }
Scaling Test-Time Robustness of Vision-Language Models via Self-Critical Inference Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Kaihua and Qi, Jiaxin and Ou, Jinli and Zheng, Yuhua and Huang, Jianqiang}, title = {Scaling Test-Time Robustness of Vision-Language Models via Self-Critical Inference Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39361-39371} }
DPAR: Dynamic Patchification for Efficient Autoregressive Visual Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Srivastava_2026_CVPR, author = {Srivastava, Divyansh and Mehra, Akshay and Maneriker, Pranav and Sanyal, Debopam and Raj, Vishnu and Kamarshi, Vijay and Du, Fan and Kimball, Joshua}, title = {DPAR: Dynamic Patchification for Efficient Autoregressive Visual Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23215-23226} }
TrackMAE: Video Representation Learning via Track Mask and Predict-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vandeghen_2026_CVPR, author = {Vandeghen, Renaud and Thoker, Fida Mohammad and Van Droogenbroeck, Marc and Ghanem, Bernard}, title = {TrackMAE: Video Representation Learning via Track Mask and Predict}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13604-13614} }
Spectral Mixture-of-Experts for Continual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Chen and Dong, Xingbo and Shen, Xuelin and Jin, Zhe}, title = {Spectral Mixture-of-Experts for Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39972-39982} }
Learning Convex Decomposition via Feature Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yuezhi and Huang, Qixing and Uy, Mikaela Angelina and Sharp, Nicholas}, title = {Learning Convex Decomposition via Feature Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36190-36200} }
PrivSynth: Alternating and Control-Based Optimization for Privacy and Utility in Synthetic Data-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Xinyuan and Gu, Hanlin and Song, Guibao and Zhu, Gongxi and Zou, Yifei and Fan, Lixin and Han, Yuxing}, title = {PrivSynth: Alternating and Control-Based Optimization for Privacy and Utility in Synthetic Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17430-17439} }
Nonparametric Deep Fine-grained Clustering with Low-Rank Guided Vision-Language Model-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Xulun and Wu, Benyu and Hong, Jie and Zhou, Kun}, title = {Nonparametric Deep Fine-grained Clustering with Low-Rank Guided Vision-Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2434-2444} }
FreeScale: Scaling 3D Scenes via Certainty-Aware Free-View Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Chenhan and Chen, Yu and Zhang, Qingwen and Song, Jifei and Xu, Songcen and Yeung, Dit-Yan and Deng, Jiankang}, title = {FreeScale: Scaling 3D Scenes via Certainty-Aware Free-View Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {330-340} }
CARE-Edit: Condition-Aware Routing of Experts for Contextual Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yucheng and Wang, Zedong and Wu, Yuetong and Ma, Yue and Xu, Dan}, title = {CARE-Edit: Condition-Aware Routing of Experts for Contextual Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9019-9028} }
PGA: Prior-free Generative Attack for Practical No-box Scenario-
[pdf]
[supp]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Hongyu and Yuan, Xiang and Cheng, Gong}, title = {PGA: Prior-free Generative Attack for Practical No-box Scenario}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13296-13305} }
Translating Signals to Languages for sEMG-Based Activity Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Ming and Qu, Haoxuan and Ke, Qiuhong and Zhou, Wei and Rahmani, Hossein and Liu, Jun}, title = {Translating Signals to Languages for sEMG-Based Activity Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9317-9329} }
Beyond Binary Contrast: Modeling Continuous Skeleton Action Spaces with Transitional Anchors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Yingjie and Wang, Yi and Wang, Jiaze and Liu, Anfeng and Tian, Zhuotao}, title = {Beyond Binary Contrast: Modeling Continuous Skeleton Action Spaces with Transitional Anchors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6075-6084} }
Enhancing Mixture-of-Experts Specialization via Cluster-Aware Upcycling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chu_2026_CVPR, author = {Chu, Sanghyeok and Ahn, Pyunghwan and Song, Gwangmo and Kim, Seung Hwan and Lee, Honglak and Han, Bohyung}, title = {Enhancing Mixture-of-Experts Specialization via Cluster-Aware Upcycling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11283-11292} }
M4Human: A Large-Scale Multimodal mmWave Radar Benchmark for Human Mesh Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Junqiao and Zhou, Yunjiao and Yang, Yizhuo and Cui, Xinyuan and Zhang, Jiarui and Xie, Lihua and Yang, Jianfei and Lu, Chris Xiaoxuan and Ding, Fangqiang}, title = {M4Human: A Large-Scale Multimodal mmWave Radar Benchmark for Human Mesh Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42836-42846} }
Deep Feature Deformation Weights-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Richard and Lang, Itai and Hanocka, Rana}, title = {Deep Feature Deformation Weights}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27378-27387} }
ELiC: Efficient LiDAR Geometry Compression via Cross-Bit-depth Feature Propagation and Bag-of-Encoders-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Junsik and Bang, Gun and Kim, Soowoong}, title = {ELiC: Efficient LiDAR Geometry Compression via Cross-Bit-depth Feature Propagation and Bag-of-Encoders}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39011-39020} }
PV-Ground: Text-Guided Point-Voxel Interaction for 3D Visual Grounding-
[pdf]
[supp]
[bibtex]@InProceedings{Shang_2026_CVPR, author = {Shang, Junpeng and Shao, Feifei and Xiao, Jun and Li, Lin and Wang, Hongwei and Ma, Dongfang}, title = {PV-Ground: Text-Guided Point-Voxel Interaction for 3D Visual Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38657-38667} }
StoryTailor: A Zero-Shot Pipeline for Action-Rich Multi-Subject Visual Narratives-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Jinghao and Zhang, Yuhe and Geng, Guohua and Li, Kang and Zhang, Han}, title = {StoryTailor: A Zero-Shot Pipeline for Action-Rich Multi-Subject Visual Narratives}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21922-21931} }
PhysGS: Bayesian-Inferred Gaussian Splatting for Physical Property Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chopra_2026_CVPR, author = {Chopra, Samarth and Liang, Jing and Seneviratne, Gershom and Manocha, Dinesh}, title = {PhysGS: Bayesian-Inferred Gaussian Splatting for Physical Property Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18980-18990} }
GeoBridge: A Semantic-Anchored Multi-View Foundation Model Bridging Images and Text for Geo-Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Zixuan and Zhang, Jing and Wang, Di and Zhou, Zidie and Liu, Wenbin and Guo, Haonan and Wang, En and Du, Bo}, title = {GeoBridge: A Semantic-Anchored Multi-View Foundation Model Bridging Images and Text for Geo-Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27793-27803} }
Adapting a Pre-trained Single-Cell Foundation Model to Spatial Gene Expression Generation from Histology Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Donghai and Li, Yongheng and Wang, Zhen and Zeng, Yuansong and Min, Wenwen}, title = {Adapting a Pre-trained Single-Cell Foundation Model to Spatial Gene Expression Generation from Histology Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5720-5729} }
Eliciting Complex Spatial Reasoning in MLLMs through Wide-Baseline Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Zhong_2026_CVPR, author = {Zhong, Hao and Zhu, Muzhi and Zeng, Shenyan and Li, Anzhou and Chen, Cong and Geng, Hua and Shi, Duochao and Ye, Wentao and Lin, Tao and Chen, Hao and Shen, Chunhua}, title = {Eliciting Complex Spatial Reasoning in MLLMs through Wide-Baseline Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16768-16778} }
Learning Differentiable Hierarchies in 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Youqi and Zhou, Wugen and Zha, Hongbin}, title = {Learning Differentiable Hierarchies in 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40939-40948} }
GraspLDP: Towards Generalizable Grasping Policy via Latent Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2026_CVPR, author = {Xiang, Enda and Ma, Haoxiang and Ma, Xinzhu and Liu, Zicheng and Huang, Di}, title = {GraspLDP: Towards Generalizable Grasping Policy via Latent Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28032-28041} }
Attention, May I Have Your Decision? Localizing Generative Choices in Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zaleska_2026_CVPR, author = {Zaleska, Katarzyna and Popek, {\L}ukasz and Wysocza{\'n}ska, Monika and Deja, Kamil}, title = {Attention, May I Have Your Decision? Localizing Generative Choices in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30854-30863} }
Multi-speaker Attention Alignment for Multimodal Social Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ouyang_2026_CVPR, author = {Ouyang, Liangyang and Huang, Yifei and Zhang, Mingfang and Kang, Caixin and Furuta, Ryosuke and Sato, Yoichi}, title = {Multi-speaker Attention Alignment for Multimodal Social Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24608-24619} }
Underground Plant Exploration: Non-Destructive 3D Root Assessment with GPR Based on Point Graph Neural Network-
[pdf]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Yuwei and Lu, Guoyu}, title = {Underground Plant Exploration: Non-Destructive 3D Root Assessment with GPR Based on Point Graph Neural Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15616-15626} }
OmniDocLayout: Towards Diverse Document Layout Generation via Coarse-to-Fine LLM Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2026_CVPR, author = {Kang, Hengrui and Gu, Zhuangcheng and Zhao, Zhiyuan and Wen, Zichen and Wang, Bin and Li, Weijia and He, Conghui}, title = {OmniDocLayout: Towards Diverse Document Layout Generation via Coarse-to-Fine LLM Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3208-3218} }
Few-for-Many Personalized Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Ping and Zhang, Tiantian and Lin, Xi and Li, Xiang and Tang, Zhi-Ri and Zhang, Qingfu}, title = {Few-for-Many Personalized Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17515-17524} }
Video Generation with Stable Transparency via Shiftable RGB-A Distribution Learner-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2026_CVPR, author = {Dong, Haotian and Wang, Wenjing and Li, Chen and Lyu, Jing and Lin, Di}, title = {Video Generation with Stable Transparency via Shiftable RGB-A Distribution Learner}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1885-1894} }
Lenses: Toward Polysemous Vision-Language Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Alomari_2026_CVPR, author = {Alomari, Hani and Asgarov, Ali and Thomas, Chris}, title = {Lenses: Toward Polysemous Vision-Language Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37810-37820} }
Preference-Aligned LoRA Merging: Preserving Subspace Coverage and Addressing Directional Anisotropy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2026_CVPR, author = {Jeong, Wooseong and Lee, Wonyoung and Yoon, Kuk-Jin}, title = {Preference-Aligned LoRA Merging: Preserving Subspace Coverage and Addressing Directional Anisotropy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {823-835} }
Uni3R: Unified 3D Reconstruction and Semantic Understanding via Generalizable Gaussian Splatting from Unposed Multi-View Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Xiangyu and Jiang, Haoyi and Liu, Liu and Nam, Seungtae and Kang, Gyeongjin and Wang, Xinjie and Sui, Wei and Su, Zhizhong and Liu, Wenyu and Wang, Xinggang and Park, Eunbyung}, title = {Uni3R: Unified 3D Reconstruction and Semantic Understanding via Generalizable Gaussian Splatting from Unposed Multi-View Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33280-33290} }
HanDyVQA: A Video QA Benchmark for Fine-Grained Hand-Object Interaction Dynamics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tateno_2026_CVPR, author = {Tateno, Masatoshi and Kato, Gido and Kataoka, Hirokatsu and Sato, Yoichi and Yagi, Takuma}, title = {HanDyVQA: A Video QA Benchmark for Fine-Grained Hand-Object Interaction Dynamics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3455-3465} }
Composite-Attribute Person Re-Identification via Pose-Guided Disentanglement-
[pdf]
[supp]
[bibtex]@InProceedings{Patwari_2026_CVPR, author = {Patwari, Kartik and Vesdapunt, Noranart and Wang, Chien-Yi and Li, Dawei and Huynh, Cong Phuoc and Zhou, Ning and Chuah, Chen-Nee and Fu, Kah Kuen}, title = {Composite-Attribute Person Re-Identification via Pose-Guided Disentanglement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13812-13823} }
AnchorSplat: Feed-Forward 3D Gaussian Splatting With 3D Geometric Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xiaoxue and Zheng, Xiaoxu and Yin, Yixuan and Zhao, Tiao and Tang, Kaihua and Mi, Michael Bi and Xu, Zhan and Chen, Dave Zhenyu}, title = {AnchorSplat: Feed-Forward 3D Gaussian Splatting With 3D Geometric Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18924-18933} }
Circular-DPO: Aligning Multi-Stage 3D Generative Models via Preference Feedback Loop-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zejian and Ma, Jiarui and Xu, Han and Zheng, Weiting and Zhu, Yangrui and Meng, Chenye and Chen, Pei and Yang, Ling and Yang, Zhiyuan and Yang, Changyuan and Yang, Guang and Koh, Immanuel and Sun, Lingyun}, title = {Circular-DPO: Aligning Multi-Stage 3D Generative Models via Preference Feedback Loop}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32592-32601} }
YOLO-ULM: Ultra-Lightweight Models for Real-Time Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Shasha and Li, Chong and Wang, Xinning and Li, Xuebo}, title = {YOLO-ULM: Ultra-Lightweight Models for Real-Time Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18418-18427} }
ProcessMaker: A Generalized Process Visualization Framework with Adaptive Sequence Steps on Diffusion Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Mengling and You, Sisi and Li, Yaning and Bao, Bing-Kun}, title = {ProcessMaker: A Generalized Process Visualization Framework with Adaptive Sequence Steps on Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25699-25708} }
Align Images Before You Generate-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shihua and Shen, Qiuhong and Wang, Xinchao}, title = {Align Images Before You Generate}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30521-30531} }
Recover to Predict: Progressive Retrospective Learning for Variable-Length Trajectory Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Hao and Qi, Lu and Li, Xiangtai and Zhang, Jie and Liu, Yi and Yang, Xu and Fan, Mingyu and Luo, Fei}, title = {Recover to Predict: Progressive Retrospective Learning for Variable-Length Trajectory Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17863-17873} }
SpikeTrack: High-performance and Energy-efficient Event-Based Object Tracking with Spiking Neural Network-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yang and Zhang, Jiqing and Sun, Chuanyu and Liu, Qianhui and Ge, Huilin and Wei, Ziqi and Yang, Xin}, title = {SpikeTrack: High-performance and Energy-efficient Event-Based Object Tracking with Spiking Neural Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {926-935} }
CoSMo3D: Open-World Promptable 3D Semantic Segmentation through LLM-Guided Canonical Spatial Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Jin_2026_CVPR, author = {Jin, Li and Chen, Weikai and Wang, Yujie and Yin, Yingda and Hu, Zeyu and Zhang, Runze and Luo, Keyang and Qian, Shengju and Wang, Xin and Qin, Xueying}, title = {CoSMo3D: Open-World Promptable 3D Semantic Segmentation through LLM-Guided Canonical Spatial Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14325-14334} }
Stabilizing Streaming Video Geometry via Dynamic Feature Normalization-
[pdf]
[supp]
[bibtex]@InProceedings{Lyu_2026_CVPR, author = {Lyu, Xiaoyang and Liu, Muxin and Wu, Xiaoshan and Wang, Ruicheng and Huang, Yi-Hua and Sun, Yang-Tian and Shi, Shaoshuai and Qi, Xiaojuan}, title = {Stabilizing Streaming Video Geometry via Dynamic Feature Normalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7577-7587} }
Learning to Solve PDEs on Neural Shape Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Welschinger_2026_CVPR, author = {Welschinger, Lilian and Liu, Yilin and Wang, Zican and Mitra, Niloy J.}, title = {Learning to Solve PDEs on Neural Shape Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20263-20272} }
FantasyVLN: Unified Multimodal Chain-of-Thought Reasoning for Vision-and-Language Navigation-
[pdf]
[supp]
[bibtex]@InProceedings{Zuo_2026_CVPR, author = {Zuo, Jing and Mu, Lingzhou and Jiang, Fan and Ma, Chengcheng and Xu, Mu and Qi, Yonggang}, title = {FantasyVLN: Unified Multimodal Chain-of-Thought Reasoning for Vision-and-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29674-29683} }
Omni-Fake: Benchmarking Unified Multimodal Social Media Deepfake Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Tianxiao and Huang, Zhenglin and Wen, Haiquan and He, Yiwei and Li, Xinze and Zhu, Bingyu and Duan, Wuhui and Chen, Congang and Fu, Zeyu and Dong, Yi and Wu, Baoyuan and Li, Xiangtai and Cheng, Guangliang}, title = {Omni-Fake: Benchmarking Unified Multimodal Social Media Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30299-30311} }
Mimic Human Cognition, Master Multi-Image Reasoning: A Meta-Action Framework for Enhanced Visual Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Jianghao and Li, Qingbin and Sun, Kun and Ding, Cheng and Wang, Jie and Chen, Qin and Zhou, Jie and Wang, Nan and Li, Changqing and Wu, Pei and Xu, Jian and Yang, Zheming and He, Liang}, title = {Mimic Human Cognition, Master Multi-Image Reasoning: A Meta-Action Framework for Enhanced Visual Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19253-19264} }
PoseGaussian: 6D Pose Estimation for Unseen Objects via Sparse-View Object-Level 3D Gaussian Splatting-
[pdf]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Wubin and Gai, Shaoyan and Da, Feipeng}, title = {PoseGaussian: 6D Pose Estimation for Unseen Objects via Sparse-View Object-Level 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4698-4707} }
ELV-Halluc: Benchmarking Semantic Aggregation Hallucinations in Video Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Hao and Wang, Jiahao and Zhang, Yaolun and Wang, Ruohui and Zheng, Xuanyu and Tang, Yepeng and Lin, Dahua and Lu, Lewei}, title = {ELV-Halluc: Benchmarking Semantic Aggregation Hallucinations in Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32572-32581} }
AviaSafe: A Physics-Informed Data-Driven Model for Aviation Safety-Critical Cloud Forecasts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Zijian and Huang, Qiusheng and Guo, Anboyu and Zhong, Xiaohui and Li, Hao}, title = {AviaSafe: A Physics-Informed Data-Driven Model for Aviation Safety-Critical Cloud Forecasts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33143-33152} }
Select, Hypothesize and Verify: Towards Verified Neuron Concept Interpretation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ji_2026_CVPR, author = {Ji, ZeBin and Hu, Yang and Bi, Xiuli and Liu, Bo and Xiao, Bin}, title = {Select, Hypothesize and Verify: Towards Verified Neuron Concept Interpretation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31316-31325} }
M4-RAG: A Massive-Scale Multilingual Multi-Cultural Multimodal RAG-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Anugraha_2026_CVPR, author = {Anugraha, David and Irawan, Patrick Amadeus and Singh, Anshul and Lee, En-Shiun Annie and Winata, Genta Indra}, title = {M4-RAG: A Massive-Scale Multilingual Multi-Cultural Multimodal RAG}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23083-23094} }
SparseOIT: Improving Order-Independent Transparency 3DGS via Active Set Method-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Wentao and Kong, Fanzhen and Kang, Zejian and Huang, Xiangru}, title = {SparseOIT: Improving Order-Independent Transparency 3DGS via Active Set Method}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41012-41021} }
Hypergraph-State Collaborative Reasoning for Multi-Object Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Zikai and Yu, Junqing and Chen, Yi-Ping Phoebe and Yang, Wei and Wang, Xinchao}, title = {Hypergraph-State Collaborative Reasoning for Multi-Object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28123-28133} }
Simple Agents Outperform Experts in Biomedical Imaging Workflow Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xuefei and Horstmann, Kai and Lin, Ethan and Chen, Jonathan and Farhang, Alexander and Stiles, Sophia and Sehgal, Atharva and Light, Jonathan and Van Valen, David and Yue, Yisong and Sun, Jennifer J.}, title = {Simple Agents Outperform Experts in Biomedical Imaging Workflow Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13680-13690} }
M^3KG-RAG: Multi-hop Multimodal Knowledge Graph-enhanced Retrieval-Augmented Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Hyeongcheol and Seo, Jiyoung and Mun, Jaewon and Park, Hogun and Byeon, Wonmin and Kim, Sung June and Im, Hyeonsoo and Lee, JeungSub and Kim, Sangpil}, title = {{M$^3$KG-RAG}: Multi-hop Multimodal Knowledge Graph-enhanced Retrieval-Augmented Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14968-14978} }
Guiding Token-Sparse Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Krause_2026_CVPR, author = {Krause, Felix and Baumann, Stefan Andreas and Schusterbauer, Johannes and Grebenkova, Olga and Gui, Ming and Hu, Vincent Tao and Ommer, Bj{\"o}rn}, title = {Guiding Token-Sparse Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35789-35799} }
Foundation Encoders Are All You Need for Preference-Aware Personalization-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Hyungjin and Ahn, Seokho and Seo, Young-Duk}, title = {Foundation Encoders Are All You Need for Preference-Aware Personalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14692-14701} }
SmokeSVD: Smoke Reconstruction from A Single View via Progressive Novel View Synthesis and Refinement with Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Chen and Dong, Shanshan and Qiu, Sheng and Han, Jianmin and Zhao, Yibo and Gao, Zan and Komura, Taku and Huang, Kemeng}, title = {SmokeSVD: Smoke Reconstruction from A Single View via Progressive Novel View Synthesis and Refinement with Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7414-7424} }
FedDAP: Domain-Aware Prototype Learning for Federated Learning under Domain Shift-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Le_2026_CVPR, author = {Le, Huy Q. and Nguyen, Loc X. and Qiao, Yu and Kim, Seong Tae and Huh, Eui-Nam and Hong, Choong Seon}, title = {FedDAP: Domain-Aware Prototype Learning for Federated Learning under Domain Shift}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3390-3399} }
RealUnify: Do Unified Models Truly Benefit from Unification? A Comprehensive Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Yang and Dong, Yuhao and Ding, Yue and Wang, Yuran and Zhu, Xuanyu and Zhou, Sheng and Liu, Wenting and Tian, Haochen and Wang, Rundong and Wang, Huanqian and Liu, Zuyan and Zeng, Bohan and Chen, Ruizhe and Wang, Qixun and Zhang, Zhuoran and Chen, Xinlong and Tong, Chengzhuo and Li, Bozhou and Liu, Qiang and Wang, Haotian and Yang, Wenjing and Zhang, Yuanxing and Wan, Pengfei and Zhang, Yi-Fan and Liu, Ziwei}, title = {RealUnify: Do Unified Models Truly Benefit from Unification? A Comprehensive Benchmark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22488-22497} }
ScenDi: 3D-to-2D Scene Diffusion Cascades for Urban Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Hanlei and Shao, Jiahao and Chen, Xinya and Tan, Xiyang and Miao, Sheng and Shen, Yujun and Liao, Yiyi}, title = {ScenDi: 3D-to-2D Scene Diffusion Cascades for Urban Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40291-40302} }
Grounded Chain-of-Thought for Multimodal Large Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Qiong and Yang, Xiangcong and Zhou, Yiyi and Fang, Chenxin and Song, Baiyang and Sun, Xiaoshuai and Ji, Rongrong}, title = {Grounded Chain-of-Thought for Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33577-33587} }
MagicQuill V2: Precise and Interactive Image Editing with Layered Visual Cues-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Zichen and Yu, Yue and Ouyang, Hao and Wang, Qiuyu and Ma, Shuailei and Cheng, Ka Leong and Wang, Wen and Bai, Qingyan and Zhang, Yuxuan and Zeng, Yanhong and Li, Yixuan and Zhu, Xing and Shen, Yujun and Chen, Qifeng}, title = {MagicQuill V2: Precise and Interactive Image Editing with Layered Visual Cues}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22467-22477} }
MambaLiteUNet: Cross-Gated Adaptive Feature Fusion for Robust Skin Lesion Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rahman_2026_CVPR, author = {Rahman, Md Maklachur and Jung, Soon Ki and Hammond, Tracy}, title = {MambaLiteUNet: Cross-Gated Adaptive Feature Fusion for Robust Skin Lesion Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8556-8565} }
Node-RF: Learning Generalized Continuous Space-Time Scene Dynamics with Neural ODE-based NeRFs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sarkar_2026_CVPR, author = {Sarkar, Hiran and Kuang, Liming and Velikova, Yordanka and Busam, Benjamin}, title = {Node-RF: Learning Generalized Continuous Space-Time Scene Dynamics with Neural ODE-based NeRFs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15411-15420} }
EventHub: Data Factory for Generalizable Event-Based Stereo Networks without Active Sensors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bartolomei_2026_CVPR, author = {Bartolomei, Luca and Tosi, Fabio and Poggi, Matteo and Mattoccia, Stefano and Gallego, Guillermo}, title = {EventHub: Data Factory for Generalizable Event-Based Stereo Networks without Active Sensors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37063-37074} }
Boosting Self-Supervised Tracking with Contextual Prompts and Noise Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Yaozong and Liang, Qihua and Zhong, Bineng and Zeng, Shuimu and Xue, Yuanliang and Li, Ning and Song, Shuxiang}, title = {Boosting Self-Supervised Tracking with Contextual Prompts and Noise Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35197-35206} }
EI-Part: Explode for Completion and Implode for Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Wanhu and Luo, Zhongjin and Zheng, Heliang and Chang, Jiahao and Ye, Chongjie and He, Huiang and Zhao, Shengchu and Jia, Rongfei and Han, Xiaoguang}, title = {EI-Part: Explode for Completion and Implode for Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27007-27017} }
UniGame: Turning a Unified Multimodal Model Into Its Own Adversary-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2026_CVPR, author = {Su, Zhaolong and Lu, Wang and Chen, Hao and Li, Sharon and Wang, Jindong}, title = {UniGame: Turning a Unified Multimodal Model Into Its Own Adversary}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37632-37641} }
Global Underwater Geolocation from Time-Lapse Polarization Imagery-
[pdf]
[supp]
[bibtex]@InProceedings{Aghajanzadeh_2026_CVPR, author = {Aghajanzadeh, Sara and Bai, Xiaoyang and Zhu, Zhongmin and Forsyth, David and Gruev, Viktor}, title = {Global Underwater Geolocation from Time-Lapse Polarization Imagery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6464-6473} }
Assignment-Driven Hash Learning in a Hyper-Semantic Space for On-the-Fly Category Discovery-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Kaibing and Wang, Yucheng and Luo, Tingzhang}, title = {Assignment-Driven Hash Learning in a Hyper-Semantic Space for On-the-Fly Category Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11303-11312} }
MASQuant: Modality-Aware Smoothing Quantization for Multimodal Large Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Lulu and Xiao, Wenhu and Chen, Xin and Xu, Xinhua and Xu, Bowen and Li, Kun and Tao, Yongliang}, title = {MASQuant: Modality-Aware Smoothing Quantization for Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8707-8716} }
CRAFT-LoRA: Content-Style Personalization via Rank-Constrained Adaptation and Training-Free Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yu and Cai, Yujun and Zhang, Chi}, title = {CRAFT-LoRA: Content-Style Personalization via Rank-Constrained Adaptation and Training-Free Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7654-7663} }
GS^2: Graph-based Spatial Distribution Optimization for Compact 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Xianben and Wang, Tao and Li, Yuxuan and Jin, Yi and Ling, Haibin}, title = {{GS$^2$}: Graph-based Spatial Distribution Optimization for Compact 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33259-33268} }
PCA-Seg: Revisiting Cost Aggregation for Open-Vocabulary Semantic and Part Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Jianjian and Chen, Tao and Chen, Yi and Pei, Gensheng and Shu, Xiangbo and Yao, Yazhou and Shen, Fumin}, title = {PCA-Seg: Revisiting Cost Aggregation for Open-Vocabulary Semantic and Part Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27633-27643} }
Spatio-Temporal Difference Guided Motion Deblurring with the Complementary Vision Sensor-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Meng_2026_CVPR, author = {Meng, Yapeng and Yang, Lin and Chen, Yuguo and Chen, Xiangru and Wang, Taoyi and Wang, Lijian and Yang, Zheyu and Lin, Yihan and Zhao, Rong}, title = {Spatio-Temporal Difference Guided Motion Deblurring with the Complementary Vision Sensor}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37496-37506} }
LaRP: Efficient Multi-View Inpainting with Latent Reprojection Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Gaoyang and Liu, Xinguo}, title = {LaRP: Efficient Multi-View Inpainting with Latent Reprojection Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21772-21783} }
Geometry-Aware Cross-Modal Graph Alignment for Referring Segmentation in 3D Gaussian Splatting-
[pdf]
[bibtex]@InProceedings{Tao_2026_CVPR, author = {Tao, Yuwen and Zhou, Kanglei and Li, Chang and Wang, Liyuan}, title = {Geometry-Aware Cross-Modal Graph Alignment for Referring Segmentation in 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20433-20442} }
Predicting Spatial Transcriptomics from Histology Images via High-Order Multi-Cell Interaction Modeling-
[pdf]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Youhan and Rao, Jiahua and Du, Kangrui and Xie, Jiancong and Yang, Yuedong}, title = {Predicting Spatial Transcriptomics from Histology Images via High-Order Multi-Cell Interaction Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19781-19790} }
Breaking Semantic Boundaries: Distribution-Guided Semantic Exploration for Creative Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Fu and Xie, Yucheng and Shi, Ruixiao and Yang, Xu and Wang, Jing and Geng, Xin}, title = {Breaking Semantic Boundaries: Distribution-Guided Semantic Exploration for Creative Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14253-14262} }
ResiHMR: Residual-Limb Aware Single-Image 3D Human Mesh Recovery for Individuals with Limb Loss-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ying_2026_CVPR, author = {Ying, Jiaying and Du, Heming and Zhang, Kaihao and Tweedy, Sean M. and Yu, Xin}, title = {ResiHMR: Residual-Limb Aware Single-Image 3D Human Mesh Recovery for Individuals with Limb Loss}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13940-13950} }
RoboWheel: A Data Engine from Real-World Human Demonstrations for Cross-Embodiment Robotic Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yuhong and Gao, Zihan and Li, Shengpeng and Chen, Ling-Hao and Liu, Kaisheng and Cheng, Runqing and Lin, Xiao and Liu, Junjia and Li, Zhuoheng and Feng, Jingyi and He, Ziyan and Lin, Jintian and Huang, Zheyan and Liu, Zhifang and Wang, Haoqian}, title = {RoboWheel: A Data Engine from Real-World Human Demonstrations for Cross-Embodiment Robotic Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6664-6674} }
Motus: A Unified Latent Action World Model-
[pdf]
[arXiv]
[bibtex]@InProceedings{Bi_2026_CVPR, author = {Bi, Hongzhe and Tan, Hengkai and Xie, Shenghao and Wang, Zeyuan and Huang, Shuhe and Liu, Haitian and Zhao, Ruowen and Feng, Yao and Xiang, Chendong and Rong, Yinze and Zhao, Hongyan and Liu, Hanyu and Su, Zhizhong and Ma, Lei and Su, Hang and Zhu, Jun}, title = {Motus: A Unified Latent Action World Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35101-35113} }
Neural Distribution Prior for LiDAR Out-of-Distribution Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zizhao and Xiang, Zhengkang and Ao, Jiayang and Liu, Feng and West, Joseph and Khoshelham, Kourosh}, title = {Neural Distribution Prior for LiDAR Out-of-Distribution Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3035-3045} }
Echoes of Ownership: Adversarial-Guided Dual Injection for Copyright Protection in MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2026_CVPR, author = {Xia, Chengwei and Ma, Fan and Quan, Ruijie and Xu, Yunqiu and Zhan, Kun and Yang, Yi}, title = {Echoes of Ownership: Adversarial-Guided Dual Injection for Copyright Protection in MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20650-20659} }
Physics-Consistent Diffusion for Efficient Fluid Super-Resolution via Multiscale Residual Correction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zhihao and Dong, Shengwei and Yi, Chuang and Gao, Junxuan and Lai, Zhilu and Liu, Zhiqiang and Wang, Wei and Zhang, Guangtao}, title = {Physics-Consistent Diffusion for Efficient Fluid Super-Resolution via Multiscale Residual Correction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30574-30583} }
Physical Adversarial Clothing Evades Visible-Thermal Detectors via Non-Overlapping RGB-T Pattern-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Xiaopei and Zeng, Guanning and Hu, Zhanhao and Zhu, Jun and Hu, Xiaolin}, title = {Physical Adversarial Clothing Evades Visible-Thermal Detectors via Non-Overlapping RGB-T Pattern}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13356-13365} }
LazyVAR: Accelerating Visual Autoregressive Models via Scale-wise Token Pruning and Parallel Group Decoding-
[pdf]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Rongge and Dong, Chengqi and Zhou, S. Kevin}, title = {LazyVAR: Accelerating Visual Autoregressive Models via Scale-wise Token Pruning and Parallel Group Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12129-12139} }
HiSpatial: Taming Hierarchical 3D Spatial Understanding in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Huizhi and Shen, Yichao and Deng, Yu and Xu, Sicheng and Feng, ZhiYuan and Zhang, Tong and Liang, Yaobo and Yang, Jiaolong}, title = {HiSpatial: Taming Hierarchical 3D Spatial Understanding in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2502-2514} }
GenColorBench: A Color Evaluation Benchmark for Text-to-Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Butt_2026_CVPR, author = {Butt, Muhammad Atif and Gomez-Villa, Alexandra and Wu, Tao and Vazquez-Corral, Javier and van de Weijer, Joost and Wang, Kai}, title = {GenColorBench: A Color Evaluation Benchmark for Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36638-36648} }
CI-VID: A Coherent Interleaved Text-Video Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ju_2026_CVPR, author = {Ju, Yiming and Hu, Jijin and Luo, Zhengxiong and Deng, Haoge and Zhao, Hanyu and Du, Li and Xiao, Wenbo and Wu, Chengwei and Hao, Donglin and Wang, Xinlong and Pan, Tengfei}, title = {CI-VID: A Coherent Interleaved Text-Video Dataset}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25568-25577} }
FedRAC: Rolling Submodel Allocation for Collaborative Fairness in Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zihui and Fu, Yuhang and Du, Mengmeng and Yuan, Zhimin and Liu, Yachen and Liao, Weisheng and Wang, Kaiyu and Wang, Zheng}, title = {FedRAC: Rolling Submodel Allocation for Collaborative Fairness in Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31802-31811} }
SpeeDiff: Scalable Pixel-Anchored End-to-End Latent Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Bingliang and Chu, Wenda and Li, Yizhuo and Yang, Linjie and Yue, Yisong and Bouman, Katherine and Song, Yang and Guo, Qiushan}, title = {SpeeDiff: Scalable Pixel-Anchored End-to-End Latent Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35893-35903} }
PoseMaster: A Unified 3D Native Framework for Stylized Pose Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Hongyu and Luo, Kunming and Li, Weiyu and Zhang, Kaiyi and Liang, Yixun and Huang, Jingwei and Guo, Chunchao and Tan, Ping}, title = {PoseMaster: A Unified 3D Native Framework for Stylized Pose Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34292-34302} }
AdaSVD: Singular Value Decomposition with Adaptive Mechanisms for Large Multimodal Models-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zhiteng and Xia, Mingyuan and Zhang, Jingyuan and Hui, Zheng and Qin, Haotong and Kong, Linghe and Zhang, Yulun and Yang, Xiaokang}, title = {AdaSVD: Singular Value Decomposition with Adaptive Mechanisms for Large Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26358-26368} }
EmoThinker: Advancing Visual-Acoustic Emotion Analysis via Structural Token Selection and Chain-of-Thought Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Qinfu and Pan, Liyuan and Wei, Yiwei and Yuan, Shaozu and Chen, Jiaqi and Liu, Tianyu}, title = {EmoThinker: Advancing Visual-Acoustic Emotion Analysis via Structural Token Selection and Chain-of-Thought Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1672-1682} }
ARC Is a Vision Problem!-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Keya and Cy, Ali and Qiu, Linlu and Ding, Xiaoman Delores and Wang, Runqian and Zhu, Yeyin Eva and Andreas, Jacob and He, Kaiming}, title = {ARC Is a Vision Problem!}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2537-2546} }
Reinforce to Learn, Elect to Reason: A Dual Paradigm for Video Reasoning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Songyuan and Yu, Weijiang and Ma, Jilin and Liu, Ziyu and Tang, Guijian and Yang, Wenjing and Tan, Huibin and Xiao, Nong}, title = {Reinforce to Learn, Elect to Reason: A Dual Paradigm for Video Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33794-33804} }
CineBrain: A Large-Scale Multi-Modal Audiovisual Brain Dataset for Brain-Conditioned Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Jianxiong and Liu, Yichang and Yang, Baofeng and Feng, Jianfeng and Fu, Yanwei}, title = {CineBrain: A Large-Scale Multi-Modal Audiovisual Brain Dataset for Brain-Conditioned Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36224-36234} }
VIRST: Video-Instructed Reasoning Assistant for SpatioTemporal Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2026_CVPR, author = {Hong, Jihwan and Do, Jaeyoung}, title = {VIRST: Video-Instructed Reasoning Assistant for SpatioTemporal Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3410-3420} }
Beyond Prompt Degradation: Prototype-guided Dual-pool Prompting for Incremental Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yaoteng and Zhou, Qing and Gao, Junyu and Wang, Qi}, title = {Beyond Prompt Degradation: Prototype-guided Dual-pool Prompting for Incremental Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27568-27578} }
MergeVLA: Cross-Skill Model Merging Toward a Generalist Vision-Language-Action Agent-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2026_CVPR, author = {Fu, Yuxia and Zhang, Zhizhen and Zhang, Yuqi and Wang, Zijian and Huang, Zi and Luo, Yadan}, title = {MergeVLA: Cross-Skill Model Merging Toward a Generalist Vision-Language-Action Agent}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22335-22347} }
HieraMamba: Video Temporal Grounding via Hierarchical Anchor-Mamba Pooling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{An_2026_CVPR, author = {An, Joungbin and Grauman, Kristen}, title = {HieraMamba: Video Temporal Grounding via Hierarchical Anchor-Mamba Pooling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16954-16965} }
WeDetect: Fast Open-Vocabulary Object Detection as Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2026_CVPR, author = {Fu, Shenghao and Su, Yukun and Rao, Fengyun and Lyu, Jing and Xie, Xiaohua and Zheng, Wei-Shi}, title = {WeDetect: Fast Open-Vocabulary Object Detection as Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20377-20387} }
Latent Implicit Visual Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Kelvin and Shang, Chuyi and Karlinsky, Leonid and Feris, Rogerio and Darrell, Trevor and Herzig, Roei}, title = {Latent Implicit Visual Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33457-33466} }
Guardians of the Hair: Rescuing Soft Boundaries in Depth, Stereo, and Novel Views-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xiang and Zhang, Yang and Mehl, Lukas and Gross, Markus and Schroers, Christopher}, title = {Guardians of the Hair: Rescuing Soft Boundaries in Depth, Stereo, and Novel Views}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19822-19832} }
Real-Time Dynamic Scene Rendering with Controlled Compressibility and Contact Awareness-
[pdf]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Boya and Guan, Naiyang and Yi, Xiaodong}, title = {Real-Time Dynamic Scene Rendering with Controlled Compressibility and Contact Awareness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8308-8318} }
Complementary Prototype Mapping for Efficient Multimodal Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yuan and Zhang, Xiaoqin and Lu, Huchuan and Zhang, Lihe}, title = {Complementary Prototype Mapping for Efficient Multimodal Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14178-14187} }
Content-Adaptive Hierarchical Hyperprior for Neural Video Coding-
[pdf]
[bibtex]@InProceedings{Liao_2026_CVPR, author = {Liao, Junqi and Wu, Yaojun and Lin, Chaoyi and Deng, Zhipin and Li, Li and Liu, Dong and Sun, Xiaoyan}, title = {Content-Adaptive Hierarchical Hyperprior for Neural Video Coding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20953-20962} }
Rethinking Prompt Design for Inference-time Scaling in Text-to-Visual Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Subin and Mo, Sangwoo and Rizve, Mamshad Nayeem and Xu, Yiran and Liu, Difan and Shin, Jinwoo and Hinz, Tobias}, title = {Rethinking Prompt Design for Inference-time Scaling in Text-to-Visual Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22090-22099} }
Fractal Camouflage: A Bio-Inspired Approach for Multi-Scale Adversarial Attacks in the Infrared Domain-
[pdf]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Chengyin and Wang, Xin and Qiu, Rui and Jia, Zhe and Zhao, Yingying and Wang, Kai and Kang, Xu and Wei, Yiwei}, title = {Fractal Camouflage: A Bio-Inspired Approach for Multi-Scale Adversarial Attacks in the Infrared Domain}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34981-34990} }
TempR1: Improving Temporal Understanding of MLLMs via Temporal-Aware Multi-Task Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Tao and Yang, Li and Zhan, Gen and Zhang, Yabin and Liao, Yiting and Li, Junlin and Fu, Deliang and Zhang, Li and Wang, Limin}, title = {TempR1: Improving Temporal Understanding of MLLMs via Temporal-Aware Multi-Task Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2756-2767} }
Finding Distributed Object-Centric Properties in Self-Supervised Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rawlekar_2026_CVPR, author = {Rawlekar, Samyak and Swain, Amitabh and Cai, Yujun and Wang, Yiwei and Yang, Ming-Hsuan and Ahuja, Narendra}, title = {Finding Distributed Object-Centric Properties in Self-Supervised Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31326-31335} }
DisCa: Accelerating Video Diffusion Transformers with Distillation-Compatible Learnable Feature Caching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zou_2026_CVPR, author = {Zou, Chang and Li, Changlin and Liu, Songtao and Zhong, Zhao and Huang, Kailin and Zhang, Linfeng}, title = {DisCa: Accelerating Video Diffusion Transformers with Distillation-Compatible Learnable Feature Caching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4590-4601} }
Spike-driven Discrete Aggregation for Event-based Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Huaning and Wang, Ziming and Jiang, Runhao and Rui, Yan and Tang, Huajin}, title = {Spike-driven Discrete Aggregation for Event-based Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15135-15144} }
KnowVal: A Knowledge-Augmented and Value-Guided Autonomous Driving System-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2026_CVPR, author = {Xia, Zhongyu and Chen, Wenhao and Wang, Yongtao and Yang, Ming-Hsuan}, title = {KnowVal: A Knowledge-Augmented and Value-Guided Autonomous Driving System}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3740-3749} }
Reframing Long-Tailed Learning via Loss Landscape Geometry-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Shenghan and Liu, Yiming and Wang, Yanzhen and Wang, Yujia and Lu, Xiankai}, title = {Reframing Long-Tailed Learning via Loss Landscape Geometry}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22227-22237} }
EfficientMonoHair: Fast Strand-Level Reconstruction from Monocular Video via Multi-View Direction Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Da and Engel, Dominik and Luo, Deng and Viola, Ivan}, title = {EfficientMonoHair: Fast Strand-Level Reconstruction from Monocular Video via Multi-View Direction Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7610-7619} }
OSMO: Open-vocabulary Self-eMOtion Tracking-
[pdf]
[bibtex]@InProceedings{Abdelfattah_2026_CVPR, author = {Abdelfattah, Mohamed and Tekin, Bugra and Sener, Fadime and Camgoz, Necati Cihan and Sauser, Eric and Ma, Shugao and Alahi, Alexandre and Remelli, Edoardo}, title = {OSMO: Open-vocabulary Self-eMOtion Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1737-1748} }
DyaDiT: A Multi-Modal Diffusion Transformer for Socially Favorable Dyadic Gesture Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Yichen and Song, Jyun-Ting and Jung, Siyeol and Liu, Ruofan and Liu, Haiyang and Chu, Xuangeng and Liu, Ruicong and Wu, Erwin and Koike, Hideki and Kitani, Kris}, title = {DyaDiT: A Multi-Modal Diffusion Transformer for Socially Favorable Dyadic Gesture Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10932-10942} }
KLIP: Localized Distribution Shift Detection via KL-Divergence with Diffusion Priors in Inverse Problems-
[pdf]
[supp]
[bibtex]@InProceedings{Kheirandish_2026_CVPR, author = {Kheirandish, Alireza and Hong, Jihoon and Fridovich-Keil, Sara}, title = {KLIP: Localized Distribution Shift Detection via KL-Divergence with Diffusion Priors in Inverse Problems}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30823-30832} }
Precise Object and Effect Removal with Adaptive Target-Aware Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Jixin and Wang, Zhouxia and Yang, Peiqing and Zhou, Shangchen}, title = {Precise Object and Effect Removal with Adaptive Target-Aware Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19370-19379} }
Scaling Up AI-Generated Image Detection with Generator-Aware Prototypes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qin_2026_CVPR, author = {Qin, Ziheng and Ji, Yuheng and Tao, Renshuai and Tian, Yuxuan and Liu, Yuyang and Wang, Yipu and Zheng, Xiaolong}, title = {Scaling Up AI-Generated Image Detection with Generator-Aware Prototypes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43008-43017} }
ViKey: Enhancing Temporal Understanding in Videos via Visual Prompting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Yeonkyung and Ju, Dayun and Kim, Youngmin and Kang, Seil and Hwang, Seong Jae}, title = {ViKey: Enhancing Temporal Understanding in Videos via Visual Prompting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38880-38890} }
CMR-RD: Long-Tailed Adaptive VLM for Explainable CMR Diagnosis-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yansong and Qiu, Zhongxi and Tian, Yun and Jinyu, Zheng and Li, Shuo}, title = {CMR-RD: Long-Tailed Adaptive VLM for Explainable CMR Diagnosis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7003-7013} }
EgoEdit: Dataset, Real-Time Streaming Model, and Benchmark for Egocentric Video Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Runjia and Haji-Ali, Moayed and Mirzaei, Ashkan and Wang, Chaoyang and Sahni, Arpit and Skorokhodov, Ivan and Siarohin, Aliaksandr and Jakab, Tomas and Han, Junlin and Tulyakov, Sergey and Torr, Philip and Menapace, Willi}, title = {EgoEdit: Dataset, Real-Time Streaming Model, and Benchmark for Egocentric Video Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16042-16053} }
SceMoS: Scene-Aware 3D Human Motion Synthesis by Planning with Geometry-Grounded Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ghosh_2026_CVPR, author = {Ghosh, Anindita and Golyanik, Vladislav and Komura, Taku and Slusallek, Philipp and Theobalt, Christian and Dabral, Rishabh}, title = {SceMoS: Scene-Aware 3D Human Motion Synthesis by Planning with Geometry-Grounded Tokens}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16443-16453} }
Extending Embodied Question Answering from Perception to Decision-
[pdf]
[supp]
[bibtex]@InProceedings{Gong_2026_CVPR, author = {Gong, Xicheng and Li, Qiwei and Xu, Peiran and Mu, Yadong}, title = {Extending Embodied Question Answering from Perception to Decision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29567-29577} }
TriDF: Evaluating Perception, Detection, and Hallucination for Interpretable DeepFake Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang-Lin_2026_CVPR, author = {Jiang-Lin, Jian-Yu and Huang, Kang-Yang and Zou, Ling and Lo, Ling and Yang, Sheng-Ping and Tseng, Yu-Wen and Lin, Kun-Hsiang and Chen, Chia-Ling and Ta, Yu-Ting and Wang, Yan-Tsung and Chen, Po-Ching and Xie, Hongxia and Shuai, Hong-Han and Cheng, Wen-Huang}, title = {TriDF: Evaluating Perception, Detection, and Hallucination for Interpretable DeepFake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17087-17098} }
Re-evaluating Continual VQA: Toward Fair and Robust Evaluation for Multimodal Continual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Zijian and Sun, Zicheng and Zhang, Xingxing and Xu, Kele and Wang, Huaimin}, title = {Re-evaluating Continual VQA: Toward Fair and Robust Evaluation for Multimodal Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18021-18031} }
RefTon: Reference person shot assist virtual Try-on-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Liuzhuozheng and Gong, Yue and Liu, Shanyuan and Wang, Zanyi and Jiang, Dengyang and Wu, Leibucha and Cheng, Bo and Ma, Yuhang and Leng, Dawei and Yin, Yuhui}, title = {RefTon: Reference person shot assist virtual Try-on}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14915-14925} }
Let VLMs Grade Their Own Thoughts: A Self-Quantification Approach to Reasoning-Aware Reward Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Xi_2026_CVPR, author = {Xi, Xing and Qiu, Yu and Luo, Ronghua and Chen, Peixian and Tong, Peilin}, title = {Let VLMs Grade Their Own Thoughts: A Self-Quantification Approach to Reasoning-Aware Reward Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26487-26496} }
PSR: Scaling Multi-Subject Personalized Image Generation with Pairwise Subject-Consistency Rewards-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Shulei and Wei, Longhui and He, Xin and Ouyang, Jianbo and Lu, Hui and Zhao, Zhou and Tian, Qi}, title = {PSR: Scaling Multi-Subject Personalized Image Generation with Pairwise Subject-Consistency Rewards}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14756-14766} }
Gaze Target Estimation Anywhere with Concepts-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Xu and Yang, Houze and Gunda, Vipin and Zhou, Zhongyi and Xu, Tianyu and Kowdle, Adarsh and Kim, Inki and Rehg, James M.}, title = {Gaze Target Estimation Anywhere with Concepts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31304-31315} }
UniMMAD: Unified Multi-Modal and Multi-Class Anomaly Detection via MoE-Driven Feature Decompression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yuan and Pang, Youwei and Zhang, Lihe and Liu, Hanqi and Zuo, Jiaming and Lu, Huchuan and Zhao, Xiaoqi}, title = {UniMMAD: Unified Multi-Modal and Multi-Class Anomaly Detection via MoE-Driven Feature Decompression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28502-28511} }
CausalLens: Sensitivity-Guided Multi-Head Causal Intervention for Hallucination Mitigation in Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ji_2026_CVPR, author = {Ji, Junyang and Liu, Qifan and Yang, Wenming and He, Zhihai}, title = {CausalLens: Sensitivity-Guided Multi-Head Causal Intervention for Hallucination Mitigation in Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4199-4209} }
Where MLLMs Attend and What They Rely On: Explaining Autoregressive Token Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Ruoyu and Guo, Xiaoqing and Liu, Kangwei and Liang, Siyuan and Liu, Shiming and Zhang, Qunli and Wang, Laiyuan and Zhang, Hua and Cao, Xiaochun}, title = {Where MLLMs Attend and What They Rely On: Explaining Autoregressive Token Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17057-17066} }
Generalizable Radio-Frequency Radiance Fields for Spatial Spectrum Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Kang and Chen, Yuning and Du, Wan}, title = {Generalizable Radio-Frequency Radiance Fields for Spatial Spectrum Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12533-12543} }
SymphoMotion: Joint Control of Camera Motion and Object Dynamics for Coherent Video Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Guiyu and Chen, Yabo and Xiang, Xunzhi and Huang, Junchao and Wang, Zhongyu and Jiang, Li}, title = {SymphoMotion: Joint Control of Camera Motion and Object Dynamics for Coherent Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11127-11137} }
GraspALL: Adaptive Structural Compensation from Illumination Variation for Robotic Garment Grasping in Any Low-Light Conditions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2026_CVPR, author = {Zhong, Haifeng and Han, Wenshuo and Wang, Zhouyu and Feng, Runyang and Tang, Fan and Lee, Tong-Yee and Fan, Zipei and Wu, Ruihai and Wang, Yuran and Dong, Hao and Chen, Hechang and Chang, Hyung Jin and Gao, Yixing}, title = {GraspALL: Adaptive Structural Compensation from Illumination Variation for Robotic Garment Grasping in Any Low-Light Conditions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6631-6641} }
Tell Model Where to Look: Mitigating Hallucinations in MLLMs by Vision-Guided Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Jianfei and Zhang, Feng and Sun, Xin and Feng, Chong and Tan, Zhixing}, title = {Tell Model Where to Look: Mitigating Hallucinations in MLLMs by Vision-Guided Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32582-32591} }
Measure The Feature Universe: Topology-based Pseudo Labeling and Gravity Consistency for Source-Free Domain Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Jae Yun and Nam, Hyeok and Cho, Sung In}, title = {Measure The Feature Universe: Topology-based Pseudo Labeling and Gravity Consistency for Source-Free Domain Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3617-3626} }
Boosting Vision-Language Models Towards Cross-Domain Incremental Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xu and Lin, Zihan and Zhang, Yixin and Wang, Zilei}, title = {Boosting Vision-Language Models Towards Cross-Domain Incremental Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6249-6260} }
Pico-Banana-400K: A Large-Scale Dataset for Text-Guided Image Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2026_CVPR, author = {Qian, Yusu and Bocek-Rivele, Eli and Song, Liangchen and Tong, Jialing and Yang, Yinfei and Lu, Jiasen and Hu, Wenze and Gan, Zhe}, title = {Pico-Banana-400K: A Large-Scale Dataset for Text-Guided Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37226-37235} }
D2Cache: Second-Order Delta Caching for Higher Video Diffusion Acceleration-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Enhuai and Wang, Yunke and Sun, Changming and Xu, Chang}, title = {D2Cache: Second-Order Delta Caching for Higher Video Diffusion Acceleration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43589-43599} }
StructXLIP: Enhancing Vision-language Models with Multimodal Structural Cues-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ruan_2026_CVPR, author = {Ruan, Zanxi and Gao, Songqun and Kong, Qiuyu and Wang, Yiming and Cristani, Marco}, title = {StructXLIP: Enhancing Vision-language Models with Multimodal Structural Cues}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17292-17302} }
Cross-Slice Knowledge Transfer via Masked Multi-Modal Heterogeneous Graph Contrastive Learning for Spatial Gene Expression Inference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Zhiceng and Wang, Changmiao and Wan, Jun and Min, Wenwen}, title = {Cross-Slice Knowledge Transfer via Masked Multi-Modal Heterogeneous Graph Contrastive Learning for Spatial Gene Expression Inference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5710-5719} }
Deformation-based In-Context Learning for Point Cloud Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Chengxing and Deng, Jinhong and Lei, Yinjie and Li, Wen}, title = {Deformation-based In-Context Learning for Point Cloud Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39001-39010} }
SemVideo: Reconstructs What You Watch from Brain Activity via Hierarchical Semantic Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Minghan and Yang, Lan and Li, Ke and Zhang, Honggang and Pang, Kaiyue and Song, Yi-Zhe}, title = {SemVideo: Reconstructs What You Watch from Brain Activity via Hierarchical Semantic Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13658-13669} }
M3DocDep: Multi-modal, Multi-page, Multi-document Dependency Chunking with Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shin_2026_CVPR, author = {Shin, Joongmin and Park, Jeongbae and Seo, Jaehyung and Lim, Heuiseok}, title = {M3DocDep: Multi-modal, Multi-page, Multi-document Dependency Chunking with Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16603-16613} }
Differentiable Laplacian Matrix Guided Superpixel Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Juybari_2026_CVPR, author = {Juybari, Jeremy and Hamilton, Josh and Das, Shuvra and Chen, Chaofan and Khalil, Andre and Zhu, Yifeng}, title = {Differentiable Laplacian Matrix Guided Superpixel Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36168-36178} }
3M-TI: High-Quality Mobile Thermal Imaging via Calibration-free Multi-Camera Cross-Modal Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Minchong and Yuan, Xiaoyun and Wan, Junzhe and Zhang, Jianing and Zhang, Jun}, title = {3M-TI: High-Quality Mobile Thermal Imaging via Calibration-free Multi-Camera Cross-Modal Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5659-5669} }
PoseAnything: General Pose-guided Video Generation with Part-aware Temporal Coherence-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Ruiyan and Hu, Teng and Huang, Kaihui and Su, Zihan and Yi, Ran and Ma, Lizhuang}, title = {PoseAnything: General Pose-guided Video Generation with Part-aware Temporal Coherence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23193-23203} }
Content-Aware Dynamic Patchification for Efficient Video Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Sheng and Barnes, Connelly and Rizve, Mamshad Nayeem and Peng, Hongwu and Li, Zhengang and Dibua, Ohi and Ganjdanesh, Alireza and Tang, Xulong and Kang, Yan and Gong, Yifan}, title = {Content-Aware Dynamic Patchification for Efficient Video Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35936-35945} }
Object-WIPER: Training-Free Object and Associated Effect Removal in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kushwaha_2026_CVPR, author = {Kushwaha, Saksham Singh and Nag, Sayan and Tian, Yapeng and Kulkarni, Kuldeep}, title = {Object-WIPER: Training-Free Object and Associated Effect Removal in Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38071-38080} }
Learning to Adapt: Self-Improving Web Agent via Cognitive-Aware Exploration-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Weile and Miao, Bingchen and Yu, Qifan and Bu, Wendong and Wang, Guoming and Zhang, Wenqiao and Zhang, Shengyu and Li, Juncheng and Tang, Siliang}, title = {Learning to Adapt: Self-Improving Web Agent via Cognitive-Aware Exploration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22402-22411} }
EarlyTom: Early Token Compression Completes Fast Video Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Hesong and Jin, Xin and Lu, Lu and Li, Chenhaowen and Chen, Jian and Liu, Qiang and Wang, Huan}, title = {EarlyTom: Early Token Compression Completes Fast Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40559-40568} }
Black-Box Domain Adaptation for Object Detection with Retention-Driven Knowledge Compression-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Yuwu and Liu, Chunzhi}, title = {Black-Box Domain Adaptation for Object Detection with Retention-Driven Knowledge Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {801-811} }
Muses: Designing, Composing, Generating Nonexistent Fantasy 3D Creatures without Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Hexiao and Sun, Xiaokun and Cai, Zeyu and Guo, Hao and Tai, Ying and Yang, Jian and Zhang, Zhenyu}, title = {Muses: Designing, Composing, Generating Nonexistent Fantasy 3D Creatures without Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26995-27006} }
ReaGEN: Adaptive Generation of Structured Chains-of-Thought for Efficient Multimodal Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Tian_2026_CVPR, author = {Tian, Ruiqing and Singamsetti, Mohan Sai and Niu, Di and Rashidi, Bahador}, title = {ReaGEN: Adaptive Generation of Structured Chains-of-Thought for Efficient Multimodal Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33521-33530} }
High Resolution Neural Video Coding with Bi-directional Confidence-Guided Reference Information Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Feng and Zhang, Kai and Zhang, Li and Jia, Chuanmin}, title = {High Resolution Neural Video Coding with Bi-directional Confidence-Guided Reference Information Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33652-33661} }
WorldStereo: Bridging Camera-Guided Video Generation and Scene Reconstruction via 3D Geometric Memories-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yisu and Cao, Chenjie and Wang, Tengfei and Zuo, Xuhui and Wu, Junta and Zhu, Jianke and Guo, Chunchao}, title = {WorldStereo: Bridging Camera-Guided Video Generation and Scene Reconstruction via 3D Geometric Memories}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40327-40339} }
CoFiDA-M: Concept-Aware Feature Modulation for Cross-Domain Adaptation with Image-Only Inference-
[pdf]
[supp]
[bibtex]@InProceedings{Sultana_2026_CVPR, author = {Sultana, Nurjahan and Yap, Moi Hoon and Fan, Xinqi and Lu, Wenqi}, title = {CoFiDA-M: Concept-Aware Feature Modulation for Cross-Domain Adaptation with Image-Only Inference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15060-15069} }
GSV2X: Geometry-Aware Uncertainty Modeling and Orthogonal Fusion for Robust Roadside Perception-
[pdf]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Jianqiang and Pei, Gensheng and Liu, Huafeng and Yao, Yazhou}, title = {GSV2X: Geometry-Aware Uncertainty Modeling and Orthogonal Fusion for Robust Roadside Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21409-21419} }
FoundIR-v2: Optimizing Pre-Training Data Mixtures for Image Restoration Foundation Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xiang and Pan, Jinshan and Dong, Jiangxin and Yang, Jian and Tang, Jinhui}, title = {FoundIR-v2: Optimizing Pre-Training Data Mixtures for Image Restoration Foundation Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8471-8480} }
Learning Coordinate-based Convolutional Kernels for Continuous SE(3) Equivariant and Efficient Point Cloud Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Jaein and Bin Yoo, Hee and Han, Dong-Sig and Zhang, Byoung-Tak}, title = {Learning Coordinate-based Convolutional Kernels for Continuous SE(3) Equivariant and Efficient Point Cloud Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9986-9995} }
PRISM: Learning a Shared Primitive Space for Transferable Skeleton Action Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Di and Wang, Yaohui and Shao, Shuai and Br\'emond, Fran\c{c}ois and Wang, Jiangtao}, title = {PRISM: Learning a Shared Primitive Space for Transferable Skeleton Action Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6085-6094} }
SALMUBench: A Benchmark for Sensitive Association-Level Multimodal Unlearning-
[pdf]
[supp]
[bibtex]@InProceedings{Selvas-Sala_2026_CVPR, author = {Selvas-Sala, Cai and Kang, Lei and Gomez, Lluis}, title = {SALMUBench: A Benchmark for Sensitive Association-Level Multimodal Unlearning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39351-39360} }
Stay in your Lane: Role Specific Queries with Overlap Suppression Loss for Dense Video Captioning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Baek_2026_CVPR, author = {Baek, Seung Hyup and Lee, Jimin and Lee, Hyeongkeun and Cho, Jae Won}, title = {Stay in your Lane: Role Specific Queries with Overlap Suppression Loss for Dense Video Captioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3432-3442} }
MusicInfuser: Making Video Diffusion Listen and Dance-
[pdf]
[supp]
[bibtex]@InProceedings{Hong_2026_CVPR, author = {Hong, Susung and Kemelmacher-Shlizerman, Ira and Curless, Brian and Seitz, Steven M.}, title = {MusicInfuser: Making Video Diffusion Listen and Dance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43751-43761} }
HumanVBench: Probing Human-Centric Video Understanding in MLLMs with Automatically Synthesized Benchmarks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Ting and Chen, Daoyuan and Jiao, Qirui and Ding, Bolin and Li, Yaliang and Shen, Ying}, title = {HumanVBench: Probing Human-Centric Video Understanding in MLLMs with Automatically Synthesized Benchmarks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4494-4504} }
Group Diffusion: Enhancing Image Generation by Unlocking Cross-Sample Collaboration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mo_2026_CVPR, author = {Mo, Sicheng and Nguyen, Thao and Zhang, Richard and Kolkin, Nick and Iyer, Siddharth Srinivasan and Shechtman, Eli and Singh, Krishna Kumar and Lee, Yong Jae and Zhou, Bolei and Li, Yuheng}, title = {Group Diffusion: Enhancing Image Generation by Unlocking Cross-Sample Collaboration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35702-35712} }
MoRe: Motion-aware Feed-forward 4D Reconstruction Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Juntong and Chen, Zequn and Zhang, Weiqi and Di, Donglin and Zhang, Xuancheng and Yang, Chengmin and Liu, Yu-Shen}, title = {MoRe: Motion-aware Feed-forward 4D Reconstruction Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28914-28924} }
SIMPLEPOSTER: A SIMPLE BASELINE FOR PRODUCT POSTER GENERATION-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cui_2026_CVPR, author = {Cui, Benlei and Zeng, Fangao and Jiang, Weitao and Zhai, Yuwen and Hong, Haiwen and Huang, Longtao and Xue, Hui and Shang, Wenxiang and Huang, Pipei}, title = {SIMPLEPOSTER: A SIMPLE BASELINE FOR PRODUCT POSTER GENERATION}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14854-14863} }
Simple-ViLMedSAM: Simple Text Prompts Meet Vision-Language Models for Medical Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2026_CVPR, author = {Qian, Chengcan and Nie, Dong and Chen, Geng and Zhang, Daoqiang and Wen, Xuyun}, title = {Simple-ViLMedSAM: Simple Text Prompts Meet Vision-Language Models for Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30042-30052} }
Towards Policy-Adaptive Image Guardrail: Benchmark and Method-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Piao_2026_CVPR, author = {Piao, Caiyong and Yan, Zhiyuan and Xu, Haoming and Zhao, Yunzhen and Lin, Kaiqing and Xu, Feiyang and Zhou, Shuigeng}, title = {Towards Policy-Adaptive Image Guardrail: Benchmark and Method}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16614-16623} }
SpikeTrack: A Spike-driven Framework for Efficient Visual Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Qiuyang and Cheng, Jiujun and Mao, Qichao and Liu, Cong and Fang, Yu and Li, Yuhong and Ge, Mengying and Gao, Shangce}, title = {SpikeTrack: A Spike-driven Framework for Efficient Visual Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6802-6811} }
Beyond Single-View Sufficiency: CVBench for Cross-View Human Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Tianchen and Liu, Chen and Yu, Xin}, title = {Beyond Single-View Sufficiency: CVBench for Cross-View Human Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7154-7164} }
Adaptive Data Augmentation with Multi-armed Bandit: Sample-Efficient Embedding Calibration for Implicit Pattern Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Minxue and Yu, Yangyang and Ding, Aolin and Pouyan, Maziyar Baran and Belkhouja, Taha and Bao, Yujia}, title = {Adaptive Data Augmentation with Multi-armed Bandit: Sample-Efficient Embedding Calibration for Implicit Pattern Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7978-7989} }
Energy-GS: Image Energy-guided Pose Alignment Gaussian Splatting with redesigned pose gradient flow-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Yu and Su, Lutong and Huang, Ruixiang and Jiang, Tianji and Tang, Jiadong and Yue, Yufeng and Yang, Yi}, title = {Energy-GS: Image Energy-guided Pose Alignment Gaussian Splatting with redesigned pose gradient flow}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7310-7319} }
Sparsity as a Key: Unlocking New Insights from Latent Structures for Out-of-Distribution Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Oh_2026_CVPR, author = {Oh, Ahyoung and Shin, Wonseok and Kim, Songkuk}, title = {Sparsity as a Key: Unlocking New Insights from Latent Structures for Out-of-Distribution Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19298-19307} }
TIPSv2: Advancing Vision-Language Pretraining with Enhanced Patch-Text Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Bingyi and Chen, Koert and Maninis, Kevis-Kokitsi and Chen, Kaifeng and Karpur, Arjun and Xia, Ye and Dua, Sahil and Dabral, Tanmaya and Han, Guangxing and Han, Bohyung and Ainslie, Joshua and Bewley, Alex and Jacob, Mithun and Wagner, Ren\'e and Ramos, Washington and Choromanski, Krzysztof and Seyedhosseini, Mojtaba and Zhou, Howard and Araujo, Andre}, title = {TIPSv2: Advancing Vision-Language Pretraining with Enhanced Patch-Text Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29325-29335} }
NAMI: Efficient Image Generation via Bridged Progressive Rectified Flow Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Yuhang and Cheng, Bo and Liu, Shanyuan and Zhou, Hongyi and Wu, Liebucha and Leng, Dawei and Yin, Yuhui}, title = {NAMI: Efficient Image Generation via Bridged Progressive Rectified Flow Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25667-25676} }
Temporal Equilibrium MeanFlow: Bridging the Scale Gap for One-Step Generation-
[pdf]
[bibtex]@InProceedings{Tu_2026_CVPR, author = {Tu, Yuanpeng and Chen, Yunpeng and Zhang, Xinyu and Liao, Chao and Zhao, Hengshuang}, title = {Temporal Equilibrium MeanFlow: Bridging the Scale Gap for One-Step Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16064-16073} }
Mostly Text, Smart Visuals: Asymmetric Text-Visual Pruning for Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Sijie and Qian, Biao and Han, Jungong}, title = {Mostly Text, Smart Visuals: Asymmetric Text-Visual Pruning for Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10472-10481} }
Remedying Target-Domain Astigmatism for Cross-Domain Few-Shot Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Yongwei and Zou, Yixiong and Li, Yuhua and Li, Ruixuan}, title = {Remedying Target-Domain Astigmatism for Cross-Domain Few-Shot Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19580-19590} }
The LLM Bottleneck: Why Open-Source Vision LLMs Struggle with Hierarchical Visual Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Yuwen and Qing, Yuan and Gong, Boqing}, title = {The LLM Bottleneck: Why Open-Source Vision LLMs Struggle with Hierarchical Visual Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38532-38543} }
Test-Time Alignment of Text-to-Image Diffusion Models via Null-Text Embedding Optimisation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Taehoon and Gouk, Henry and Hospedales, Timothy}, title = {Test-Time Alignment of Text-to-Image Diffusion Models via Null-Text Embedding Optimisation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43611-43620} }
FedHarmony: Harmonizing Heterogeneous Label Correlations in Federated Multi-Label Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Kou_2026_CVPR, author = {Kou, Zhiqiang and Wu, Junxiang and Huang, Wenke and He, Wenwen and Xie, Ming-Kun and Wang, Changwei and Jia, Yuheng and Jiang, Di and Liu, Yang and Geng, Xin and Yang, Qiang}, title = {FedHarmony: Harmonizing Heterogeneous Label Correlations in Federated Multi-Label Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10324-10334} }
SPEAR-1: Scaling Beyond Robot Demonstrations via 3D Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nikolov_2026_CVPR, author = {Nikolov, Nikolay and Albanese, Giuliano and Dey, Sombit and Yanev, Aleksandar and Van Gool, Luc and Zaech, Jan-Nico and Paudel, Danda Pani}, title = {SPEAR-1: Scaling Beyond Robot Demonstrations via 3D Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35124-35134} }
MuViT: Multi-Resolution Vision Transformers for Learning Across Scales in Microscopy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mantes_2026_CVPR, author = {Mantes, Albert Dominguez and La Manno, Gioele and Weigert, Martin}, title = {MuViT: Multi-Resolution Vision Transformers for Learning Across Scales in Microscopy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13648-13657} }
Stealing Split Learning Bottom Models by Recovering Embedding Geometry-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Qinbo and Shi, Yanhang and Zhang, Ziyi and Wang, Hao and Zhang, Sai Qian and Li, Jian}, title = {Stealing Split Learning Bottom Models by Recovering Embedding Geometry}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20660-20669} }
SuP: Sub-cloud Driven Point Cloud Registration-
[pdf]
[supp]
[bibtex]@InProceedings{Fung_2026_CVPR, author = {Fung, Sheldon and Pan, Wei and Cao, Ling and Hou, Fei and Chen, Ling and Mao, Shasha and Li, Hongdong and Lu, Xuequan}, title = {SuP: Sub-cloud Driven Point Cloud Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24185-24194} }
Low-Resolution Editing is All You Need for High-Resolution Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Junsung and Lee, Hyunsoo and Lee, Yong Jae and Han, Bohyung}, title = {Low-Resolution Editing is All You Need for High-Resolution Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16216-16225} }
When Token Pruning is Worse than Random: Understanding Visual Token Information in VLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yahong and Wu, Juncheng and Ni, Zhangkai and Yang, Longzhen and Liu, Yihang and Yang, Chengmei and Wen, Ying and He, Lianghua and Tang, Xianfeng and Liu, Hui and Zhou, Yuyin}, title = {When Token Pruning is Worse than Random: Understanding Visual Token Information in VLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31910-31919} }
Parse, Search, and Confirmation: Training-Free Aerial Vision-and-Dialog Navigation with Chain-of-Thought Reasoning and Structured Spatial Memory-
[pdf]
[supp]
[bibtex]@InProceedings{Qi_2026_CVPR, author = {Qi, Yu and Li, Hongyu and Huang, Shaofei and Hui, Tianrui and Wang, Yaxiong and Cheng, Lechao and Zhong, Zhun and Liu, Si and Wang, Meng}, title = {Parse, Search, and Confirmation: Training-Free Aerial Vision-and-Dialog Navigation with Chain-of-Thought Reasoning and Structured Spatial Memory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23859-23868} }
Rethinking Diffusion Model-Based Video Super-Resolution: Leveraging Dense Guidance from Aligned Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Jingyi and Zheng, Meisong and Chen, Ying and Qiao, Minglang and Deng, Xin and Xu, Mai}, title = {Rethinking Diffusion Model-Based Video Super-Resolution: Leveraging Dense Guidance from Aligned Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38248-38257} }
Revisiting F-measure Optimization in Multi-Label Classification: A Sampling-based Approach-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zixun}, title = {Revisiting F-measure Optimization in Multi-Label Classification: A Sampling-based Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16845-16854} }
TANGO: Text-Anchored Guided Optimization for Robust Fine-tuning Vision-Language Models under Label Noise-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Tengfei and Pan, Weiran and Wei, Wei}, title = {TANGO: Text-Anchored Guided Optimization for Robust Fine-tuning Vision-Language Models under Label Noise}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39186-39196} }
WiTTA-Bench: Benchmarking Test-Time Adaptation for WiFi Sensing-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Bing and Wang, Qiang and Lu, Junda and Zhang, Le and Liu, Yun and Zhu, Ce and Cui, Wei}, title = {WiTTA-Bench: Benchmarking Test-Time Adaptation for WiFi Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18461-18470} }
SGSoft: Learning Fused Semantic-Geometric Features for 3D Shape Correspondence via Template-Guided Soft Signals-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yoon_2026_CVPR, author = {Yoon, Soyeon and Seo, Chang Wook and Shim, Hyunjung}, title = {SGSoft: Learning Fused Semantic-Geometric Features for 3D Shape Correspondence via Template-Guided Soft Signals}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7142-7153} }
Too Vivid to Be Real? Benchmarking and Calibrating Generative Color Fidelity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Zhengyao and Jia, Zexi and Zhong, Yijia and Luo, Pengcheng and Zhang, Jinchao and Lu, Guangming and Yu, Jun and Pei, Wenjie}, title = {Too Vivid to Be Real? Benchmarking and Calibrating Generative Color Fidelity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37258-37267} }
EgoRoC: Towards Egocentric Robotic Control via Task-Agnostic Visual Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Wei and Zhang, Chi and Li, Nan and Zhang, Qian and Zhang, Qi and Li, Mingyan}, title = {EgoRoC: Towards Egocentric Robotic Control via Task-Agnostic Visual Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34991-35001} }
FluoCLIP: Stain-Aware Focus Quality Assessment in Fluorescence Microscopy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Hyejin and Yoon, Jiwon and Park, Sumin and Kim, Suree and Jang, Sinae and Lee, Eunsoo and Kang, Dongmin and Min, Dongbo}, title = {FluoCLIP: Stain-Aware Focus Quality Assessment in Fluorescence Microscopy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28288-28297} }
Physics-Guided Multistep Deformation Reversal for Ancient Bamboo Slip Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Qianqian and Zhu, Jinchi and Zhou, Xiaolu and Xu, Yongchao}, title = {Physics-Guided Multistep Deformation Reversal for Ancient Bamboo Slip Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34061-34071} }
PoseD-Flow: Versatile and Guided Flow Matching Model of Human Pose-
[pdf]
[supp]
[bibtex]@InProceedings{Nadar_2026_CVPR, author = {Nadar, Jebastin and Foti, Simone and Birdal, Tolga}, title = {{PoseD-Flow}: Versatile and Guided Flow Matching Model of Human Pose}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {21165--21175} }
Toward Real-world Infrared Image Super-Resolution: A Unified Autoregressive Framework and Benchmark Dataset-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zou_2026_CVPR, author = {Zou, Yang and Ma, Jun and Jiao, Zhidong and Li, Xingyuan and Jiang, Zhiying and Liu, Jinyuan}, title = {Toward Real-world Infrared Image Super-Resolution: A Unified Autoregressive Framework and Benchmark Dataset}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {16365--16375} }
In Pursuit of Pixel Supervision for Visual Pre-training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Lihe and Li, Shang-Wen and Li, Yang and Lei, Xinjie and Wang, Dong and Mohamed, Abdelrahman and Xie, Saining and Zhao, Hengshuang and He, Kaiming and Xu, Hu}, title = {In Pursuit of Pixel Supervision for Visual Pre-training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {31974--31984} }
Dense Metric Depth Completion from Sparse Direct Time-of-Flight Sensors-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Hakyeong and Wang, Ruicheng and Yao, Chengtang and Yang, Jiaolong and Kim, Min H.}, title = {Dense Metric Depth Completion from Sparse Direct Time-of-Flight Sensors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {36518--36528} }
Omni-Supervised Motion Editing: Balancing Change and Invariance through Positive-Negative Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Zhenwu and Gong, Jingyu and Wang, Peiwei and Wang, Xingzan and Qian, Tianwen and Li, Wenxi and Fang, Yuan and Xie, Jiao and Ma, Lizhuang and Lin, Shaohui}, title = {Omni-Supervised Motion Editing: Balancing Change and Invariance through Positive-Negative Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {30595--30606} }
FabricGen: Microstructure-Aware Woven Fabric Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Yingjie and Luo, Di and Wang, Zixiong and Ling, Xiaoli and Yang, Jian and Wang, Beibei}, title = {{FabricGen}: Microstructure-Aware Woven Fabric Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {34333--34342} }
OMG-Bench: A New Challenging Benchmark for Skeleton-based Online Micro Hand Gesture Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chang_2026_CVPR, author = {Chang, Haochen and Ren, Pengfei and Zhang, Buyuan and Li, Da and Han, Tianhao and Zhang, Haoyang and Xie, Liang and Chen, Hongbo and Yin, Erwei}, title = {{OMG-Bench}: A New Challenging Benchmark for Skeleton-based Online Micro Hand Gesture Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7068--7078} }
FedRE: A Representation Entanglement Framework for Model-Heterogeneous Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2026_CVPR, author = {Yao, Yuan and Wang, Lixu and Wu, Jiaqi and Song, Jin and Chen, Simin and Wang, Zehua and Tian, Zijian and Chen, Wei and Li, Huixia and Li, Xiaoxiao}, title = {{FedRE}: A Representation Entanglement Framework for Model-Heterogeneous Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {39466--39475} }
Cell-Type Prototype-Informed Neural Network for Gene Expression Estimation from Pathology Images-
[pdf]
[arXiv]
[bibtex]@InProceedings{Nishimura_2026_CVPR, author = {Nishimura, Kazuya and Bise, Ryoma and Matsuo, Shinnosuke and Hirose, Haruka and Kojima, Yasuhiro}, title = {Cell-Type Prototype-Informed Neural Network for Gene Expression Estimation from Pathology Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {19801--19811} }
Scale Space Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mukhopadhyay_2026_CVPR, author = {Mukhopadhyay, Soumik and Udhayanan, Prateksha and Shrivastava, Abhinav}, title = {Scale Space Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35851--35860} }
SelecTKD: Selective Token-Weighted Knowledge Distillation for LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Haiduo and Song, Jiangcheng and Zhang, Yadong and Ren, Pengju}, title = {{SelecTKD}: Selective Token-Weighted Knowledge Distillation for {LLMs}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {19287--19297} }
Prune2Drive: A Plug-and-Play Framework for Accelerating Vision-Language Models in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiong_2026_CVPR, author = {Xiong, Minhao and Wen, Zichen and Gu, Zhuangcheng and Liu, Xuyang and Zhang, Rui and Kang, Hengrui and Yang, Jiabing and Zhang, Junyuan and Li, Weijia and He, Conghui and Zhang, Linfeng}, title = {{Prune2Drive}: A Plug-and-Play Framework for Accelerating Vision-Language Models in Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {25215--25224} }
Reinforcing Structured Chain-of-Thought for Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Peiyao and Xu, Haotian and Vesdapunt, Noranart and Hou, Rui and Zhang, Jingyi and Ling, Haibin and Obiednikov, Oleksandr and Zhou, Ning and Fu, Kah Kuen}, title = {Reinforcing Structured Chain-of-Thought for Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {9794--9803} }
VGGT-360: Geometry-Consistent Zero-Shot Panoramic Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2026_CVPR, author = {Yuan, Jiayi and Jiang, Haobo and Soh, De Wen and Zhao, Na}, title = {{VGGT-360}: Geometry-Consistent Zero-Shot Panoramic Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {19874--19883} }
LitePT: Lighter Yet Stronger Point Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yue_2026_CVPR, author = {Yue, Yuanwen and Robert, Damien and Wang, Jianyuan and Hong, Sunghwan and Wegner, Jan Dirk and Rupprecht, Christian and Schindler, Konrad}, title = {{LitePT}: Lighter Yet Stronger Point Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {24173--24184} }
Dynamic-eDiTor: Training-Free Text-Driven 4D Scene Editing with Multimodal Diffusion Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Dong In and Doh, Hyungjun and Chi, Seunggeun and Duan, Runlin and Kim, Sangpil and Ramani, Karthik}, title = {{Dynamic-eDiTor}: Training-Free Text-Driven {4D} Scene Editing with Multimodal Diffusion Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1187--1197} }
VisRes Bench: On Evaluating the Visual Reasoning Capabilities of VLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Tortei_2026_CVPR, author = {T{\"o}rtei, Brigitta Malagurski and Dahou, Yasser and Huynh, Ngoc Dung and Para, Wamiq Reyaz and Khac, Ph{\'u}c H. L{\^e} and Singh, Ankit and Chaybouti, Sofian and Narayan, Sanath}, title = {{VisRes Bench}: On Evaluating the Visual Reasoning Capabilities of {VLMs}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {33185--33195} }
Guiding Diffusion-based Reconstruction with Contrastive Signals for Balanced Visual Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Boyu and Xu, Qianqian and Bao, Shilong and Yang, Zhiyong and Cui, Ruochen and Zhao, Xilin and Huang, Qingming}, title = {Guiding Diffusion-based Reconstruction with Contrastive Signals for Balanced Visual Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2369--2380} }
Domain-Skewed Federated Learning with Feature Decoupling and Calibration-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Huan and Shen, Jun and Yan, Jun and Pang, Guansong}, title = {Domain-Skewed Federated Learning with Feature Decoupling and Calibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {17484--17493} }
MoBind: Motion Binding for Fine-Grained IMU-Video Pose Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Duc Duy and Chin, Tat-Jun and Hoai, Minh}, title = {{MoBind}: Motion Binding for Fine-Grained {IMU}-Video Pose Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {22123--22132} }
Learned Image Compression via Sparse Attention and Adaptive Frequency-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Huidong and Shi, Xinyan and Sun, Hui and Yue, Xiaofei and Liu, Xiaoguang and Wang, Gang and Cai, Wentong}, title = {Learned Image Compression via Sparse Attention and Adaptive Frequency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {41278--41287} }
Learning to Refuse: Refusal-Aware Reinforcement Fine-Tuning for Hard-Irrelevant Queries in Video Temporal Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Jin-Seop and Lee, SungJoon and Jung, SeongJun and Li, Boyang and Lee, Jee-Hyong}, title = {Learning to Refuse: Refusal-Aware Reinforcement Fine-Tuning for Hard-Irrelevant Queries in Video Temporal Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {10397--10407} }
FedAdamom: Adaptive Momentum for Improved Generalization in Federated Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Hou_2026_CVPR, author = {Hou, Wenjie and Chen, Tianxiang and Wang, Feng and Wu, Tiantong and Zheng, Zhiming and Tang, Shaoting and Lim, Wei Yang Bryan}, title = {{FedAdamom}: Adaptive Momentum for Improved Generalization in Federated Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {36354--36364} }
Beyond the Static World: Continual Category Discovery under Visual Drift-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Wei and Jiang, Yiwen and Zhou, Sijin and Ge, Zongyuan}, title = {Beyond the Static World: Continual Category Discovery under Visual Drift}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {25032--25042} }
SEATrack: Simple, Efficient, and Adaptive Multimodal Tracker-
[pdf]
[arXiv]
[bibtex]@InProceedings{Su_2026_CVPR, author = {Su, Junbin and Xue, Ziteng and Zhang, Shihui and Chen, Kun and Hu, Weiming and Zhang, Zhipeng}, title = {{SEATrack}: Simple, Efficient, and Adaptive Multimodal Tracker}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {28679--28689} }
Rethinking Concept Bottleneck Models: From Pitfalls to Solutions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tapli_2026_CVPR, author = {Tapli, Merve and Bouniot, Quentin and Stammer, Wolfgang and Akata, Zeynep and Akbas, Emre}, title = {Rethinking Concept Bottleneck Models: From Pitfalls to Solutions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {9901--9910} }
WildRayZer: Self-supervised Large View Synthesis in Dynamic Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xuweiyi and Zhou, Wentao and Cheng, Zezhou}, title = {{WildRayZer}: Self-supervised Large View Synthesis in Dynamic Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1252--1264} }
Attribution as Retrieval: Model-Agnostic AI-Generated Image Attribution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Hongsong and Cheng, Renxi and Han, Chaolei and Gui, Jie}, title = {Attribution as Retrieval: Model-Agnostic {AI}-Generated Image Attribution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {14062--14072} }
Dual-level Adapter Boosting Prompt-free Curvilinear Structure Segmentation-
[pdf]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Kai and Chen, Li and Cheng, Jun}, title = {Dual-level Adapter Boosting Prompt-free Curvilinear Structure Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {36300--36310} }
Visual Personalization Turing Test-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Abdal_2026_CVPR, author = {Abdal, Rameen and Burgess, James and Tulyakov, Sergey and Wang, Kuan-Chieh Jackson}, title = {Visual Personalization {Turing} Test}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {14789--14799} }
Spectrally Distilled Representations Aligned with Instruction-Augmented LLMs for Satellite Imagery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Do_2026_CVPR, author = {Do, Minh Kha and Xiang, Wei and Han, Kang and Wu, Di and Phan, Khoa and Chen, Yi-Ping Phoebe and Liu, Gaowen and Kompella, Ramana Rao}, title = {Spectrally Distilled Representations Aligned with Instruction-Augmented {LLMs} for Satellite Imagery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6453--6463} }
Multimodal Distribution Matching for Vision-Language Dataset Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Jeong_2026_CVPR, author = {Jeong, Jongoh and Kwon, Hoyong and Kim, Minseok and Yoon, Kuk-Jin}, title = {Multimodal Distribution Matching for Vision-Language Dataset Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {23072--23082} }
Anchoring the Mind of Multimodal Reasoners: Cognitive Bias as a Vector for Jailbreak Attacks-
[pdf]
[supp]
[bibtex]@InProceedings{Cong_2026_CVPR, author = {Cong, Linhua and Sima, Bingrui and He, Kun}, title = {Anchoring the Mind of Multimodal Reasoners: Cognitive Bias as a Vector for Jailbreak Attacks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {36926--36935} }
HAMMER: Harnessing MLLMs via Cross-Modal Integration for Intention-Driven 3D Affordance Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2026_CVPR, author = {Yao, Lei and Chen, Yong and Su, Yuejiao and Wang, Yi and Liu, Moyun and Chau, Lap-Pui}, title = {{HAMMER}: Harnessing {MLLMs} via Cross-Modal Integration for Intention-Driven {3D} Affordance Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {23805--23815} }
Localizing, Structuring, and Rendering: Bridging 3D and 2D Vision-Language-Action Models for Robotic Manipulation-
[pdf]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yunlong and Deng, Xiaoheng and Cao, Yichao and Chen, Yi and He, Xiangjian and You, Shan and Yang, Shuo and Fan, Lei and Wang, Fei and Su, Xiu}, title = {Localizing, Structuring, and Rendering: Bridging {3D} and {2D} Vision-Language-Action Models for Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {20812--20822} }
Towards Knowledge-augmented Bayesian Deep Learning For Computer Vision-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Wang and Wang, Hanjing and Zhang, Yufei and Udayanga, Darsha and Ji, Qiang}, title = {Towards Knowledge-augmented {Bayesian} Deep Learning For Computer Vision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6136--6146} }
Masked-Diffusion Autoencoders for 3D Medical Vision Representation Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Tu_2026_CVPR, author = {Tu, Jiachen and Qin, Guanghui and Zhao, Theodore Zhengde and Valanarasu, Jeya Maria Jose and Zhang, Sheng and Naumann, Tristan and Lam, Fan and Wang, Sheng and Poon, Hoifung}, title = {Masked-Diffusion Autoencoders for {3D} Medical Vision Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {22804--22815} }
BEV-CAR: Enhancing Monocular Bird's Eye View Segmentation with Context-Aware Rasterization-
[pdf]
[bibtex]@InProceedings{Xiong_2026_CVPR, author = {Xiong, Yixin and Wang, Ke and Cheng, Tongtong and Liu, Chunhui and Liu, Kai}, title = {{BEV-CAR}: Enhancing Monocular Bird's Eye View Segmentation with Context-Aware Rasterization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {39869--39878} }
Spectrum from Defocus: Fast Spectral Imaging with Chromatic Focal Stack-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Aydin_2026_CVPR, author = {Aydin, M. Kerem and Hung, Yi-Chun and Pytlarz, Jaclyn and Guo, Qi and Alexander, Emma}, title = {Spectrum from Defocus: Fast Spectral Imaging with Chromatic Focal Stack}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {220--230} }
IF-Prune: Information-Flow Guided Token Pruning for Efficient Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Guohao and Wang, Yufei and Ma, Sizhuo and Xie, Yuege and Cheng, Yuting and Tao, Zhiqiang and Wang, Jian}, title = {{IF-Prune}: Information-Flow Guided Token Pruning for Efficient Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3522--3531} }
Curvature-Aware Zeroth-Order Optimization for Memory-Efficient Test-Time Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Junming and Yin, Shuyu and Liu, Peilin and Ying, Rendong and Wen, Fei}, title = {Curvature-Aware Zeroth-Order Optimization for Memory-Efficient Test-Time Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {836--846} }
Hidden Monotonicity: Explaining Deep Neural Networks via their DC Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zimmermann_2026_CVPR, author = {Zimmermann, Jakob Paul and Loho, Georg}, title = {Hidden Monotonicity: Explaining Deep Neural Networks via their {DC} Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {24107--24117} }
VL-RouterBench: A Benchmark for Vision-Language Model Routing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Zhehao and Lin, Baijiong and Zhang, Jingyuan and Wang, Jingying and Liu, Yuhang and Lu, Ning and Li, Tao and Huang, Xiaolin}, title = {{VL-RouterBench}: A Benchmark for Vision-Language Model Routing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {9512--9523} }
RevINN: An End-to-End Invertible Neural Network for Reversible Adversarial Examples Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Jielun and Pun, Chi-Man and Huang, Guoheng}, title = {{RevINN}: An End-to-End Invertible Neural Network for Reversible Adversarial Examples Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6601--6610} }
BiomedCCPL: Causal Conditional Prompt Learning for Biomedical Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Cui_2026_CVPR, author = {Cui, Xueliang and Zhang, Juncai and Hou, Jiacheng and Lu, Dan and Zhang, Hao and Wang, Ruxin}, title = {{BiomedCCPL}: Causal Conditional Prompt Learning for Biomedical Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40812--40821} }
Neural Gabor Splatting: Enhanced Gaussian Splatting with Neural Gabor for High-frequency Surface Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Watanabe_2026_CVPR, author = {Watanabe, Haato and Umetani, Nobuyuki}, title = {Neural {Gabor} Splatting: Enhanced {Gaussian} Splatting with Neural {Gabor} for High-frequency Surface Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4932--4941} }
RaUF: Learning the Spatial Uncertainty Field of Radar-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Shengpeng and Wang, Kuangyu and Wang, Wei}, title = {{RaUF}: Learning the Spatial Uncertainty Field of Radar}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42474--42483} }
Attention-aware Inference Optimizations for Large Vision-Language Models with Memory-efficient Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ilhan_2026_CVPR, author = {Ilhan, Fatih and Liu, Gaowen and Kompella, Ramana Rao and Tekin, Selim Furkan and Huang, Tiansheng and Yahn, Zachary and Xu, Yichang and Liu, Ling}, title = {Attention-aware Inference Optimizations for Large Vision-Language Models with Memory-efficient Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {10482--10491} }
RoadGIE: Towards A Global-Scale Aerial Benchmark for Generalizable Interactive Road Extraction-
[pdf]
[supp]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Chenxu and Wang, Chenxu and Dai, Yimian and Liu, Yongxiang and Cheng, Ming-Ming and Li, Xiang}, title = {{RoadGIE}: Towards A Global-Scale Aerial Benchmark for Generalizable Interactive Road Extraction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {13285--13295} }
UniGeoRS: A Unified Benchmark for Tri-view Geo-Localization-
[pdf]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Xiao and Tang, Huaizhi and Zhang, Feiyang and Yuan, Shiji and Hu, Chun and Zheng, Dezhi and Ma, Kang}, title = {{UniGeoRS}: A Unified Benchmark for Tri-view Geo-Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5399--5408} }
AHS: Adaptive Head Synthesis via Synthetic Data Augmentations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2026_CVPR, author = {Kang, Taewoong and Jang, Hyojin and Jeong, Sohyun and Moon, Seunggi and Kim, Gihwi and Jung, Hoon Jin and Choo, Jaegul}, title = {{AHS}: Adaptive Head Synthesis via Synthetic Data Augmentations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2125--2135} }
Your Dissimilarities Define You: Complementary Learning Exploiting Class Diversities-
[pdf]
[supp]
[bibtex]@InProceedings{Katsikas_2026_CVPR, author = {Katsikas, Dimitrios and Passalis, Nikolaos and Tefas, Anastasios}, title = {Your Dissimilarities Define You: Complementary Learning Exploiting Class Diversities}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {10512--10521} }
DriveVLN: Towards Mapless Vision-and-Language Navigation in Autonomous Driving-
[pdf]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Dongqian and Wei, Haoran and Han, Wencheng and Tao, Runzhou and Qiu, Zhongying and Yang, Jianfei and Shen, Jianbing}, title = {{DriveVLN}: Towards Mapless Vision-and-Language Navigation in Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {25174--25183} }
Cross-modal Fuzzy Alignment Network for Text-Aerial Person Retrieval and A Large-scale Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Yifei and Li, Chenglong and Zhang, Yuyang and Hu, Guyue and Tang, Jin}, title = {Cross-modal Fuzzy Alignment Network for Text-Aerial Person Retrieval and A Large-scale Benchmark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38732--38741} }
Asynchronous Temporal Modeling with Two-Agent Framework for Streaming Dense Video Captioning-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Yolo Y. and Huang, Chao and Liang, Susan and Bi, Jing and Wang, Yicheng and Shimada, Daiki and Xu, Chenliang}, title = {Asynchronous Temporal Modeling with Two-Agent Framework for Streaming Dense Video Captioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2799--2810} }
SkillSight: Efficient First-Person Skill Assessment with Gaze-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Chi Hsuan and Kumar, Ashutosh and Grauman, Kristen}, title = {{SkillSight}: Efficient First-Person Skill Assessment with Gaze}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38891--38903} }
TGT: Text-Grounded Trajectories for Locally Controlled Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Guofeng and Wang, Angtian and Fang, Jacob Zhiyuan and Jiang, Liming and Yang, Haotian and Liu, Bo and Yang, Yiding and Chen, Guang and Wen, Longyin and Yuille, Alan and Ma, Chongyang}, title = {{TGT}: Text-Grounded Trajectories for Locally Controlled Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {22028--22037} }
Training One Model to Master Cross-Level Agentic Actions via Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Kaichen and Wang, Zihao and Li, Muyao and Liu, Anji and Liang, Yitao}, title = {Training One Model to Master Cross-Level Agentic Actions via Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {724--734} }
Mind the Hitch: Dynamic Calibration and Articulated Perception for Autonomous Trucks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Morui and Zhu, Yongqi and Fu, Song and Yang, Qing}, title = {Mind the Hitch: Dynamic Calibration and Articulated Perception for Autonomous Trucks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {10668--10677} }
Beyond Text: Visual Description Assembly by Probabilistic Model for CLIP-based Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Qiu_2026_CVPR, author = {Qiu, Xianglin and Wang, Jian and Wang, Xiaolei and Zhang, Zhen and Xiao, Jimin}, title = {Beyond Text: Visual Description Assembly by Probabilistic Model for {CLIP}-based Weakly Supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6346--6356} }
Molmo2: Open Weights and Data for Vision-Language Models with Video Understanding and Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Clark_2026_CVPR, author = {Clark, Christopher and Zhang, Jieyu and Ma, Zixian and Park, Jae Sung and Tripathi, Rohun and Lee, Sangho and Salehi, Mohammadreza and Ren, Jason and Kim, Chris Dongjoo and Yang, Yinuo and Shao, Vincent and Yang, Yue and Huang, Weikai and Gao, Ziqi and Anderson, Taira and Zhang, Jianrui and Jain, Jitesh and Stoica, George and Farhadi, Ali and Krishna, Ranjay}, title = {{Molmo2}: Open Weights and Data for Vision-Language Models with Video Understanding and Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {28652--28668} }
R4-CGQA: Retrieval-based Vision Language Models for Computer Graphics Image Quality Assessment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zhuangzi and Jin, Jian and Cai, Shilv and Lin, Weisi}, title = {{R4-CGQA}: Retrieval-based Vision Language Models for Computer Graphics Image Quality Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {9468--9477} }
ARGUS: Defending Against Multimodal Indirect Prompt Injection via Steering Instruction-Following Behavior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Weikai and Zeng, Ziqian and Zhang, Kehua and Li, Haoran and Zhuang, Huiping and Wang, Ruidong and Chen, Cen and Peng, Hao}, title = {{ARGUS}: Defending Against Multimodal Indirect Prompt Injection via Steering Instruction-Following Behavior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {31--40} }
MonoVLM: Monocular 3D Visual Grounding with Vision Language Models-
[pdf]
[bibtex]@InProceedings{Qu_2026_CVPR, author = {Qu, Huaizhi and Mahjoub, Hossein Nourkhiz and Tadiparthi, Vaishnav and Lee, Kwonjoon and Chen, Tianlong}, title = {{MonoVLM}: Monocular {3D} Visual Grounding with Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {30986--30996} }
SLVMEval: Synthetic Meta Evaluation Benchmark for Text-to-Long Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Matsuda_2026_CVPR, author = {Matsuda, Ryosuke and Kudo, Keito and Yoshida, Haruto and Shimizu, Nobuyuki and Suzuki, Jun}, title = {SLVMEval: Synthetic Meta Evaluation Benchmark for Text-to-Long Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7784-7794} }
Discovering Adaptive Task Dependencies for Efficient Multi-Task Representation Compression-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Zhimeng and Yuan, Rongao and Gao, Junlong and Mao, Qi and Ma, Siwei and Gao, Wen and Jia, Chuanmin}, title = {Discovering Adaptive Task Dependencies for Efficient Multi-Task Representation Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5326-5336} }
Single-Round Scalable Analytic Federated Learning-
[pdf]
[bibtex]@InProceedings{Bacellar_2026_CVPR, author = {Bacellar, Alan T. L. and Munir, Mustafa and Fran{\c{c}}a, Felipe M. G. and Lima, Priscila M. V. and Marculescu, Radu and John, Lizy K.}, title = {Single-Round Scalable Analytic Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39445-39454} }
Lynx: Towards High-Fidelity Personalized Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sang_2026_CVPR, author = {Sang, Shen and Zhi, Tiancheng and Gu, Tianpei and Liu, Jing and Luo, Linjie}, title = {Lynx: Towards High-Fidelity Personalized Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9192-9202} }
HierEdit: Region-Aware Hierarchical Diffusion for Efficient High-Resolution Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yuyao and Huang-Menders, Alexander and Tai, Yu-Wing}, title = {HierEdit: Region-Aware Hierarchical Diffusion for Efficient High-Resolution Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43546-43557} }
BluRef: Unsupervised Image Deblurring with Dense-Matching References-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pham_2026_CVPR, author = {Pham, Bang-Dang and Tran, Anh and Pham, Cuong and Hoai, Minh}, title = {BluRef: Unsupervised Image Deblurring with Dense-Matching References}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37445-37454} }
Talk2Move: Reinforcement Learning for Text-Instructed Object-Level Geometric Transformation in Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Jing and Zhang, Zhaoyang and Shen, Yantao and Cai, Jiarui and Yang, Shuo and Wu, Jiajun and Xia, Wei and Tu, Zhuowen and Soatto, Stefano}, title = {Talk2Move: Reinforcement Learning for Text-Instructed Object-Level Geometric Transformation in Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14735-14745} }
Scaling Multi-Identity Consistency for Image Customization via Multi-to-Multi Matching Paradigm-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Yufeng and Wu, Wenxu and Wu, Shaojin and Huang, Mengqi and Ding, Fei and He, Qian}, title = {Scaling Multi-Identity Consistency for Image Customization via Multi-to-Multi Matching Paradigm}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1906-1916} }
Scaling Instruction-Based Video Editing with a High-Quality Synthetic Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2026_CVPR, author = {Bai, Qingyan and Wang, Qiuyu and Ouyang, Hao and Yu, Yue and Wang, Hanlin and Wang, Wen and Cheng, Ka Leong and Ma, Shuailei and Zeng, Yanhong and Liu, Zichen and Xu, Yinghao and Shen, Yujun and Chen, Qifeng}, title = {Scaling Instruction-Based Video Editing with a High-Quality Synthetic Dataset}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37971-37981} }
Linear Fundamental Matrix Estimation from 7 or 5 Points-
[pdf]
[supp]
[bibtex]@InProceedings{Kucukpinar_2026_CVPR, author = {Kucukpinar, Taci Ata and Mogollon, Juan and Fraser, Joshua and Duff, Timothy and Palaniappan, Kannappan}, title = {Linear Fundamental Matrix Estimation from 7 or 5 Points}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21464-21473} }
Training-free Motion Factorization for Compositional Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zixuan and Zhou, Ziqin and Chen, Feng and Peng, Duo and Hu, Yixin and Li, Changsheng and Lei, Yinjie}, title = {Training-free Motion Factorization for Compositional Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23139-23149} }
Towards Human-Imperceptible Backdoor Attacks on Text-to-Image Diffusion Models-
[pdf]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Yiming and Chen, Chenghao and Wu, Changkun and Fu, Chong and Zhu, Biru and Wen, Zhenyu and Hong, Zhen}, title = {Towards Human-Imperceptible Backdoor Attacks on Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1503-1512} }
Cluster-Wise Spatio-Temporal Masking for Efficient Video-Language Pretraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhuang_2026_CVPR, author = {Zhuang, Weijun and Huang, Yuqing and Meng, Weikang and Li, Xin and Liu, Ming and Hong, Xiaopeng and Wang, Yaowei and Zuo, Wangmeng}, title = {Cluster-Wise Spatio-Temporal Masking for Efficient Video-Language Pretraining}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39197-39207} }
Selection-as-Nonlinearity: Bridging Attention and Activation via a Joint Game-Decision Lens for Interpretable, Discriminative Visual Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Sudong and Yuan, Shuai and Chen, Bingzhi and Mao, Rui and Wang, Bing}, title = {Selection-as-Nonlinearity: Bridging Attention and Activation via a Joint Game-Decision Lens for Interpretable, Discriminative Visual Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11621-11631} }
Decouple Your Discovery and Memory in Continual Generalized Category Discovery-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Jiawei and Gao, Zijian and Zhang, Xingxing and Liu, Xuan and Wang, Huaimin and Xu, Kele}, title = {Decouple Your Discovery and Memory in Continual Generalized Category Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25022-25031} }
Let Your Image Move with Your Motion! -- Implicit Multi-Object Multi-Motion Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yuze and Gong, Dong and Cao, Xiao and Yuan, Junchao and Li, Dongsheng and Zhou, Lei and Koh, Yun Sing and Yan, Cheng and Zhang, Xinyu}, title = {Let Your Image Move with Your Motion! -- Implicit Multi-Object Multi-Motion Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11207-11217} }
Synthesizing Visual Concepts as Vision-Language Programs-
[pdf]
[supp]
[bibtex]@InProceedings{Wust_2026_CVPR, author = {W{\"u}st, Antonia and Stammer, Wolfgang and Shindo, Hikaru and Helff, Lukas and Dhami, Devendra Singh and Kersting, Kristian}, title = {Synthesizing Visual Concepts as Vision-Language Programs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17346-17356} }
FG-Portrait: 3D Flow Guided Editable Portrait Animation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Yating and Miao, Yunqi and Ververas, Evangelos and Deng, Jiankang and Song, Jifei}, title = {FG-Portrait: 3D Flow Guided Editable Portrait Animation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32947-32956} }
Learning Scene Coordinate Reconstruction from Unposed Images via Pose Graph Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Tse_2026_CVPR, author = {Tse, Tze Ho Elden and Peng, Jizong and Yao, Angela}, title = {Learning Scene Coordinate Reconstruction from Unposed Images via Pose Graph Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21816-21825} }
Sky2Ground: A Benchmark for Site Modeling under Varying Altitude-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zengyan and Mitra, Sirshapan and Modi, Rajat and Lim, Hui and Rawat, Yogesh}, title = {Sky2Ground: A Benchmark for Site Modeling under Varying Altitude}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12227-12236} }
Follow the Saliency: Supervised Saliency for Retrieval-augmented Dense Video Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, Seung hee and Jeon, MinJu and Oh, Hyunwoo and Lee, Jihwan and Kim, Dong-Jin}, title = {Follow the Saliency: Supervised Saliency for Retrieval-augmented Dense Video Captioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32808-32817} }
FORCE: Transferable Visual Jailbreaking Attacks via Feature Over-Reliance CorrEction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Runqi and Paren, Alasdair and Yuan, Suqin and Li, Muyang and Torr, Philip and Bibi, Adel and Liu, Tongliang}, title = {FORCE: Transferable Visual Jailbreaking Attacks via Feature Over-Reliance CorrEction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8610-8620} }
Cross-domain Dual-stream Feature Disentanglement for Brain Disorder Prediction with Sparsely Labeled PET-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Huabin and Chen, Xinyu and Zhou, Yuan and Liu, Fei}, title = {Cross-domain Dual-stream Feature Disentanglement for Brain Disorder Prediction with Sparsely Labeled PET}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32037-32046} }
Focal-General Diffusion Model with Semantic Consistent Guidance for Sign Language Production-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Yiheng and Liu, Sheng and Feng, Yuan and Jin, Zhelun and Jiang, Yining and Xu, Min}, title = {Focal-General Diffusion Model with Semantic Consistent Guidance for Sign Language Production}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35915-35925} }
MatchMask: Mask-Centric Generative Data Augmentation for Label-Scarce Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Yuqi and Zhang, Hao and Shao, Wenqi and Liu, Shiqu and Gu, Zhihong and Wang, Wenxiao and He, Xiaofei and Zhang, Kaipeng}, title = {MatchMask: Mask-Centric Generative Data Augmentation for Label-Scarce Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42126-42136} }
E-3DPSM: A State Machine for Event-based Egocentric 3D Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deshmukh_2026_CVPR, author = {Deshmukh, Mayur and Akada, Hiroyasu and Rhodin, Helge and Theobalt, Christian and Golyanik, Vladislav}, title = {E-3DPSM: A State Machine for Event-based Egocentric 3D Human Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14017-14026} }
Semi-supervised Echocardiography Video Segmentation via Anchor Semantic Awareness and Continuous Pseudo-label Reforging-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Yunpeng and Sun, Yimu and Guo, Jingxing and Wu, Huisi and Qin, Jing}, title = {Semi-supervised Echocardiography Video Segmentation via Anchor Semantic Awareness and Continuous Pseudo-label Reforging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8535-8544} }
Memory-Augmented Scene Understanding and Exploration for Open-World Aerial Object-Goal Navigation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Jiacong and Miao, Jiaxu and Lin, Yourun and Wang, Xianyun and Xiao, Jun and Yu, Jun}, title = {Memory-Augmented Scene Understanding and Exploration for Open-World Aerial Object-Goal Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21616-21626} }
The Devil is in Attention Sharing: Improving Complex Non-rigid Image Editing Faithfulness via Attention Synergy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zhuo and Wei, Fanyue and Xu, Runze and Li, Jingjing and Duan, Lixin and Yao, Angela and Li, Wen}, title = {The Devil is in Attention Sharing: Improving Complex Non-rigid Image Editing Faithfulness via Attention Synergy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8237-8246} }
GTR-Turbo: Merged Checkpoint is Secretly a Free Teacher for Agentic VLM Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Tong and Yang, Yijun and Zhang, Changhao and Xing, Junliang and Shi, Yuanchun and Lu, Zongqing and Ye, Deheng}, title = {GTR-Turbo: Merged Checkpoint is Secretly a Free Teacher for Agentic VLM Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26476-26486} }
ReMoGen: Real-time Human Interaction-to-Reaction Generation via Modular Learning from Diverse Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Yaoqin and Xu, Yiteng and Sun, Qin and Zhu, Xinge and Sun, Yujing and Ma, Yuexin}, title = {ReMoGen: Real-time Human Interaction-to-Reaction Generation via Modular Learning from Diverse Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16475-16485} }
PALM: Progress-Aware Policy Learning via Affordance Reasoning for Long-Horizon Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yuanzhe and Zhu, Jingyuan and Mo, Yuchen and Li, Gen and Cao, Xu and Jin, Jin and Shen, Yifan and Li, Zhengyuan and Yu, Tianjiao and Yuan, Wenzhen and Ding, Fangqiang and Lourentzou, Ismini}, title = {PALM: Progress-Aware Policy Learning via Affordance Reasoning for Long-Horizon Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28096-28110} }
Decoupling Vision and Language: Codebook Anchored Visual Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Jason and Zhao, Tianchen and Liu, Chang and Cai, Jiarui and Zhang, Zheng and Li, Zhuowei and Singh, Aaditya and Xu, Xiang and Srivastava, Mani and Wu, Jonathan}, title = {Decoupling Vision and Language: Codebook Anchored Visual Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36957-36967} }
DSFlash: Comprehensive Panoptic Scene Graph Generation in Realtime-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lorenz_2026_CVPR, author = {Lorenz, Julian and Kovganko, Vladyslav and Kohout, Elias and Phatak, Mrunmai and Kienzle, Daniel and Lienhart, Rainer}, title = {DSFlash: Comprehensive Panoptic Scene Graph Generation in Realtime}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17388-17398} }
Reliev3R: Relieving Feed-forward 3D Reconstruction from Multi-View Geometric Annotations-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Youyu and Jiang, Junjun and Luo, Yueru and Jiang, Kui and Liu, Xianming and Yan, Xu and Chen, Dave Zhenyu}, title = {Reliev3R: Relieving Feed-forward 3D Reconstruction from Multi-View Geometric Annotations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21860-21869} }
Forging a Dynamic Memory: Retrieval-Guided Continual Learning for Generalist Medical Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zizhi and Gao, Yizhen and Han, Minghao and Liu, Yizhou and Chen, Zhaoyu and Yang, Dingkang and Zhang, Lihua}, title = {Forging a Dynamic Memory: Retrieval-Guided Continual Learning for Generalist Medical Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32309-32321} }
OmniZip: Learning a Unified and Lightweight Lossless Compressor for Multi-Modal Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yan and Cheng, Zhengxue and Zhang, Junxuan and Zhou, Dajiang and Gu, Qunshan and Wang, Qi and Song, Li}, title = {OmniZip: Learning a Unified and Lightweight Lossless Compressor for Multi-Modal Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5337-5347} }
AdapTok: Learning Adaptive and Temporally Causal Video Tokenization in a 1D Latent Space-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yan and Tian, Changyao and Xia, Renqiu and Liao, Ning and Guo, Weiwei and Li, Hongsheng and Dai, Jifeng and Li, Hao and Yang, Xue}, title = {AdapTok: Learning Adaptive and Temporally Causal Video Tokenization in a 1D Latent Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16163-16172} }
Revisiting Visual Corruptions in LVLMs: A Shape-Texture Perspective on Model Failures-
[pdf]
[supp]
[bibtex]@InProceedings{Qiu_2026_CVPR, author = {Qiu, Xinkuan and Kan, Meina and He, Zhenliang and Zhou, Yongbin and Shan, Shiguang}, title = {Revisiting Visual Corruptions in LVLMs: A Shape-Texture Perspective on Model Failures}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40845-40854} }
Seeing Conversations: Communication Context Identification in Egocentric Video-
[pdf]
[bibtex]@InProceedings{Dorszewski_2026_CVPR, author = {Dorszewski, Tobias and Hjortkj{\ae}r, Jens}, title = {Seeing Conversations: Communication Context Identification in Egocentric Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38816-38825} }
ProjFlow: Projection Sampling with Flow Matching for Zero-Shot Exact Spatial Motion Control-
[pdf]
[supp]
[bibtex]@InProceedings{Watanabe_2026_CVPR, author = {Watanabe, Akihisa and Yu, Qing and Simo-Serra, Edgar and Fujiwara, Kent}, title = {ProjFlow: Projection Sampling with Flow Matching for Zero-Shot Exact Spatial Motion Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2305-2315} }
Attend Before Attention: Efficient and Scalable Video Understanding via Autoregressive Gazing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Baifeng and Fu, Stephanie and Lian, Long and Ye, Hanrong and Eigen, David and Reite, Aaron and Kautz, Jan and Li, Boyi and Chan, David M. and Darrell, Trevor and Molchanov, Pavlo and Yin, Hongxu}, title = {Attend Before Attention: Efficient and Scalable Video Understanding via Autoregressive Gazing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17022-17034} }
Diverse Video Generation with Determinantal Point Process-Guided Policy Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kazimi_2026_CVPR, author = {Kazimi, Tahira and Dunlop, Connor and Yanardag, Pinar}, title = {Diverse Video Generation with Determinantal Point Process-Guided Policy Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12839-12848} }
Text-guided Feature Disentanglement for Cross-modal Gait Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Zhiyang and Cheng, Ming}, title = {Text-guided Feature Disentanglement for Cross-modal Gait Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25484-25493} }
Point Cloud as a Foreign Language for Multi-modal Large Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Paul_2026_CVPR, author = {Paul, Sneha and Patterson, Zachary and Bouguila, Nizar}, title = {Point Cloud as a Foreign Language for Multi-modal Large Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16676-16687} }
Proxy-GS: Unified Occlusion Priors for Training and Inference in Structured 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Yuanyuan and Gong, Yuning and Liu, Yifei and Li, Jingfeng and Xu, Dan and Zhang, Yanci and Zhang, Dingwen and Sun, Xiao and Zhong, Zhihang}, title = {Proxy-GS: Unified Occlusion Priors for Training and Inference in Structured 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7330-7339} }
AnyPcc: Compressing Any Point Cloud with a Single Universal Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Kangli and Yi, Qianxi and Ye, Yuqi and Li, Shihao and Gao, Wei}, title = {AnyPcc: Compressing Any Point Cloud with a Single Universal Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2972-2982} }
InternData-A1: Pioneering High-Fidelity Synthetic Data for Pre-training Generalist Policy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2026_CVPR, author = {Tian, Yang and Yang, Yuyin and Xie, Yiman and Cai, Zetao and Shi, Xu and Gao, Ning and Liu, Hangxu and Jiang, Xuekun and Qiu, Zherui and Yuan, Feng and Li, Yaping and Wang, Ping and Cai, Junhao and Zeng, Jia and Dong, Hao and Pang, Jiangmiao}, title = {InternData-A1: Pioneering High-Fidelity Synthetic Data for Pre-training Generalist Policy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {976-985} }
SparseCam4D: Spatio-Temporally Consistent 4D Reconstruction from Sparse Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Weihong and Zhang, Xiaoyu and Zhang, Zhuang and Ye, Zhichao and Wang, Nan and Liu, Haomin and Zhang, Guofeng}, title = {SparseCam4D: Spatio-Temporally Consistent 4D Reconstruction from Sparse Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33237-33247} }
Training-Free Open-Vocabulary Camouflaged Object Segmentation via Fine-Grained Object Binding and Adaptive Hybrid Prompt-
[pdf]
[bibtex]@InProceedings{Ren_2026_CVPR, author = {Ren, Peng and Jiang, Cheng and Yang, Chuande and Sun, Fuming and Bai, Tian}, title = {Training-Free Open-Vocabulary Camouflaged Object Segmentation via Fine-Grained Object Binding and Adaptive Hybrid Prompt}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24960-24969} }
ART: Articulated Reconstruction Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zizhang and Zhang, Cheng and Li, Zhengqin and Howard-Jenkins, Henry and Lv, Zhaoyang and Geng, Chen and Wu, Jiajun and Newcombe, Richard and Engel, Jakob and Dong, Zhao}, title = {ART: Articulated Reconstruction Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7468-7479} }
RAVEN: Radar Adaptive Vision Encoders for Efficient Chirp-wise Object Detection and Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sen_2026_CVPR, author = {Sen, Anuvab and Mohammad, Mir Sayeed and Mukhopadhyay, Saibal}, title = {RAVEN: Radar Adaptive Vision Encoders for Efficient Chirp-wise Object Detection and Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17938-17947} }
Learning to Diversify and Focus: A Reinforcement Framework for Open-Vocabulary HOI Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Yongchao and Liu, Jiawei and Wang, Junfeng and Tao, Sen and Jiang, Na and Zha, Zheng-Jun}, title = {Learning to Diversify and Focus: A Reinforcement Framework for Open-Vocabulary HOI Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34673-34682} }
SAIDO: Generalizable Detection of AI-Generated Images via Scene-Aware and Importance-Guided Dynamic Optimization in Continual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Yongkang and Cheng, Yu and Zhang, Yushuo and Xie, Yuan and Yin, Zhaoxia}, title = {SAIDO: Generalizable Detection of AI-Generated Images via Scene-Aware and Importance-Guided Dynamic Optimization in Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3876-3886} }
Generative Video Motion Editing with 3D Point Tracks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Yao-Chih and Zhang, Zhoutong and Huang, Jiahui and Wang, Jui-Hsien and Lee, Joon-Young and Huang, Jia-Bin and Shechtman, Eli and Li, Zhengqi}, title = {Generative Video Motion Editing with 3D Point Tracks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18306-18318} }
FireScope: Wildfire Risk Raster Prediction With a Chain-of-Thought Oracle-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Markov_2026_CVPR, author = {Markov, Mario and Ailuro, Stefan and Van Gool, Luc and Schindler, Konrad and Paudel, Danda Pani}, title = {FireScope: Wildfire Risk Raster Prediction With a Chain-of-Thought Oracle}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34795-34805} }
Event6D: Event-based Novel Object 6D Pose Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2026_CVPR, author = {Kang, Jae-Young and Cho, Hoonhee and Lee, Taeyeop and Kang, Minjun and Wen, Bowen and Kim, Youngho and Yoon, Kuk-Jin}, title = {Event6D: Event-based Novel Object 6D Pose Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15091-15104} }
FLOW: Optimal Transport-Driven Feature Warping for Generalized Remote Physiological Measurement-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Bo and Cao, Junzhe and Guo, Dan and Huang, Dongmin and Wang, Wenjin and Tan, Tao and Sun, Yue and Yu, Zitong}, title = {FLOW: Optimal Transport-Driven Feature Warping for Generalized Remote Physiological Measurement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28481-28491} }
Region-Wise Correspondence Prediction between Manga Line Art Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yingxuan and Mao, Jiafeng and Qiu, Qianru and Matsui, Yusuke}, title = {Region-Wise Correspondence Prediction between Manga Line Art Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15334-15342} }
Deciphering Genotype-Phenotype Mechanisms from High-Content Profiling via Knowledge-Guided Multi-modal Graph Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Hanjing and Rao, Jiahua and Sun, Youhan and Xie, Jiancong and Yang, Yuedong}, title = {Deciphering Genotype-Phenotype Mechanisms from High-Content Profiling via Knowledge-Guided Multi-modal Graph Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41804-41814} }
Occluded Human Body Capture with Frequency Domain Denoising Prior-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Buzhen and Xu, Chongyang and Tang, Wentao and Shu, Yuan and Ju, Jingyi and Zuo, Binghui and Wang, Yangang}, title = {Occluded Human Body Capture with Frequency Domain Denoising Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13930-13939} }
Unsupervised Multi-agent and Single-agent Perception from Cooperative Views-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Haochen and Li, Baolu and Li, Lei and Ren, Delin and Guo, Jiacheng and Qin, Minghai and Zhang, Tianyun and Yu, Hongkai}, title = {Unsupervised Multi-agent and Single-agent Perception from Cooperative Views}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25810-25819} }
MeanFuser: Fast One-Step Multi-Modal Trajectory Generation and Adaptive Reconstruction via MeanFlow for End-to-End Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Junli and Zheng, Yinan and Liu, Xueyi and Xing, Zebin and Li, Pengfei and Ma, Kun and Ye, Hangjun and Chen, Guang and Li, Guang and Chen, Long and Xia, Zhongpu and Zhang, Qichao}, title = {MeanFuser: Fast One-Step Multi-Modal Trajectory Generation and Adaptive Reconstruction via MeanFlow for End-to-End Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17884-17893} }
FedBPrompt: Federated Domain Generalization Person Re-Identification via Body Distribution Aware Visual Prompts-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Xin and Li, Weilong and Liu, Wei and Huang, Wenke and Yu, Zhixi and Yang, Bin and Liao, Xiaoying and Jiang, Kui}, title = {FedBPrompt: Federated Domain Generalization Person Re-Identification via Body Distribution Aware Visual Prompts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40448-40457} }
MedCLIPSeg: Probabilistic Vision-Language Adaptation for Data-Efficient and Generalizable Medical Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Koleilat_2026_CVPR, author = {Koleilat, Taha and Asgariandehkordi, Hojat and Nejatimanzari, Omid and Barile, Berardino and Xiao, Yiming and Rivaz, Hassan}, title = {MedCLIPSeg: Probabilistic Vision-Language Adaptation for Data-Efficient and Generalizable Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1406-1417} }
CLIP-like Model as a Foundational Density Ratio Estimator-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Uchiyama_2026_CVPR, author = {Uchiyama, Fumiya and Yanagi, Rintaro and Taniguchi, Shohei and Takashiro, Shota and Suzuki, Masahiro and Kataoka, Hirokatsu and Iwasawa, Yusuke and Matsuo, Yutaka}, title = {CLIP-like Model as a Foundational Density Ratio Estimator}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15784-15793} }
Edges Compete for Trust: Group Relative Edge Optimization for Building Reconstruction from Point Clouds-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yujun and Wang, Ruisheng and Ao, Xiang and Shen, Haoyuan and Wang, Kuihao and Zhou, Kun and Li, Qingquan}, title = {Edges Compete for Trust: Group Relative Edge Optimization for Building Reconstruction from Point Clouds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17121-17131} }
CycleManip: Enabling Cycle-based Manipulation via Effective History Perception and Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Yi-Lin and Liao, Haoran and Lin, Yuhao and Wang, Pengyue and Liang, Zhizhao and Liu, Guiliang and Zheng, Wei-Shi}, title = {CycleManip: Enabling Cycle-based Manipulation via Effective History Perception and Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20780-20789} }
ARMFlow: AutoRegressive MeanFlow for Online 3D Human Reaction Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Geng_2026_CVPR, author = {Geng, Zichen and Hayder, Zeeshan and Liu, Wei and Wang, Hesheng and Mian, Ajmal Saeed}, title = {ARMFlow: AutoRegressive MeanFlow for Online 3D Human Reaction Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30718-30728} }
Decoupled and Reusable Adaptation for Efficient Cross-Modal Transfer-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yajing and Zhang, Yumeng and Si, Yue and Fan, Baojie and Tian, Jiandong}, title = {Decoupled and Reusable Adaptation for Efficient Cross-Modal Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {812-822} }
Olbedo: An Albedo and Shading Aerial Dataset for Large-Scale Outdoor Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Shuang and Huang, Debao and Deng, Deyan and Xiong, Haolin and Tang, Yang and Zhao, Yajie and Qin, Rongjun}, title = {Olbedo: An Albedo and Shading Aerial Dataset for Large-Scale Outdoor Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6474-6483} }
UnityVideo: Unified Multi-Modal Multi-Task Learning for Enhancing World-Aware Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Jiehui and Zhang, Yuechen and He, Xu and Gao, Yuan and Cen, Zhi and Xia, Bin and Zhou, Yan and Tao, Xin and Wan, Pengfei and Jia, Jiaya}, title = {UnityVideo: Unified Multi-Modal Multi-Task Learning for Enhancing World-Aware Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4471-4481} }
Wave-Former: Through-Occlusion 3D Reconstruction via Wireless Shape Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dodds_2026_CVPR, author = {Dodds, Laura and Lam, Maisy and Akbar, Waleed and Cheng, Yibo and Adib, Fadel}, title = {Wave-Former: Through-Occlusion 3D Reconstruction via Wireless Shape Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21713-21724} }
FUSAR-GPT: A Spatiotemporal Feature-Embedded and Two-Stage Decoupled Visual Language Model for SAR Imagery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xiaokun and Yang, Yi and Ye, Ziqi and Baiyun, Baiyun and Guo, Xiaorong and Fang, Qingchen and Zhang, Ruyi and Zhou, Xinpeng and Wang, Haipeng}, title = {FUSAR-GPT: A Spatiotemporal Feature-Embedded and Two-Stage Decoupled Visual Language Model for SAR Imagery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42158-42168} }
IrisFP: Adversarial-Example-based Model Fingerprinting with Enhanced Uniqueness and Robustness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Geng_2026_CVPR, author = {Geng, Ziye and Yang, Guang and Chen, Yihang and Luo, Changqing}, title = {IrisFP: Adversarial-Example-based Model Fingerprinting with Enhanced Uniqueness and Robustness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39383-39392} }
ELVIS: Enhance Low-Light for Video Instance Segmentation in the Dark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Joanne and Lin, Ruirui and Li, Yini and Bull, David and Anantrasirichai, Nantheera}, title = {ELVIS: Enhance Low-Light for Video Instance Segmentation in the Dark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25011-25021} }
End-to-End Hyper-Relational Information Extraction for Engineering Diagrams via Dynamically Tokenized Relation Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Bai_2026_CVPR, author = {Bai, Tianyou and Zhang, Yan-Ming and Zhang, Zixiang and Zhou, Jibin and Yin, Fei and Liu, Cheng-Lin}, title = {End-to-End Hyper-Relational Information Extraction for Engineering Diagrams via Dynamically Tokenized Relation Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24438-24448} }
CrossHOI-Bench: A Unified Benchmark for HOI Evaluation across Vision-Language Models and HOI-Specific Methods-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2026_CVPR, author = {Lei, Qinqian and Wang, Bo and Tan, Robby T.}, title = {CrossHOI-Bench: A Unified Benchmark for HOI Evaluation across Vision-Language Models and HOI-Specific Methods}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38520-38531} }
SkySense-VITA: Towards Universal In-context Segmentation of Multi-modal Remote Sensing Imagery-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Kang and Yu, Lei and Luo, Junwei and Dang, Bo and Zhang, Junjian and Cai, Xiangyuan and Hu, Hongwei and Chen, Jingdong and Li, Yansheng}, title = {SkySense-VITA: Towards Universal In-context Segmentation of Multi-modal Remote Sensing Imagery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20553-20563} }
Drift-Resilient Temporal Priors for Visual Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Yuqing and Lin, Liting and Zhuang, Weijun and He, Zhenyu and Li, Xin}, title = {Drift-Resilient Temporal Priors for Visual Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6847-6856} }
MuKV: Multi-Grained KV Cache Compression for Long Streaming Video Question-Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Junbin and Chen, Jiajun and Sun, Tianxiang and Yang, Xun and Yao, Angela}, title = {MuKV: Multi-Grained KV Cache Compression for Long Streaming Video Question-Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11381-11391} }
Phrase-grounded APO for Improving Chest X-ray Report Generation-
[pdf]
[bibtex]@InProceedings{Mahmood_2026_CVPR, author = {Mahmood, Raziuddin and Syeda-Mahmood, Tanveer}, title = {Phrase-grounded APO for Improving Chest X-ray Report Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28254-28263} }
Unleashing Stealthy Backdoor Pandemic by Infecting a Single Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Al_Nahian_2026_CVPR, author = {Al Nahian, Mohaiminul and Almalky, Abeer Matar and Ahmed, Sabbir and Al Arafat, Abdullah and Rizve, Mamshad Nayeem and Rakin, Adnan Siraj}, title = {Unleashing Stealthy Backdoor Pandemic by Infecting a Single Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34889-34899} }
Joint-Aligned Latent Action: Towards Scalable VLA Pretraining in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Hao and Wang, Ye and Zhang, Wanpeng and Yuan, Haoqi and Feng, Yicheng and Xu, Haiweng and Zheng, Sipeng and Lu, Zongqing}, title = {Joint-Aligned Latent Action: Towards Scalable VLA Pretraining in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35047-35058} }
SIGMA: Selective-Interleaved Generation with Multi-Attribute Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xiaoyan and Bai, Zechen and Wang, Haofan and Song, Yiren}, title = {SIGMA: Selective-Interleaved Generation with Multi-Attribute Tokens}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38165-38175} }
TaskForce: Cooperative Multi-agent Reinforcement Learning for Multi-task Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, Wonhyeok and Hwang, Kyumin and Park, Jihun and Lee, Kyoungmin and Lee, Seunghun and Kim, Jaeyeul and Choi, Minwoo and Im, Sunghoon}, title = {TaskForce: Cooperative Multi-agent Reinforcement Learning for Multi-task Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36871-36880} }
MMLandmarks: a Cross-View Instance-Level Benchmark for Geo-Spatial Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Kristoffersen_2026_CVPR, author = {Kristoffersen, Oskar and S\'anchez, Alba Reinders and Hannemose, Morten Rieger and Dahl, Anders Bjorholm and Papadopoulos, Dim P.}, title = {MMLandmarks: a Cross-View Instance-Level Benchmark for Geo-Spatial Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26452-26464} }
Flow Matching for Multimodal Distributions-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Gaoxiang and Cole, Frank and Zhang, Sihang and Wan, Yuxiang and Lu, Yulong and Sun, Ju}, title = {Flow Matching for Multimodal Distributions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23260-23271} }
From Attraction to Equilibrium: Physics-Inspired Semantic Gravitons for Zero-Shot Anomaly Detection-
[pdf]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Yuwen and Wang, Yuan and Li, Shaohui and Li, Zhi and Liu, Yu and He, You}, title = {From Attraction to Equilibrium: Physics-Inspired Semantic Gravitons for Zero-Shot Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35628-35637} }
Spatia: Video Generation with Updatable Spatial Memory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Jinjing and Wei, Fangyun and Liu, Zhening and Zhang, Hongyang and Xu, Chang and Lu, Yan}, title = {Spatia: Video Generation with Updatable Spatial Memory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4245-4257} }
SegGBC: Justifiable Coarse-to-Fine Granular-Ball Computing for Enhancing Clustering Image Segmentation-
[pdf]
[bibtex]@InProceedings{Chong_2026_CVPR, author = {Chong, Qianpeng and Zeng, Wenyi and Shen, Xiuxuan and Li, Jiajie and Yin, Qian and Zheng, Xin}, title = {SegGBC: Justifiable Coarse-to-Fine Granular-Ball Computing for Enhancing Clustering Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42104-42114} }
GaussianMatch: Semi-Supervised Regression with Pseudo-Label Filtering via Multi-View Gaussian Consistency-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yin and Lu, Hao and Wang, Zixuan and Qin, Zhen and Kuang, Li and Zhou, Mengchu and Deng, Shuiguang}, title = {GaussianMatch: Semi-Supervised Regression with Pseudo-Label Filtering via Multi-View Gaussian Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31985-31994} }
Fresco: Frequency-Spatial Consistent Optimization for Fine-Grained Head Avatar Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shikun and Li, Yong and Wang, Yiqun and Ke, Qiuhong and Chen, Cunjian}, title = {Fresco: Frequency-Spatial Consistent Optimization for Fine-Grained Head Avatar Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40130-40139} }
Long-RVOS: A Comprehensive Benchmark for Long-term Referring Video Object Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Tianming and Jiang, Haichao and Yang, Yuting and Tan, Chaolei and Li, Shuai and Zheng, Wei-Shi and Hu, Jian-Fang}, title = {Long-RVOS: A Comprehensive Benchmark for Long-term Referring Video Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39497-39507} }
ORV: 4D Occupancy-centric Robot Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Xiuyu and Li, Bohan and Xu, Shaocong and Wang, Nan and Ye, Chongjie and Chen, Zhaoxi and Qin, Minghan and Ding, Yikang and Zhu, Zheng and Jin, Xin and Zhao, Hang and Zhao, Hao}, title = {ORV: 4D Occupancy-centric Robot Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1053-1066} }
ParkGaussian: Surround-view 3D Gaussian Splatting for Autonomous Parking-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Xiaobao and Ye, Zhangjie and Gu, Yuxiang and Zhu, Zunjie and Guo, Yunfei and Shen, Yingying and Zhao, Shan and Lu, Ming and Sun, Haiyang and Wang, Bing and Chen, Guang and Lu, Rongfeng and Ye, Hangjun}, title = {ParkGaussian: Surround-view 3D Gaussian Splatting for Autonomous Parking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19085-19095} }
Hearing the Room Through the Shape of the Drum: Modal-Guided Sound Recovery from Multi-Point Surface Vibrations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bagon_2026_CVPR, author = {Bagon, Shai and Kichler, Matan and Sheinin, Mark}, title = {Hearing the Room Through the Shape of the Drum: Modal-Guided Sound Recovery from Multi-Point Surface Vibrations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14451-14460} }
Curvature-Aware Captioning: Leveraging Geodesic Attention for 3D Scene Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Ziyao and Liu, Yingjie and Yangrui, Zhang and Chen, Mingsong and Tang, Xuan and Wei, Xian}, title = {Curvature-Aware Captioning: Leveraging Geodesic Attention for 3D Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30997-31007} }
Personalized Image Descriptions from Attention Sequences-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2026_CVPR, author = {Xue, Ruoyu and Le, Hieu and Xu, Jingyi and Mondal, Sounak and Leite, Abe and Zelinsky, Gregory and Hoai, Minh and Samaras, Dimitris}, title = {Personalized Image Descriptions from Attention Sequences}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40042-40052} }
RiskProp: Collision-Anchored Self-Supervised Risk Propagation For Early Accident Anticipation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zou_2026_CVPR, author = {Zou, Yiyang and Zhao, Tianhao and Xiao, Peilun and Jin, Hongyu and Qi, Longyu and Li, Yuxuan and Liang, Liyin and Qian, Yifeng and Lai, Chunbo and Lin, Yutian and Li, Zhihui and Wu, Yu}, title = {RiskProp: Collision-Anchored Self-Supervised Risk Propagation For Early Accident Anticipation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2768-2777} }
MER-Tracker: Towards High-Speed 3D Point Tracking via Multi-View Event-RGB Hybrid Cameras-
[pdf]
[supp]
[bibtex]@InProceedings{Chang_2026_CVPR, author = {Chang, Yiqian and Ye, Qinghong and Xu, Haoran and Li, Jianing and Ma, Dongyang and Wang, Xuan and Zhang, Wei and Tian, Yonghong and Peng, Peixi}, title = {MER-Tracker: Towards High-Speed 3D Point Tracking via Multi-View Event-RGB Hybrid Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37032-37042} }
OVSegDT: Segmenting Transformer for Open-Vocabulary Object Goal Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zemskova_2026_CVPR, author = {Zemskova, Tatiana and Staroverov, Aleksei and Yudin, Dmitry and Panov, Aleksandr}, title = {OVSegDT: Segmenting Transformer for Open-Vocabulary Object Goal Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8120-8129} }
Learning to Reason in 4D: Dynamic Spatial Understanding for Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Shengchao and Chen, Yuxin and Ge, Yuying and Huang, Wei and Lin, Jiehong and Shan, Ying and Qi, Xiaojuan}, title = {Learning to Reason in 4D: Dynamic Spatial Understanding for Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9637-9646} }
MorphAny3D: Unleashing the Power of Structured Latent in 3D Morphing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Xiaokun and Cai, Zeyu and Tang, Hao and Tai, Ying and Yang, Jian and Zhang, Zhenyu}, title = {MorphAny3D: Unleashing the Power of Structured Latent in 3D Morphing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27018-27029} }
The More, the Merrier: Contrastive Fusion for Higher-Order Multimodal Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Koutoupis_2026_CVPR, author = {Koutoupis, Stefanos and Zervou, Michaela Areti and Kontras, Konstantinos and De Vos, Maarten and Tsakalides, Panagiotis and Tsagkatakis, Grigorios}, title = {The More, the Merrier: Contrastive Fusion for Higher-Order Multimodal Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8825-8835} }
Push-and-Step: From RL-Based Balance Recovery to Physical Simulation of Dense Crowds-
[pdf]
[supp]
[bibtex]@InProceedings{Jensen_2026_CVPR, author = {Jensen, Alexis and Xu, Pei and Karamouzas, Ioannis and Pontonnier, Charles and Pettr\'e, Julien}, title = {Push-and-Step: From RL-Based Balance Recovery to Physical Simulation of Dense Crowds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16387-16397} }
Beyond Global Similarity: Multi-Conditional Retrieval for Fine-Grained Cross-Modal Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Xuan and Li, Kangle and Huang, Haohang and Meng, Rui and Zeng, Wenjun and Shen, Xiaoyu}, title = {Beyond Global Similarity: Multi-Conditional Retrieval for Fine-Grained Cross-Modal Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9699-9709} }
HFR and HDR Video from Multi-Attenuated Spikes Using a Rapidly Rotating SpokeND Filter-
[pdf]
[supp]
[bibtex]@InProceedings{Chang_2026_CVPR, author = {Chang, Yakun and Huang, Zhaojun and Yang, Siqi and Xiaokaiti, Yeliduosi and Wei, Shikui and Zhao, Yao and Huang, Tiejun and Shi, Boxin}, title = {HFR and HDR Video from Multi-Attenuated Spikes Using a Rapidly Rotating SpokeND Filter}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19716-19725} }
Experience Transfer for Multimodal LLM Agents in Minecraft Game-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Chenghao and Liu, Jun and Zhang, Songbo and Jian, Huadong and Ni, Hao and Lee, Lik-Hang and Bae, Sung-Ho and Wang, Guoqing and Yang, Yang and Zhang, Chaoning}, title = {Experience Transfer for Multimodal LLM Agents in Minecraft Game}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37143-37153} }
SCE-SLAM: Scale-Consistent Monocular SLAM via Scene Coordinate Embeddings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Yuchen and Li, Jiahe and Yu, Xiaohan and Yu, Lina and Zheng, Jin and Bai, Xiao}, title = {SCE-SLAM: Scale-Consistent Monocular SLAM via Scene Coordinate Embeddings}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7480-7490} }
FEAST: Fully Connected Expressive Attention for Spatial Transcriptomics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2026_CVPR, author = {Jeong, Taejin and Kim, Joohyeok and Kim, Jinyeong and Kim, Chanyoung and Hwang, Seong Jae}, title = {FEAST: Fully Connected Expressive Attention for Spatial Transcriptomics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26793-26802} }
HeSS: Head Sensitivity Score for Sparsity Redistribution in VGGT-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Yongsung and Song, Wooseok and Lew, Jaihyun and Hwangbo, Hun and Lee, Jaehoon and Yoon, Sungroh}, title = {HeSS: Head Sensitivity Score for Sparsity Redistribution in VGGT}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36509-36517} }
InterAgent: Physics-based Multi-agent Command Execution via Diffusion on Interaction Graphs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Bin and Zhang, Ruichi and Liang, Han and Zhang, Jingyan and Zhang, Juze and Chen, Xin and Xu, Lan and Yu, Jingyi and Wang, Jingya}, title = {InterAgent: Physics-based Multi-agent Command Execution via Diffusion on Interaction Graphs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15253-15265} }
Towards Dynamic Modality Alignment in Multimodal Continual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Jiayao and Lyu, Fan and Liu, Tianle and Hu, Fuyuan and Feng, Wei}, title = {Towards Dynamic Modality Alignment in Multimodal Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39911-39921} }
Test-Time Perturbation Tuning with Delayed Feedback for Vision-Language-Action Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zang_2026_CVPR, author = {Zang, Zehua and Wang, Xi and Sun, Fuchun and Xu, Xiao and Liu, Lixiang and Zhou, Jiahuan and Li, Jiangmeng}, title = {Test-Time Perturbation Tuning with Delayed Feedback for Vision-Language-Action Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8110-8119} }
SonoWorld: From One Image to a 3D Audio-Visual Scene-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2026_CVPR, author = {Jin, Derong and Chen, Xiyi and Lin, Ming C. and Gao, Ruohan}, title = {SonoWorld: From One Image to a 3D Audio-Visual Scene}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30194-30204} }
AREA3D: Active Reconstruction Agent with Unified Feed-Forward 3D Perception and Vision-Language Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Tianling and Gan, Shengzhe and Gu, Leslie and Li, Yuelei and Zhan, Fangneng and Pfister, Hanspeter}, title = {AREA3D: Active Reconstruction Agent with Unified Feed-Forward 3D Perception and Vision-Language Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37133-37142} }
Towards High-resolution and Disentangled Reference-based Sketch Colorization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Dingkun and Wang, Xinrui and Wang, Ru and Li, Zhuoru and Yu, Jinze and Iwasawa, Yusuke and Matsuo, Yutaka and Guo, Jiaxian}, title = {Towards High-resolution and Disentangled Reference-based Sketch Colorization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11472-11481} }
Detect Any AI-Counterfeited Text Image-
[pdf]
[supp]
[bibtex]@InProceedings{Qu_2026_CVPR, author = {Qu, Chenfan and Zhong, Yiwu and Zhu, Xuekang and Li, Junchi and Jiang, Changjiang and Liu, Jian and Jin, Lianwen}, title = {Detect Any AI-Counterfeited Text Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35437-35450} }
ExpoCM: Exposure-Aware One-Step Generative Single-Image HDR Reconstruction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Aoyu and Liu, Zhen and Wang, Ziyi and Chen, Dian and Zeng, Bing and Liu, Shuaicheng}, title = {ExpoCM: Exposure-Aware One-Step Generative Single-Image HDR Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29909-29918} }
UniDex: A Robot Foundation Suite for Universal Dexterous Hand Control from Egocentric Human Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Gu and Xu, Qicheng and Zhang, Haozhe and Ma, Jianhan and He, Long and Bao, Yiming and Ping, Zeyu and Yuan, Zhecheng and Lu, Chenhao and Yuan, Chengbo and Liang, Tianhai and Tian, Xiaoyu and Shao, Maanping and Zhang, Feihong and Ding, Mingyu and Gao, Yang and Zhao, Hao and Zhao, Hang and Xu, Huazhe}, title = {UniDex: A Robot Foundation Suite for Universal Dexterous Hand Control from Egocentric Human Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1841-1852} }
Open-world Hand-Object Interaction Video Generation Based on Structure and Contact-aware Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Haodong and Yu, Hang and Zhong, Zhide and Yuan, Weilin and Gong, Xin and Luo, Zehang and Heyu, Chengxi and Li, Junfeng and Song, Wenxuan and Zhou, Shunbo and Li, Haoang}, title = {Open-world Hand-Object Interaction Video Generation Based on Structure and Contact-aware Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16031-16041} }
LaMoGen: Language to Motion Generation Through LLM-Guided Symbolic Inference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Junkun and Au, Ho Yin and Xiang, Jingyu and Chen, Jie}, title = {LaMoGen: Language to Motion Generation Through LLM-Guided Symbolic Inference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9364-9373} }
DNF-SR: Dual-Input and Negative-Aware Feature Fine-Tuning for Real-World Image Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Shuhao and Liao, Wenjie and Vance, Hayden and Dong, Hang and Zhang, Rui and Guo, Chun-Le and Li, Chongyi}, title = {DNF-SR: Dual-Input and Negative-Aware Feature Fine-Tuning for Real-World Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38176-38186} }
Unifying Language-Action Understanding and Generation for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xinyang and Liu, Qian and Ding, Wenjie and Yang, Zhao and Li, Wei and Liu, Chang and Li, Bailin and Zhan, Kun and Lang, Xianpeng and Chen, Wei}, title = {Unifying Language-Action Understanding and Generation for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25193-25203} }
Tackling Model Bias via Game-theoretic Multi-agent Collaboration Framework for Hateful Meme Classification-
[pdf]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Yiwei and Guo, Zhengliang and Yuan, Shaozu and Hu, Chengyin and Jia, Zhiyang and Guo, Jiujiang and Chen, Meng and Wang, Peiying and Wang, Longbiao}, title = {Tackling Model Bias via Game-theoretic Multi-agent Collaboration Framework for Hateful Meme Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22143-22152} }
ProFocus: Proactive Perception and Focused Reasoning in Vision-and-Language Navigation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xue_2026_CVPR, author = {Xue, Wei and Li, Mingcheng and Wu, Xuecheng and Tang, Jingqun and Yang, Dingkang and Zhang, Lihua}, title = {ProFocus: Proactive Perception and Focused Reasoning in Vision-and-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18129-18139} }
Taming Generative Diffusion Model for Task-Oriented Infrared Imaging-
[pdf]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Tengyu and Dai, Zhilong and Diao, Yubo and An, Guanming and Ma, Long and Liu, Jinyuan and Liu, Risheng}, title = {Taming Generative Diffusion Model for Task-Oriented Infrared Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30843-30853} }
PackUV: Packed Gaussian UV Maps for 4D Volumetric Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rai_2026_CVPR, author = {Rai, Aashish and Xing, Angela and Agarwal, Anushka and Cong, Xiaoyan and Li, Zekun and Lu, Tao and Prakash, Aayush and Sridhar, Srinath}, title = {PackUV: Packed Gaussian UV Maps for 4D Volumetric Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22581-22593} }
PRUE: A Practical Recipe for Field Boundary Segmentation at Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Muhawenayo_2026_CVPR, author = {Muhawenayo, Gedeon and Robinson, Caleb and Khanal, Subash and Fang, Zhanpei and Corley, Isaac and Wollam, Alexander and Gao, Tianyi and Strnad, Leonard and Avery, Ryan and Estes, Lyndon and T\'arano, Ana and Jacobs, Nathan and Kerner, Hannah}, title = {PRUE: A Practical Recipe for Field Boundary Segmentation at Scale}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6484-6495} }
Recovering Physically Plausible Human-Object Interactions from Monocular Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Dingbang and Vouga, Etienne and Huang, Qixing and Pavlakos, Georgios}, title = {Recovering Physically Plausible Human-Object Interactions from Monocular Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7079-7088} }
Ego2Web: A Web Agent Benchmark Grounded in Egocentric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Shoubin and Shu, Lei and Yang, Antoine and Fu, Yao and Sunkara, Srinivas and Wang, Maria and Chen, Jindong and Bansal, Mohit and Gong, Boqing}, title = {Ego2Web: A Web Agent Benchmark Grounded in Egocentric Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25633-25643} }
Beyond Fixed Formulas: Data-Driven Linear Predictor for Efficient Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2026_CVPR, author = {Shen, Zhirong and Huang, Rui and Liu, Jiacheng and Zou, Chang and Cai, Peiliang and Zheng, Shikang and Shi, Zhengyi and Feng, Liang and Zhang, Linfeng}, title = {Beyond Fixed Formulas: Data-Driven Linear Predictor for Efficient Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30792-30801} }
SPARK: Sim-ready Part-level Articulated Reconstruction with VLM Knowledge-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Yumeng and Jiang, Ying and Lu, Jiayin and Yang, Yin and Jiang, Chenfanfu}, title = {SPARK: Sim-ready Part-level Articulated Reconstruction with VLM Knowledge}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7233-7243} }
Diversity over Uniformity: Rethinking Representation in Generated Image Detection-
[pdf]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Qinghui and Zhang, Haifeng and Qin, Qiao and Liu, Bo and Bi, Xiuli and Xiao, Bin}, title = {Diversity over Uniformity: Rethinking Representation in Generated Image Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40407-40417} }
LaSM: Layer-wise Scaling Mechanism for Defending Pop-up Attack on GUI Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Zihe and Zhang, Zhuosheng and Gui, Jiaping and Liu, Gongshen}, title = {LaSM: Layer-wise Scaling Mechanism for Defending Pop-up Attack on GUI Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6528-6537} }
AGiLe: Learning Robust Long-Horizon Manipulation via Affordance-Grounded Bidirectional Latent Planning-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zixuan and Feng, Xiangrong and Shi, Jieqi and Shao, Lin and Huo, Jing and Gao, Yang}, title = {AGiLe: Learning Robust Long-Horizon Manipulation via Affordance-Grounded Bidirectional Latent Planning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6760-6769} }
MANSION: Multi-floor lANguage-to-3D Scene generatIOn for loNg-horizon tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Che_2026_CVPR, author = {Che, Lirong and Wen, Shuo and Huang, Shan and Wang, Chuang and Yang, Yuzhe and Dudek, Gregory and Wang, Xueqian and Su, Jian}, title = {MANSION: Multi-floor lANguage-to-3D Scene generatIOn for loNg-horizon tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37175-37185} }
Unlocking Strong Supervision: A Data-Centric Study of General-Purpose Audio Pre-Training Methods-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Xuanru and Shao, Yiwen and Tseng, Wei-Cheng and Yu, Dong}, title = {Unlocking Strong Supervision: A Data-Centric Study of General-Purpose Audio Pre-Training Methods}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24780-24791} }
UniDAC: Universal Metric Depth Estimation for Any Camera-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ganesan_2026_CVPR, author = {Ganesan, Girish Chandar and Guo, Yuliang and Ren, Liu and Liu, Xiaoming}, title = {UniDAC: Universal Metric Depth Estimation for Any Camera}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26953-26963} }
MaxMark: High-Capacity Diffusion-Native Watermarking via Robust and Invertible Latent Embedding-
[pdf]
[supp]
[bibtex]@InProceedings{Chang_2026_CVPR, author = {Chang, Xuanhang and Yang, Zhonghao and Zhuo, Cheng and Li, Yu}, title = {MaxMark: High-Capacity Diffusion-Native Watermarking via Robust and Invertible Latent Embedding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9394-9403} }
DrivePTS: A Progressive Learning Framework with Textual and Structural Enhancement for Driving Scene Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zhechao and Zeng, Yiming and Ma, Lufan and Fu, Zeqing and Bai, Chen and Yin, Dongshuo and Lin, Ziyao and Lu, Cheng}, title = {DrivePTS: A Progressive Learning Framework with Textual and Structural Enhancement for Driving Scene Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3699-3708} }
Beyond Scanpaths: Graph-Based Gaze Simulation in Dynamic Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Palmer_2026_CVPR, author = {Palmer, Luke and Palasek, Petar and Abdelkawy, Hazem}, title = {Beyond Scanpaths: Graph-Based Gaze Simulation in Dynamic Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15199-15211} }
See What I Mean: Aligning Vision and Language Representations for Video Fine-grained Object Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Boyuan and Yin, Bo-Wen and Li, Yuan-Ming and Wei, Xihan and Hou, Qibin}, title = {See What I Mean: Aligning Vision and Language Representations for Video Fine-grained Object Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36814-36827} }
StreamingTOM: Streaming Token Compression for Efficient Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xueyi and Tao, Keda and Shao, Kele and Wang, Huan}, title = {StreamingTOM: Streaming Token Compression for Efficient Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24675-24685} }
Local Motion Matters: A Deconstruct-Recompose Paradigm for Reinforcement Learning Pre-training from Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Jinwen and Lin, Youfang and Hu, Xiaobo and Wang, Shuo and Lv, Kai}, title = {Local Motion Matters: A Deconstruct-Recompose Paradigm for Reinforcement Learning Pre-training from Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9859-9868} }
VirtueBench: Evaluating Trustworthiness under Uncertainty in Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Xueqing and Li, Bohan and Li, Yan and Yang, Zhenheng}, title = {VirtueBench: Evaluating Trustworthiness under Uncertainty in Long Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40581-40590} }
Haptic Neural Fields: Bringing Tactile Interactions to 3D Rendered Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Stefani_2026_CVPR, author = {Stefani, Antonio Luigi and Bisagno, Niccol\`o and Conci, Nicola and Steinbach, Eckehard and De Natale, Francesco}, title = {Haptic Neural Fields: Bringing Tactile Interactions to 3D Rendered Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16021-16030} }
HUMORCHAIN: Theory-Guided Multi-Stage Reasoning for Interpretable Multimodal Humor Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jiajun and Luo, Shijia and Zhang, Ruikang and Su, Qi}, title = {HUMORCHAIN: Theory-Guided Multi-Stage Reasoning for Interpretable Multimodal Humor Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19176-19185} }
Goal Force: Teaching Video Models To Accomplish Physics-Conditioned Goals-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gillman_2026_CVPR, author = {Gillman, Nate and Zhou, Yinghua and Tang, Zitian and Luo, Evan and Chakravarthy, Arjan and Aggarwal, Daksh and Freeman, Michael and Sun, Chen}, title = {Goal Force: Teaching Video Models To Accomplish Physics-Conditioned Goals}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20077-20087} }
Selectively Extracting and Injecting Visual Attributes into Text-to-Image Models-
[pdf]
[supp]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, Seunghwan and Yun, Jooyeol and Lee, Youngdo and Choo, Jaegul}, title = {Selectively Extracting and Injecting Visual Attributes into Text-to-Image Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21976-21985} }
LoFA: Learning to Predict Personalized Prior for Fast Adaptation of Visual Generative Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hao_2026_CVPR, author = {Hao, Yiming and Xu, Mutian and Ye, Chongjie and Qin, Jie and Lu, Shunlin and Qin, Yipeng and Han, Xiaoguang}, title = {LoFA: Learning to Predict Personalized Prior for Fast Adaptation of Visual Generative Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21986-21996} }
GeoTikzBridge: Advancing Multimodal Code Generation for Geometric Perception and Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Jiayin and Sun, Caixia and Yang, Boyu and Li, Hailin and Chen, Xiao and Zhang, Yi and Ding, Errui and Li, Liang and Deng, Chao and Feng, Junlan}, title = {GeoTikzBridge: Advancing Multimodal Code Generation for Geometric Perception and Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9593-9603} }
Face-Guided Sentiment Boundary Enhancement for Weakly-Supervised Temporal Sentiment Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Cailing and Li, Zhangbin and Zhou, Jinxing and Qian, Wei and Hu, Jingjing and Zhou, Yanghao and Duan, Zhangling and Guo, Dan}, title = {Face-Guided Sentiment Boundary Enhancement for Weakly-Supervised Temporal Sentiment Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24759-24769} }
VLA Models Are More Generalizable Than You Think: Revisiting Physical and Spatial Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Weiqi and Zhang, Quande and Zhai, Ruifeng and Lin, Liang and Wang, Guangrun}, title = {VLA Models Are More Generalizable Than You Think: Revisiting Physical and Spatial Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35025-35035} }
SIMSPINE: A Biomechanics-Aware Simulation Framework for 3D Spine Motion Annotation and Benchmarking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Khan_2026_CVPR, author = {Khan, Muhammad Saif Ullah and Stricker, Didier}, title = {SIMSPINE: A Biomechanics-Aware Simulation Framework for 3D Spine Motion Annotation and Benchmarking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21176-21187} }
ResCa: Residual Caching for Diffusion Transformers Acceleration-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Haipeng and Li, Yu and Tang, Fan and Lu, Yixing and Cao, Juan and Tang, Sheng}, title = {ResCa: Residual Caching for Diffusion Transformers Acceleration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32957-32966} }
Omni2Sound: Towards Unified Video-Text-to-Audio Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dai_2026_CVPR, author = {Dai, Yusheng and Chen, Zehua and Jiang, Yuxuan and Ke, Qiuhong and Cai, Jianfei and Zhu, Jun}, title = {Omni2Sound: Towards Unified Video-Text-to-Audio Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1661-1671} }
Composing Concepts from Images and Videos via Concept-prompt Binding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kong_2026_CVPR, author = {Kong, Xianghao and Zhang, Zeyu and Guo, Yuwei and Zhao, Zhuoran and Zhang, Songchun and Rao, Anyi}, title = {Composing Concepts from Images and Videos via Concept-prompt Binding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14800-14810} }
RECS4R: Bridging Semantics and Geometry for Referring Remote Sensing Interpretation-
[pdf]
[supp]
[bibtex]@InProceedings{Chai_2026_CVPR, author = {Chai, Jinming and Li, Lingling and Jiao, Licheng and Lu, Xiaoqiang and Sun, Long and Liu, Xu and Ma, Wenping and Li, Weibin}, title = {RECS4R: Bridging Semantics and Geometry for Referring Remote Sensing Interpretation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42213-42224} }
LogCD: Local-to-global Consistency Distillation for Few-step Image Generation-
[pdf]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Qingsong and Liao, Zhenyi and Chen, Chen and Deng, Zhijie and Lu, Haonan}, title = {LogCD: Local-to-global Consistency Distillation for Few-step Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8954-8964} }
Universal Guideline-Driven Image Clustering via a Hybrid LLM Agent-
[pdf]
[supp]
[bibtex]@InProceedings{Zhong_2026_CVPR, author = {Zhong, Wenliang and Barton, Rob and Goncalves, Lucas and Kumar, Kushal and Jiang, Feng and Ma, Hehuan and Guo, Yuzhi and Bansal, Vidit and Bouyarmane, Karim and Huang, Junzhou}, title = {Universal Guideline-Driven Image Clustering via a Hybrid LLM Agent}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33749-33760} }
EW-DETR: Evolving World Object Detection via Incremental Low-Rank DEtection TRansformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Monga_2026_CVPR, author = {Monga, Munish and Chudasama, Vishal and Wasnik, Pankaj and Jawahar, C.V.}, title = {EW-DETR: Evolving World Object Detection via Incremental Low-Rank DEtection TRansformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11324-11333} }
GOR-IS: 3D Gaussian Object Removal In the Intrinsic Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yonghao and Gao, Yupeng and Yang, Jian and Xie, Jin and Wang, Beibei}, title = {GOR-IS: 3D Gaussian Object Removal In the Intrinsic Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40896-40906} }
AlcheMinT: Fine-grained Temporal Control for Multi-Reference Consistent Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Girish_2026_CVPR, author = {Girish, Sharath and Ivanov, Viacheslav and Chen, Tsai-Shien and Chen, Hao and Siarohin, Aliaksandr and Tulyakov, Sergey}, title = {AlcheMinT: Fine-grained Temporal Control for Multi-Reference Consistent Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23227-23237} }
Understanding the Role of Hallucination in Reinforcement Post-Training of Multimodal Reasoning Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Gengwei and Peng, Jie and Tan, Zhen and Qiu, Mufan and Mahjoub, Hossein Nourkhiz and Tadiparthi, Vaishnav and Lee, Kwonjoon and Zhang, Yanyong and Chen, Tianlong}, title = {Understanding the Role of Hallucination in Reinforcement Post-Training of Multimodal Reasoning Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25326-25335} }
QuietPrune: Query-Guided Early Token Pruning for Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Tianxiao and Zhao, Shanwei and Fang, Shuo and Zhu, Shiai and Ma, Chenguang}, title = {QuietPrune: Query-Guided Early Token Pruning for Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3553-3562} }
b-CLIP: Text-Conditioned Contrastive Learning for Multi-Granular Vision-Language Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Zohra_2026_CVPR, author = {Zohra, Fatimah and Zhao, Chen and Itani, Hani and Ghanem, Bernard}, title = {b-CLIP: Text-Conditioned Contrastive Learning for Multi-Granular Vision-Language Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {680-689} }
IMU-HOI: A Symbiotic Framework for Coherent Human-Object Interaction and Motion Capture via Contact-Conscious Inertial Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Lizhou and Xia, Songpengcheng and Lai, Zengyuan and Sun, Lan and Yang, Jiarui and Pei, Ling}, title = {IMU-HOI: A Symbiotic Framework for Coherent Human-Object Interaction and Motion Capture via Contact-Conscious Inertial Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42901-42910} }
From Sketch to Fresco: Efficient Diffusion Transformer with Progressive Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Shikang and Chen, Guantao and He, Landis and Liu, Jiacheng and Lin, Yuqi and Zou, Chang and Zhang, Linfeng}, title = {From Sketch to Fresco: Efficient Diffusion Transformer with Progressive Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18714-18723} }
DSO: Direct Steering Optimization for Bias Mitigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Paes_2026_CVPR, author = {Paes, Lucas Monteiro and Sivakumar, Nivedha and Wang, Yinong Oliver and Fedzechkina, Masha and Theobald, Barry-John and Zappella, Luca and Apostoloff, Nicholas}, title = {DSO: Direct Steering Optimization for Bias Mitigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31714-31724} }
From Pixel to Precision: Enhancing Handwritten Mathematical Expression Recognition with Image-Level Reward-
[pdf]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Ze and Zhang, Kai and Wang, Xianquan and Liu, Shuochen and Yan, Jiaxian and Han, Yupeng and Liu, Qi}, title = {From Pixel to Precision: Enhancing Handwritten Mathematical Expression Recognition with Image-Level Reward}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25948-25957} }
Rethinking Model Selection in VLM Through the Lens of Gromov-Wasserstein Distance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Muyang and Liu, Yucheng and Ma, Jianbo and Osborne, Elliot and Han, Bo and Liu, Tongliang}, title = {Rethinking Model Selection in VLM Through the Lens of Gromov-Wasserstein Distance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17237-17247} }
TSTM: Temporal Segmentation for Task-relevant Mask in Visual Reinforcement Learning Generalization-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2026_CVPR, author = {Du, Weicheng and Meng, Wenjia and Zhang, Zhengzhe and Yin, Yilong and Lu, Xiankai}, title = {TSTM: Temporal Segmentation for Task-relevant Mask in Visual Reinforcement Learning Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27999-28009} }
Cross-modal Identity Mapping: Minimizing Information Loss in Modality Conversion via Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2026_CVPR, author = {Jia, Haonan and Dong, Shichao and Dong, Xin and Sun, Zenghui and Wang, Jin and Lan, Jinsong and Zhu, Xiaoyong and Zheng, Bo and Zhang, Kaifu}, title = {Cross-modal Identity Mapping: Minimizing Information Loss in Modality Conversion via Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {766-777} }
Graph Attention Prototypical Network for Robust Few-Shot Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Tingyun and Liu, Licheng and Zhang, Qibin and Feng, Qiying and Chen, C. L. Philip}, title = {Graph Attention Prototypical Network for Robust Few-Shot Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33933-33942} }
Towards Highly Transferable Vision-Language Attack via Semantic-Augmented Dynamic Contrastive Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yuanbo and Xu, Tianyang and Hu, Cong and Zhou, Tao and Wu, Xiaojun and Kittler, Josef}, title = {Towards Highly Transferable Vision-Language Attack via Semantic-Augmented Dynamic Contrastive Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1493-1502} }
CC-VQA: Conflict- and Correlation-Aware Method for Mitigating Knowledge Conflict in Knowledge-Based Visual Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2026_CVPR, author = {Hong, Yuyang and Gu, Jiaqi and Lou, Yujing and Fan, Lubin and Yang, Qi and Wang, Ying and Ding, Kun and Wu, Yue and Xiang, Shiming and Ye, Jieping}, title = {CC-VQA: Conflict- and Correlation-Aware Method for Mitigating Knowledge Conflict in Knowledge-Based Visual Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5232-5241} }
Ghosts in the Point Clouds: De-glaring LiDAR in the Transient Domain-
[pdf]
[supp]
[bibtex]@InProceedings{Gump_2026_CVPR, author = {Gump, Avery and Henley, Connor and Cheong, Sungjin and Prabhakara, Akarsh and Gupta, Mohit}, title = {Ghosts in the Point Clouds: De-glaring LiDAR in the Transient Domain}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17174-17183} }
OneOcc: Semantic Occupancy Prediction for Legged Robots with a Single Panoramic Camera-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Hao and Wang, Ze and Guo, Shangwei and Duan, Mengfei and Wang, Song and Chen, Teng and Yang, Kailun and Wang, Lin and Wang, Kaiwei}, title = {OneOcc: Semantic Occupancy Prediction for Legged Robots with a Single Panoramic Camera}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14229-14240} }
ModularAgent: A Task-Aware Modular Framework for Joint Optimization of Multimodal Large Language Models and World Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhan_2026_CVPR, author = {Zhan, Yu-Wei and Wang, Xin and Mao, Pengzhe and Feng, Tongtong and Wang, Ren and Zhu, Wenwu}, title = {ModularAgent: A Task-Aware Modular Framework for Joint Optimization of Multimodal Large Language Models and World Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8087-8096} }
Bridging Domains through Subspace-Aware Model Merging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chaves_2026_CVPR, author = {Chaves, Levy and Zhou, Chao and Burkholz, Rebekka and Valle, Eduardo and Avila, Sandra}, title = {Bridging Domains through Subspace-Aware Model Merging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7990-7999} }
Dynamic Magic: Unleashing Restricted Knowledge for Lifelong Person Re-Identification-
[pdf]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Jinjia and Tan, Jican and Yu, Jiazuo and Tao, Zeze and Wang, Huibing}, title = {Dynamic Magic: Unleashing Restricted Knowledge for Lifelong Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32278-32287} }
Dynamic Black-hole Emission Tomography with Physics-informed Neural Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Berthy T. and Chael, Andrew A. and Bromley, David and Levis, Aviad and Freeman, William T. and Bouman, Katherine L.}, title = {Dynamic Black-hole Emission Tomography with Physics-informed Neural Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12511-12521} }
Spatial-Aware VLA Pretraining through Visual-Physical Alignment from Human Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Yicheng and Zhang, Wanpeng and Wang, Ye and Luo, Hao and Yuan, Haoqi and Zheng, Sipeng and Lu, Zongqing}, title = {Spatial-Aware VLA Pretraining through Visual-Physical Alignment from Human Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {712-723} }
PnP-CM: Consistency Models as Plug-and-Play Priors for Inverse Problems-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gulle_2026_CVPR, author = {Gulle, Merve and Yun, Junno and Alcalar, Yasar Utku and Akcakaya, Mehmet}, title = {PnP-CM: Consistency Models as Plug-and-Play Priors for Inverse Problems}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38464-38474} }
Remote Sensing Image Super-Resolution for Imbalanced Textures: A Texture-Aware Diffusion Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Enzhuo and Zhao, Sijie and Muhtar, Dilxat and Li, Zhenshi and Zhang, Xueliang and Xiao, Pengfeng}, title = {Remote Sensing Image Super-Resolution for Imbalanced Textures: A Texture-Aware Diffusion Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38238-38247} }
Hist2Style: Histogram-Guided Stylization with Bilateral Grids-
[pdf]
[supp]
[bibtex]@InProceedings{Galor_2026_CVPR, author = {Galor, Dekel and Pikielny, Adam and Zhang, Zhoutong and Wang, Ke and Waller, Laura and Chen, Jiawen and Chugunov, Ilya}, title = {Hist2Style: Histogram-Guided Stylization with Bilateral Grids}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29717-29726} }
Back to the Feature: Explaining Video Classifiers with Video Counterfactual Explanations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Chao and Che, Chengan and Chen, Xinyue and Tsoka, Sophia and Garcia-Peraza-Herrera, Luis C.}, title = {Back to the Feature: Explaining Video Classifiers with Video Counterfactual Explanations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {9921--9931} }
Planning in 8 Tokens: A Compact Discrete Tokenizer for Latent World Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Dongwon and Seo, Gawon and Lee, Jinsung and Cho, Minsu and Kwak, Suha}, title = {Planning in 8 Tokens: A Compact Discrete Tokenizer for Latent World Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8183--8193} }
Mamba Learns in Context: Structure-Aware Domain Generalization for Multi-Task Point Cloud Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Jincen and Zhou, Qianyu and Li, Yuhang and Su, Kui and Wang, Meili and Chang, Jian and Zhang, Jian Jun and Lu, Xuequan}, title = {Mamba Learns in Context: Structure-Aware Domain Generalization for Multi-Task Point Cloud Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {39098--39110} }
ALLNet: Multi-task Dense Prediction for Degraded Images-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Weiran and Wu, Jialing and Chang, Yaqi and He, Gang and Xu, Li and Wu, Chang and Li, Yunsong}, title = {{ALLNet}: Multi-task Dense Prediction for Degraded Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {20421--20432} }
GMT: Effective Global Framework for Multi-Camera Multi-Target Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhen_2026_CVPR, author = {Zhen, Yihao and Xu, Mingyue and Wang, Qiang and Fan, Baojie and Dong, Jiahua and Zhao, Tinghui and Fan, Huijie}, title = {{GMT}: Effective Global Framework for Multi-Camera Multi-Target Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {28201--28210} }
Vision-Speech Models: Teaching Speech Models to Converse about Images-
[pdf]
[supp]
[bibtex]@InProceedings{Royer_2026_CVPR, author = {Royer, Am{\'e}lie and B{\"o}hle, Moritz and Mazar{\'e}, Laurent and Zeghidour, Neil and D{\'e}fossez, Alexandre and P{\'e}rez, Patrick}, title = {Vision-Speech Models: Teaching Speech Models to Converse about Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1706--1715} }
Geometric-Photometric Event-based 3D Gaussian Ray Tracing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kohyama_2026_CVPR, author = {Kohyama, Kai and Aoki, Yoshimitsu and Gallego, Guillermo and Shiba, Shintaro}, title = {Geometric-Photometric Event-based {3D} {Gaussian} Ray Tracing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {22301--22311} }
Towards Universal Computational Aberration Correction in Photographic Cameras: A Comprehensive Benchmark Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qian_2026_CVPR, author = {Qian, Xiaolong and Jiang, Qi and Gao, Yao and Sun, Lei and Yi, Zhonghua and Yang, Kailun and Van Gool, Luc and Wang, Kaiwei}, title = {Towards Universal Computational Aberration Correction in Photographic Cameras: A Comprehensive Benchmark Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {26772--26782} }
MatchED: Crisp Edge Detection Using End-to-End, Matching-based Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cetinkaya_2026_CVPR, author = {Cetinkaya, Bedrettin and Kalkan, Sinan and Akbas, Emre}, title = {{MatchED}: Crisp Edge Detection Using End-to-End, Matching-based Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42093--42103} }
Robust Promptable Video Object Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Sohyun and Gwon, Yeho and Hoyer, Lukas and Schindler, Konrad and Sakaridis, Christos and Kwak, Suha}, title = {Robust Promptable Video Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {39838--39847} }
Color-Encoded Illumination for High-Speed Volumetric Scene Reconstruction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Novikov_2026_CVPR, author = {Novikov, David and Vaknin, Eilon and Tumanyan, Narek and Sheinin, Mark}, title = {Color-Encoded Illumination for High-Speed Volumetric Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {41784--41793} }
SAG-GNN: Semantic-Aware Guided GNN for Descriptor-Free 2D-3D Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shihua and Xu, Tianhao and Li, Zizhuo and Ma, Qing and Ma, Jiayi}, title = {{SAG-GNN}: Semantic-Aware Guided {GNN} for Descriptor-Free {2D-3D} Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {31410--31420} }
Towards Storytelling Animations: Joint Synthesis of Human and Camera Motions-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Boyuan and Xi, Yingjie and He, Rui and Na, Jinhe and Cao, Ying and Wang, Pengjie and Zhang, Jian J. and Yang, Xiaosong}, title = {Towards Storytelling Animations: Joint Synthesis of Human and Camera Motions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38376--38386} }
Evidential Transformation Network: Turning Pretrained Models into Evidential Models for Post-hoc Uncertainty Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chun_2026_CVPR, author = {Chun, Yongchan and Park, Chanhee and Yoon, Jeongho and Seo, Jaehyung and Lim, Heuiseok}, title = {Evidential Transformation Network: Turning Pretrained Models into Evidential Models for Post-hoc Uncertainty Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6157--6166} }
Reasoning-Driven Anomaly Detection and Localization with Image-Level Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2026_CVPR, author = {Jin, Yizhou and Feng, Yuezhu and Zhang, Jinjin and Wang, Peng and Liu, Qingjie and Wang, Yunhong}, title = {Reasoning-Driven Anomaly Detection and Localization with Image-Level Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {43061--43071} }
DEVA: Fine-tuning Multimodal Large Language Models for Visual Perception Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Das_2026_CVPR, author = {Das, Debasmit and Hayat, Munawar and Porikli, Fatih}, title = {{DEVA}: Fine-tuning Multimodal Large Language Models for Visual Perception Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {39141--39151} }
SplitFlux: Learning to Decouple Content and Style from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yitong and Wang, Yinglin and Wang, Changshuo and Zhang, Yongjun and Chen, Ziyang and He, Shuting}, title = {{SplitFlux}: Learning to Decouple Content and Style from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {572--582} }
TAlignDiff: Automatic Tooth Alignment assisted by Diffusion-based Transformation Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yunbi and Tang, Enqi and Li, Shiyu and Shuai, Hui and Ma, Lei and Li, Juncheng and Yu, Kuai and Lou, Shu and Pan, Yongchu and Liu, Qingshan}, title = {{TAlignDiff}: Automatic Tooth Alignment assisted by Diffusion-based Transformation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {22784--22793} }
BA-GS: Bayesian Adaptive Gaussian Splatting for SFM-Free 3D Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Zhongjie and Lin, Di and Wang, Xin and Dong, Haotian and Wang, Chong and Wu, Dongdong and Zhang, Changqing}, title = {{BA-GS}: {Bayesian} Adaptive {Gaussian} Splatting for {SFM}-Free {3D} Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {26124--26133} }
MV-Fashion: Towards Enabling Virtual Try-On and Size Estimation with Multi-View Paired Data-
[pdf]
[supp]
[bibtex]@InProceedings{Laczko_2026_CVPR, author = {Laczk{\'o}, Hunor and Jia, Libang and Truong, Loc-Phat and Hern{\'a}ndez, Diego and Escalera, Sergio and Gonzalez, Jordi and Madadi, Meysam}, title = {{MV-Fashion}: Towards Enabling Virtual Try-On and Size Estimation with Multi-View Paired Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42810--42823} }
CFG-Ctrl: Control-Based Classifier-Free Diffusion Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Hanyang and Liu, Yiyang and Chi, Jiawei and Liu, Fangfu and Xue, Ran and Duan, Yueqi}, title = {{CFG-Ctrl}: Control-Based Classifier-Free Diffusion Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {11437--11447} }
VGGT-Segmentor: Geometry-Enhanced Cross-View Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Yulu and Zhang, Bohao and Tang, Zongheng and Liao, Jitong and Wu, Wenjun and Liu, Si}, title = {{VGGT-Segmentor}: Geometry-Enhanced Cross-View Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {21690--21700} }
FlowComposer: Composable Flows for Compositional Zero-Shot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Zhenqi and Li, Lin and Chen, Long}, title = {{FlowComposer}: Composable Flows for Compositional Zero-Shot Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12396--12405} }
UAV-CB: A Complex-Background RGB-T Dataset and Local Frequency Bridge Network for UAV Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Shenghui and Hu, Menghao and Zou, Longkun and Chi, Hongyu and Li, Zekai and Gao, Feng and Yang, Fan and Wu, Qingyao and Chen, Ke}, title = {{UAV-CB}: A Complex-Background {RGB-T} Dataset and Local Frequency Bridge Network for {UAV} Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40468--40478} }
Generalizable Sparse-View 3D Reconstruction from Unconstrained Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gupta_2026_CVPR, author = {Gupta, Vinayak and Lin, Chih-Hao and Wang, Shenlong and Bhattad, Anand and Huang, Jia-Bin}, title = {Generalizable Sparse-View {3D} Reconstruction from Unconstrained Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {33217--33226} }
BuildAnyPoint: 3D Building Structured Abstraction from Diverse Point Clouds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hua_2026_CVPR, author = {Hua, Tongyan and Gong, Haoran and Liu, Yuan and Wang, Di and Chen, Ying-Cong and Zhao, Wufan}, title = {{BuildAnyPoint}: {3D} Building Structured Abstraction from Diverse Point Clouds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {17099--17109} }
XPaintNet: An eXtreme Lightweight Framework for Stereoscopic Conversion without Inpainting Network-
[pdf]
[supp]
[bibtex]@InProceedings{Yoon_2026_CVPR, author = {Yoon, Kihwan and Shin, Juyeon and Kang, Jungheum and Kim, Sijung and Jeon, Minyong}, title = {{XPaintNet}: An {eXtreme} Lightweight Framework for Stereoscopic Conversion without Inpainting Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5762--5771} }
MoDES: Accelerating Mixture-of-Experts Multimodal Large Language Models via Dynamic Expert Skipping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Yushi and Wang, Zining and Yuan, Zhihang and Ding, Yifu and Gong, Ruihao and Guo, Jinyang and Liu, Xianglong and Zhang, Jun}, title = {{MoDES}: Accelerating Mixture-of-Experts Multimodal Large Language Models via Dynamic Expert Skipping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {30205--30215} }
BarbieGait: An Identity-Consistent Synthetic Human Dataset with Versatile Cloth-Changing for Gait Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Qingyuan and Hou, Saihui and Hu, Xuecai and Huang, Yongzhen}, title = {{BarbieGait}: An Identity-Consistent Synthetic Human Dataset with Versatile Cloth-Changing for Gait Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {28402--28412} }
Why Does RL Generalize Better Than SFT? A Data-Centric Perspective on VLM Post-Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Aojun and Feng, Tao and Yuan, Hangjie and Li, Wei and Sun, Yanan}, title = {Why Does {RL} Generalize Better Than {SFT}? {A} Data-Centric Perspective on {VLM} Post-Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4761--4771} }
Cross-Hand Latent Representation for Vision-Language-Action Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Guangqi and Liang, Yutong and Ye, Jianglong and Huang, Jia-Yang and Jing, Changwei and Duan, Rocky and Abbeel, Pieter and Wang, Xiaolong and Zou, Xueyan}, title = {Cross-Hand Latent Representation for Vision-Language-Action Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {13496--13507} }
NoOVD: Novel Category Discovery and Embedding for Open-Vocabulary Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yupeng and Han, Ruize and Chen, Zhiwei and Feng, Wei and Wan, Liang}, title = {{NoOVD}: Novel Category Discovery and Embedding for Open-Vocabulary Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6304--6313} }
VINS-120K: Ultra High-Resolution Image Editing with A Large-Scale Dataset-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zhizhou and Guan, Shanyan and Gao, Zhanxin and Ci, En and Ge, Yanhao and Li, Wei and Zhang, Zhenyu and Yang, Jian and Tai, Ying}, title = {{VINS-120K}: Ultra High-Resolution Image Editing with A Large-Scale Dataset}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {15302--15312} }
SearchAD: Large-Scale Rare Image Retrieval Dataset for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Embacher_2026_CVPR, author = {Embacher, Felix and Uhrig, Jonas and Cordts, Marius and Enzweiler, Markus}, title = {{SearchAD}: Large-Scale Rare Image Retrieval Dataset for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {33088--33098} }
Space-Time Forecasting of Dynamic Scenes with Motion-aware Gaussian Grouping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Junmyeong and Choi, Hoseung and Cho, Minsu}, title = {Space-Time Forecasting of Dynamic Scenes with Motion-aware {Gaussian} Grouping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {41033--41043} }
POGA: Paraphrased and Oppositional Graph Alignment for Fine-Grained Cross-Modal Retrieval-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Junfeng and Xue, Zhe and Qi, Yuankai and Du, Junping and Kong, Xiangyang and Yan, Yishuo and Beheshti, Amin and Yang, Jian and van den Hengel, Anton and Yang, Ming-Hsuan}, title = {{POGA}: Paraphrased and Oppositional Graph Alignment for Fine-Grained Cross-Modal Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2735--2745} }
PatchAlign3D: Local Feature Alignment for Dense 3D Shape Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hadgi_2026_CVPR, author = {Hadgi, Souhail and Gong, Bingchen and Sundararaman, Ramana and Pierson, Emery and Li, Lei and Wonka, Peter and Ovsjanikov, Maks}, title = {{PatchAlign3D}: Local Feature Alignment for Dense {3D} Shape Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3014--3023} }
Nonlinear Color Transfer via Learnable Bezier Flows-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Junhyoung and Jo, Seongwoon and Park, JeongHun and Ryou, Yeonji and Yang, Jeongha and Kim, Jangho}, title = {Nonlinear Color Transfer via Learnable {Bezier} Flows}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {41741--41751} }
DreamOmni2: Multimodal Instruction-based Generation and Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Xia_2026_CVPR, author = {Xia, Bin and Peng, Bohao and Zhang, Yuechen and Huang, Junjia and Liu, Jiyang and Li, Jingyao and Tan, Haoru and Wu, Sitong and Wang, Chengyao and Wang, Yitong and Yu, Bei and Jia, Jiaya}, title = {{DreamOmni2}: Multimodal Instruction-based Generation and Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29275--29284} }
TACO: Task-Aware Contrastive Learning for Joint LiDAR Localization and 3D Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Xing_2026_CVPR, author = {Xing, Leyuan and Zhang, Huanjia and Pan, Dongyu and Wu, Hai and Xia, Qiming and Xiong, Kezheng and Li, Wen and Wen, Chenglu and Wang, Cheng}, title = {{TACO}: Task-Aware Contrastive Learning for Joint {LiDAR} Localization and {3D} Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {9965--9975} }
SAM2Text: Towards Prompt-Free and Multi-Resolution Video Scene Text Segmentation-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jing-Yao and Zhang, Heng and Zhang, Mingsen and Yang, Binbin and Yin, Fei}, title = {{SAM2Text}: Towards Prompt-Free and Multi-Resolution Video Scene Text Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3825--3834} }
JANUS: A Lightweight Framework for Jailbreaking Text-to-Image Models via Distribution Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Haolun and He, Yu and Chen, Tailun and Shao, Shuo and Chu, Zhixuan and Zhou, Hongbin and Tao, Lan and Qin, Zhan and Ren, Kui}, title = {{JANUS}: A Lightweight Framework for Jailbreaking Text-to-Image Models via Distribution Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {15719--15729} }
WiseEdit: Benchmarking Cognition- and Creativity-Informed Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Kaihang and Chen, Weile and Qiu, Haiyi and Yu, Qifan and Bu, Wendong and Wang, Zehan and Zhu, Yun and Li, Juncheng and Tang, Siliang}, title = {{WiseEdit}: Benchmarking Cognition- and Creativity-Informed Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {37268--37278} }
UniAVGen: Unified Audio and Video Generation with Asymmetric Cross-Modal Interactions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Guozhen and Zhou, Zixiang and Hu, Teng and Peng, Ziqiao and Zhang, Youliang and Chen, Yi and Zhou, Yuan and Lu, Qinglin and Wang, Limin}, title = {{UniAVGen}: Unified Audio and Video Generation with Asymmetric Cross-Modal Interactions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1950--1960} }
When Anonymity Breaks: Identifying Models Behind Text-to-Image Leaderboards-
[pdf]
[supp]
[bibtex]@InProceedings{Naseh_2026_CVPR, author = {Naseh, Ali and Suri, Anshuman and Peng, Yuefeng and Chaudhari, Harsh and Oprea, Alina and Houmansadr, Amir}, title = {When Anonymity Breaks: Identifying Models Behind Text-to-Image Leaderboards}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {24449--24459} }
PC-Talk: Precise Facial Animation Control for Audio-Driven Talking Face Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Baiqin and Zhu, Xiangyu and Shen, Fan and Xu, Hao and Lei, Zhen}, title = {{PC-Talk}: Precise Facial Animation Control for Audio-Driven Talking Face Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {25153--25162} }
Interpretable Prompts made Edit-Friendly: Token-to-Token Similarity Reduction in dLLMs for Edit-Friendly Hard Prompt Inversion-
[pdf]
[supp]
[bibtex]@InProceedings{Devulapally_2026_CVPR, author = {Devulapally, Naresh Kumar and Agarwal, Shruti and Asnani, Vishal and Lokhande, Vishnu Suresh}, title = {Interpretable Prompts made Edit-Friendly: Token-to-Token Similarity Reduction in {dLLMs} for Edit-Friendly Hard Prompt Inversion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {43290--43299} }
Egocentric Visibility-Aware Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dai_2026_CVPR, author = {Dai, Peng and Zhang, Yu and Yiqiang, Feng and Fan, Zhen and Zhang, Yang}, title = {Egocentric Visibility-Aware Human Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7047--7057} }
CoLoGen: Progressive Learning of Concept-Localization Duality for Unified Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Yuxin and Lu, Yu and Sun, Haoyuan and Yao, Huanjin and Liu, Fanglong and Sun, Yifan and Feng, Haocheng and Zhou, Hang and Wang, Jingdong}, title = {{CoLoGen}: Progressive Learning of Concept-Localization Duality for Unified Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {14724--14734} }
Topology-aware Feature Propagation for Unsupervised Non-rigid Point Cloud Correspondence-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Haozhe and Li, Rui and Wang, Zhengbao and Zhu, Xinhao and Li, Linjie and Xiong, Tianyu and Ouyang, Xuan and Yang, Jiaqi}, title = {Topology-aware Feature Propagation for Unsupervised Non-rigid Point Cloud Correspondence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {31389--31399} }
LifeEval: A Multimodal Benchmark for Assistive AI in Egocentric Daily Life Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Hengjian and Zhang, Kaiwei and Wang, Shibo and Chen, Mingjie and Cao, Qihang and Wang, Xianfeng and Zhu, Yucheng and Min, Xiongkuo and Sun, Wei and Zhu, Dandan and Zhai, Guangtao}, title = {{LifeEval}: A Multimodal Benchmark for Assistive {AI} in Egocentric Daily Life Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {32892--32902} }
VesMamba: 3D Pulmonary Vessel Segmentation from CT images via Mamba with Structural Perception and Scale-aware Filtering-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Zhipeng and Chen, Guilian and Jiang, Zheng and Wu, Huisi and Qin, Jing}, title = {{VesMamba}: {3D} Pulmonary Vessel Segmentation from {CT} images via {Mamba} with Structural Perception and Scale-aware Filtering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {1439--1449} }
FSFSplatter: Geometrically Accurate Reconstruction with Free Sparse-view Images within 2 minutes-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yibin and Pan, Yihan and Nan, Jun and Chen, Liwei and Yi, Jianjun}, title = {{FSFSplatter}: Geometrically Accurate Reconstruction with Free Sparse-view Images within 2 minutes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {26134--26143} }
Visual Grounding for Object Questions-
[pdf]
[supp]
[bibtex]@InProceedings{Everaert_2026_CVPR, author = {Everaert, Martin Nicolas and Liu, Xiruo and Takeda, Hiroyuki and Bala, Raja and Yadav, Vivek and Narayanan, Vidya}, title = {Visual Grounding for Object Questions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {11966--11975} }
HySeg: Learning Generative Priors for Structure-Aware Remote Sensing Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Qiu_2026_CVPR, author = {Qiu, Jie and Li, Xin and Yang, Fan and Wang, Yan and Yu, Dong and Wang, Changying and Dai, Linwei and Chen, Yongxiang and Chen, Youqin and Chen, Jianzhang}, title = {{HySeg}: Learning Generative Priors for Structure-Aware Remote Sensing Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6410--6420} }
Authorize-on-Demand: Dynamic Authorization with Legality-Aware Intellectual Property Protection for VLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Lianyu and Wang, Meng and Fu, Huazhu and Zhang, Daoqiang}, title = {Authorize-on-Demand: Dynamic Authorization with Legality-Aware Intellectual Property Protection for {VLMs}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6621--6630} }
R4Det: 4D Radar-Camera Fusion for High-Performance 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2026_CVPR, author = {Xia, Zhongyu and Tang, Yousen and Wang, Yongtao and Wang, Zhifeng and Qin, Weijun}, title = {{R4Det}: {4D} Radar-Camera Fusion for High-Performance {3D} Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {18766--18775} }
Decision Boundary-aware Generation for Long-tailed Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Jiacheng and Zhang, Ruichi and Shang, Chikai and Li, Mengke and Shang, Xinyi and Gao, Junlong and Zhang, Yonggang and Lu, Yang}, title = {Decision Boundary-aware Generation for Long-tailed Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29441--29450} }
When CLIP Sees More, It Fights Back Harder: Multi-View Guided Adaptive Counterattacks for Test-Time Adversarial Robustness-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Sunoh and Um, Daeho}, title = {When {CLIP} Sees More, It Fights Back Harder: Multi-View Guided Adaptive Counterattacks for Test-Time Adversarial Robustness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {15689--15699} }
LLaDA-MedV: Exploring Large Language Diffusion Models for Biomedical Image Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2026_CVPR, author = {Dong, Xuanzhao and Zhu, Wenhui and Chen, Xiwen and Wang, Zhipeng and Qiu, Peijie and Tang, Shao and Li, Xin and Wang, Yalin}, title = {{LLaDA-MedV}: Exploring Large Language Diffusion Models for Biomedical Image Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {22773--22783} }
Attribution-Guided Model Rectification of Unreliable Neural Network Behaviors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Peiyu and Akhtar, Naveed and Jiang, Jiantong and Mian, Ajmal}, title = {Attribution-Guided Model Rectification of Unreliable Neural Network Behaviors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38979--38989} }
Agent4FaceForgery: Multi-Agent LLM Framework for Realistic Face Forgery Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lai_2026_CVPR, author = {Lai, Yingxin and Yu, Zitong and Wang, Jun and Shen, Linlin and Xu, Yong and Cao, Xiaochun}, title = {{Agent4FaceForgery}: Multi-Agent {LLM} Framework for Realistic Face Forgery Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {14073--14083} }
DeCo: Frequency-Decoupled Pixel Diffusion for End-to-End Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Zehong and Wei, Longhui and Wang, Shuai and Zhang, Shiliang and Tian, Qi}, title = {{DeCo}: Frequency-Decoupled Pixel Diffusion for End-to-End Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {43600--43610} }
Fine-Tuning Impairs the Balancedness of Foundation Models in Long-tailed Personalized Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hou_2026_CVPR, author = {Hou, Shihao and Shang, Chikai and Yang, Zhiheng and Yang, Jiacheng and Shang, Xinyi and Gao, Junlong and Zhang, Yiqun and Lu, Yang}, title = {Fine-Tuning Impairs the Balancedness of Foundation Models in Long-tailed Personalized Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {17505--17514} }
Modeling Cross-vision Synergy for Unified Large Vision Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Shengqiong and Wu, Lanhu and Bao, Mingyang and Xu, Wenhao and Zhang, Hanwang and Yan, Shuicheng and Fei, Hao and Chua, Tat-Seng}, title = {Modeling Cross-vision Synergy for Unified Large Vision Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22941-22952} }
D2FANet: Enhancing Video Object Detection with Dual-Domain Feature Aggregation Network-
[pdf]
[supp]
[bibtex]@InProceedings{Qi_2026_CVPR, author = {Qi, Qiang and Shang, Wenqi and Wang, Meifang and Wang, Xiao}, title = {D2FANet: Enhancing Video Object Detection with Dual-Domain Feature Aggregation Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11229-11239} }
A Geometric Algebra-Informed 3DGS Framework for Wireless Channel Prediction-
[pdf]
[bibtex]@InProceedings{Shen_2026_CVPR, author = {Shen, Jingzhou and Zhao, Tianya and Wang, Xuyu}, title = {A Geometric Algebra-Informed 3DGS Framework for Wireless Channel Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4973-4982} }
Radar-Guided Polynomial Fitting for Metric Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rim_2026_CVPR, author = {Rim, Patrick and Park, Hyoungseob and Ezhov, Vadim and Moon, Jeffrey and Wong, Alex}, title = {Radar-Guided Polynomial Fitting for Metric Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26941-26952} }
QuadSync: Quadrifocal Tensor Synchronization via Tucker Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Miao_2026_CVPR, author = {Miao, Daniel and Lerman, Gilad and Kileel, Joe}, title = {QuadSync: Quadrifocal Tensor Synchronization via Tucker Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28786-28795} }
A Polynomial Chaos Framework for Causal Discovery in Nonlinear Uncertain Systems-
[pdf]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Liang}, title = {A Polynomial Chaos Framework for Causal Discovery in Nonlinear Uncertain Systems}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17474-17483} }
RealVLG-R1: A Large-Scale Real-World Visual-Language Grounding Benchmark for Robotic Perception and Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Linfei and Zhang, Lin and Shen, Ying}, title = {RealVLG-R1: A Large-Scale Real-World Visual-Language Grounding Benchmark for Robotic Perception and Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42396-42407} }
LoPrune: Efficient Data Pruning for LoRA-Based Fine-Tuning of Vision Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Qiang and Yang, Yaozong and Wang, Kaibin and Wei, Ziteng and Chen, Feifei and Chua, Caslon and Yang, Yun}, title = {LoPrune: Efficient Data Pruning for LoRA-Based Fine-Tuning of Vision Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5242-5252} }
Enhancing Out-of-Distribution Detection with Extended Logit Normalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2026_CVPR, author = {Ding, Yifan and Liu, Xixi and Unger, Jonas and Eilertsen, Gabriel}, title = {Enhancing Out-of-Distribution Detection with Extended Logit Normalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24823-24832} }
FlashLips: 100-FPS Mask-Free Latent Lip-Sync using Reconstruction Instead of Diffusion or GANs-
[pdf]
[supp]
[bibtex]@InProceedings{Zinonos_2026_CVPR, author = {Zinonos, Andreas and Stypu{\l}kowski, Micha{\l} and Bigata, Antoni and Petridis, Stavros and Pantic, Maja and Drobyshev, Nikita}, title = {FlashLips: 100-FPS Mask-Free Latent Lip-Sync using Reconstruction Instead of Diffusion or GANs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10898-10908} }
Towards Robust Multimodal Large Language Models Against Jailbreak Attacks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Ziyi and Cao, Yuanpu and Liu, Han and Wang, Ting and Chen, Jinghui and Ma, Fenglong}, title = {Towards Robust Multimodal Large Language Models Against Jailbreak Attacks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22847-22856} }
Bridging Domain Expertise and Generalization for Performance Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Shuxuan and Zhao, Zhilin and Kong, Quyu and Zheng, Wei-Shi}, title = {Bridging Domain Expertise and Generalization for Performance Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7967-7977} }
VideoARM: Agentic Reasoning over Hierarchical Memory for Long-Form Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Yufei and Meng, Qianke and Chen, Minghao and Ding, Jiajun and Shao, Zhenwei and Yu, Zhou}, title = {VideoARM: Agentic Reasoning over Hierarchical Memory for Long-Form Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24042-24051} }
Test-Time Instance-Specific Parameter Composition: A New Paradigm for Adaptive Generative Modeling-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tran_2026_CVPR, author = {Tran, Minh-Tuan and Le, Xuan-May and Tran, Quan Hung and Harandi, Mehrtash and Phung, Dinh and Le, Trung}, title = {Test-Time Instance-Specific Parameter Composition: A New Paradigm for Adaptive Generative Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37982-37991} }
LA-Pose: Latent Action Pretraining Meets Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zhengqing and Nair, Saurabh and Chidananda, Prajwal and Kachana, Pujith and Li, Samuel and Brown, Matthew and Furukawa, Yasutaka}, title = {LA-Pose: Latent Action Pretraining Meets Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34460-34469} }
Motion-Aware Animatable Gaussian Avatars Deblurring-
[pdf]
[arXiv]
[bibtex]@InProceedings{Niu_2026_CVPR, author = {Niu, Muyao and Zhan, Yifan and Zhu, Qingtian and Li, Zhuoxiao and Wang, Wei and Zhong, Zhihang and Sun, Xiao and Zheng, Yinqiang}, title = {Motion-Aware Animatable Gaussian Avatars Deblurring}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40140-40151} }
240FPS Stereo Vision from Monocular Mixed Spikes-
[pdf]
[supp]
[bibtex]@InProceedings{Xiaokaiti_2026_CVPR, author = {Xiaokaiti, Yeliduosi and Chang, Yakun and Bai, Yang and Huang, Zhaojun and Duan, Peiqi and Shi, Boxin}, title = {240FPS Stereo Vision from Monocular Mixed Spikes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26688-26697} }
VVS: Accelerating Speculative Decoding for Visual Autoregressive Generation via Partial Verification Skipping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2026_CVPR, author = {Dong, Haotian and Li, Ye and Lu, Rongwei and Tang, Chen and Xia, Shu-Tao and Wang, Zhi}, title = {VVS: Accelerating Speculative Decoding for Visual Autoregressive Generation via Partial Verification Skipping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12173-12182} }
Beyond Top Activations: Efficient and Reliable Crowdsourced Evaluation of Automated Interpretability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Oikarinen_2026_CVPR, author = {Oikarinen, Tuomas and Yan, Ge and Kulkarni, Akshay and Weng, Tsui-Wei}, title = {Beyond Top Activations: Efficient and Reliable Crowdsourced Evaluation of Automated Interpretability}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2885-2894} }
Balanced Dataset Distillation via Modeling Multiple Visual Pattern Distribution-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Guanghui and Liang, Xuefeng and Wen, Qixiang}, title = {Balanced Dataset Distillation via Modeling Multiple Visual Pattern Distribution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19634-19643} }
NOVA: Sparse Control, Dense Synthesis for Pair-Free Video Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Tianlin and Dai, Jiayi and Yuan, Chenpu and Lv, Zhengyao and Yang, Binxin and Yin, Hubery and Li, Chen and Lyu, Jing and Shan, Caifeng and Si, Chenyang}, title = {NOVA: Sparse Control, Dense Synthesis for Pair-Free Video Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1917-1927} }
X-Part: High Fidelity And Structure Coherent Shape Decomposition And Completion-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Xinhao and Xu, Jiachen and Li, Yang and Ma, Changfeng and Yang, Yunhan and Wang, Chunshi and Zhao, Zibo and Lai, Zeqiang and Zhao, Yunfei and Chen, Zhuo and Guo, Chunchao}, title = {X-Part: High Fidelity And Structure Coherent Shape Decomposition And Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27062-27071} }
FastEventDGS: Deformable Gaussian Splatting for Fast Dynamic Scenes from a Single Event Camera-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2026_CVPR, author = {Dai, Zijia and Messikommer, Nico and Zou, Rong and Zubic, Nikola and Scaramuzza, Davide and Kneip, Laurent}, title = {FastEventDGS: Deformable Gaussian Splatting for Fast Dynamic Scenes from a Single Event Camera}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29537-29546} }
FE2E: From Editor to Dense Geometry Estimator-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Jiyuan and Lin, Chunyu and Sun, Lei and Liu, Rongying and Nie, Lang and Li, Mingxing and Liao, Kang and Chu, Xiangxiang}, title = {FE2E: From Editor to Dense Geometry Estimator}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19844-19853} }
Bridging Fidelity-Reality with Controllable One-Step Diffusion for Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Hao and Chen, Junyang and Pan, Jinshan and Dong, Jiangxin}, title = {Bridging Fidelity-Reality with Controllable One-Step Diffusion for Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30584-30594} }
Adaptive Confidence Regularization for Multimodal Failure Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Moru and Dong, Hao and Fink, Olga and Trapp, Mario}, title = {Adaptive Confidence Regularization for Multimodal Failure Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15850-15859} }
ACE-Merging: Data-Free Model Merging with Adaptive Covariance Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Bo and Wu, Haotian and Lin, Hehai and Huang, Weiquan and Zhu, Beier and Shu, Yao and Qin, Chengwei}, title = {ACE-Merging: Data-Free Model Merging with Adaptive Covariance Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29472-29482} }
Hyperbolic Busemann Neural Networks-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Ziheng and Sch{\"o}lkopf, Bernhard and Sebe, Nicu}, title = {Hyperbolic Busemann Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42029-42038} }
ChimeraLoRA: Multi-Head LoRA-Guided Synthetic Datasets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Hoyoung and Jang, Minwoo and Koo, Jabin and Yun, Sangdoo and Ok, Jungseul}, title = {ChimeraLoRA: Multi-Head LoRA-Guided Synthetic Datasets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9063-9073} }
TTAPFormer: Robust Arbitrary Point Tracking via Transient Asynchronous Fusion of Frames and Events-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Jiaxiong and Tan, Zhen and Zhang, Jinpu and Zhou, Yi and Shen, Hui and Chen, Xieyuanli and Hu, Dewen}, title = {TTAPFormer: Robust Arbitrary Point Tracking via Transient Asynchronous Fusion of Frames and Events}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37053-37062} }
CREval: An Automated Interpretable Evaluation for Creative Image Manipulation under Complex Instructions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Chonghuinan and Chen, Zihan and Wei, Yuxiang and Jiang, Tianyi and Wu, Xiaohe and Li, Fan and Zuo, Wangmeng and Yao, Hongxun}, title = {CREval: An Automated Interpretable Evaluation for Creative Image Manipulation under Complex Instructions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9029-9039} }
FVAR: Next-Focus Prediction for Visual Autoregressive Modeling-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xiaofan and Wu, Chenming and Sun, Yanpeng and Zhou, Jiaming and Qu, Delin and Qu, Yansong and Bo, Weihao and Yu, Haibao and Liang, Dingkang}, title = {FVAR: Next-Focus Prediction for Visual Autoregressive Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30391-30401} }
OLATverse: A Large-scale Real-world Object Dataset with Precise Lighting Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Xilong and Chen, Jianchun and Rao, Pramod and Teufel, Timo and Lyu, Linjie and Minasian, Tigran and Sotnychenko, Oleksandr and Long, Xiao-Xiao and Habermann, Marc and Theobalt, Christian}, title = {OLATverse: A Large-scale Real-world Object Dataset with Precise Lighting Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28848-28859} }
Landscape-Awareness for Geometric View Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yan-Ting and Chen, Hao-Wei and Hsiao, Tsu-Ching and Lee, Chun-Yi}, title = {Landscape-Awareness for Geometric View Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38475-38486} }
MedGRPO: Multi-Task Reinforcement Learning for Heterogeneous Medical Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2026_CVPR, author = {Su, Yuhao and Choudhuri, Anwesa and Gao, Zhongpai and Planche, Benjamin and Nguyen, Van Nguyen and Zheng, Meng and Shen, Yuhan and Innanje, Arun and Chen, Terrence and Elhamifar, Ehsan and Wu, Ziyan}, title = {MedGRPO: Multi-Task Reinforcement Learning for Heterogeneous Medical Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2788-2798} }
Modeling Spatiotemporal Neural Frames for High Resolution Brain Dynamic-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2026_CVPR, author = {Qu, Wanying and Gao, Jianxiong and Wang, Wei and Fu, Yanwei}, title = {Modeling Spatiotemporal Neural Frames for High Resolution Brain Dynamic}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6994-7002} }
One Algorithm to Align Them All-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pang_2026_CVPR, author = {Pang, Boyi and Ignatyev, Savva and Ippolitov, Vladimir and Khafizov, Ramil and Melnik, Yurii and Voynov, Oleg and Nakhodnov, Maksim and Alanov, Aibek and Fan, Xiaopeng and Wonka, Peter and Burnaev, Evgeny}, title = {One Algorithm to Align Them All}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30446-30456} }
Beyond [CLS] Token: Query-Driven Token-Level Forgery Purification for Generalizable Deepfake Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Changshuo and Wang, Jiangming and Zhang, Ke-Yue and Yao, Taiping and Ding, Shouhong and Wang, Shunli and Yi, Ran and Ma, Lizhuang}, title = {Beyond [CLS] Token: Query-Driven Token-Level Forgery Purification for Generalizable Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42922-42931} }
LoST: Level of Semantics Tokenization for 3D Shapes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dutt_2026_CVPR, author = {Dutt, Niladri Shekhar and Shi, Zifan and Guerrero, Paul and Huang, Chun-Hao Paul and Ceylan, Duygu and Mitra, Niloy J. and Chen, Xuelin}, title = {LoST: Level of Semantics Tokenization for 3D Shapes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19950-19959} }
Differentiable Stroke Planning with Dual Parameterization for Efficient and High-Fidelity Painting Creation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Jinfan and Zhang, Wuze and Hu, Zhangli and Zhao, Zhehan and Chen, Ye and Ni, Bingbing}, title = {Differentiable Stroke Planning with Dual Parameterization for Efficient and High-Fidelity Painting Creation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26721-26730} }
Predict Before You Explore: Predictive Planning with Specialized Memory for Embodied Question Answering-
[pdf]
[supp]
[bibtex]@InProceedings{Yuan_2026_CVPR, author = {Yuan, Bowen and You, Sisi and Bao, Bing-Kun}, title = {Predict Before You Explore: Predictive Planning with Specialized Memory for Embodied Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29610-29619} }
Score2Instruct: Scaling Up Video Quality-Centric Instructions via Automated Dimension Scoring-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Qizhi and Yuan, Kun and Qu, Yunpeng and Gong, Jiachao and Wu, Mingda and Sun, Ming and Zhou, Chao and Zhu, Jihong}, title = {Score2Instruct: Scaling Up Video Quality-Centric Instructions via Automated Dimension Scoring}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11426-11436} }
Pano360: Perspective to Panoramic Vision with Geometric Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Zhengdong and Xue, Weiyi and Yang, Zuyuan and Zhou, Wenlve and Zhou, Zhiheng}, title = {Pano360: Perspective to Panoramic Vision with Geometric Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7600-7609} }
RenderFlow: Single-Step Neural Rendering via Flow Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shenghao and Liu, Runtao and Schroers, Christopher and Zhang, Yang}, title = {RenderFlow: Single-Step Neural Rendering via Flow Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40602-40611} }
RDF-MIG: A Robust Diffusion Framework for Masked Image Generation to Augment Semantic Segmentation and Change Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Zian and Wei, Wei and Gao, Qingshan and Fu, Yuanyuan}, title = {RDF-MIG: A Robust Diffusion Framework for Masked Image Generation to Augment Semantic Segmentation and Change Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35758-35767} }
iLRM: An Iterative Large 3D Reconstruction Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2026_CVPR, author = {Kang, Gyeongjin and Nam, Seungtae and Yang, Seungkwon and Sun, Xiangyu and Khamis, Sameh and Mohamed, Abdelrahman and Park, Eunbyung}, title = {iLRM: An Iterative Large 3D Reconstruction Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37332-37342} }
Adaptive Depth Lightweight RGB-T Tracking with Holistic Token Routing-
[pdf]
[supp]
[bibtex]@InProceedings{Ding_2026_CVPR, author = {Ding, Tian and Yang, Hongtao and Shi, Liangtao and Li, Jun and Hu, Xiantao and Yang, Jian and Tai, Ying}, title = {Adaptive Depth Lightweight RGB-T Tracking with Holistic Token Routing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20942-20952} }
Towards an Incremental Unified Multimodal Anomaly Detection: Augmenting Multimodal Denoising From an Information Bottleneck Perspective-
[pdf]
[arXiv]
[bibtex]@InProceedings{Long_2026_CVPR, author = {Long, Kaifang and Ma, Lianbo and Liu, Jiaqi and Liu, Liming and Xie, Guoyang}, title = {Towards an Incremental Unified Multimodal Anomaly Detection: Augmenting Multimodal Denoising From an Information Bottleneck Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14116-14125} }
NeuroFlow: Toward Unified Visual Encoding and Decoding from Neural Activity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mai_2026_CVPR, author = {Mai, Weijian and Nan, Mu and Zhu, Yu and Cao, Jiahang and Zhang, Rui and Dai, Yuqin and Song, Chunfeng and Luo, Andrew and Wu, Jiamin}, title = {NeuroFlow: Toward Unified Visual Encoding and Decoding from Neural Activity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12966-12976} }
Unified Multimodal Models as Auto-Encoders-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Zhiyuan and Lin, Kaiqing and Li, Zongjian and Ye, Junyan and Han, Hui and Wang, Haochen and Wang, Zhendong and Lin, Bin and Li, Hao and Xiao, Xinyan and Wang, Jingdong and Wang, Haifeng and Yuan, Li}, title = {Unified Multimodal Models as Auto-Encoders}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41903-41912} }
Test-Time 3D Occupancy Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Fengyi and Sun, Xiangyu and Yang, Huitong and Zhang, Zheng and Huang, Zi and Luo, Yadan}, title = {Test-Time 3D Occupancy Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35691-35701} }
PhaseWin Search Framework Enable Efficient Object-Level Interpretation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2026_CVPR, author = {Gu, Zihan and Chen, Ruoyu and Zhang, Junchi and Hu, Yue and Zhang, Hua and Cao, Xiaochun}, title = {PhaseWin Search Framework Enable Efficient Object-Level Interpretation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2875-2884} }
DGS: Dual Gradient and Semantic-Shift Guided Low-Rank Adaptation for Class Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Kai and Li, Jiafeng and He, Lianghua and Wen, Ying}, title = {DGS: Dual Gradient and Semantic-Shift Guided Low-Rank Adaptation for Class Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32267-32277} }
Robo-SGG: Exploiting Layout-Oriented Normalization and Restitution Can Improve Robust Scene Graph Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lv_2026_CVPR, author = {Lv, Changsheng and Fu, Zijian and Qi, Mengshi}, title = {Robo-SGG: Exploiting Layout-Oriented Normalization and Restitution Can Improve Robust Scene Graph Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39282-39292} }
Vision-Language Model Guided Source-Free Domain Adaptation via Optimal Transport-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Shuo and Tang, Xu and Ma, Jingjing and Zhang, Xiangrong}, title = {Vision-Language Model Guided Source-Free Domain Adaptation via Optimal Transport}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36989-36998} }
Learning to Generate Highly Dynamic Videos using Synthetic Motion Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2026_CVPR, author = {Jin, Wonjoon and Won, Jiyun and Han, Janghyeok and Dai, Qi and Luo, Chong and Baek, Seung-Hwan and Cho, Sunghyun}, title = {Learning to Generate Highly Dynamic Videos using Synthetic Motion Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18331-18341} }
ProgTrack: A Multi-Object Tracking Algorithm with Progressive Matching Strategy-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Chenhui and Dong, Guoqing and Peng, Weijie}, title = {ProgTrack: A Multi-Object Tracking Algorithm with Progressive Matching Strategy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20902-20911} }
Adapting Lightweight Image-based Counting Models for Video Crowd Counting-
[pdf]
[supp]
[bibtex]@InProceedings{Shu_2026_CVPR, author = {Shu, Weibo and Chan, Antoni B.}, title = {Adapting Lightweight Image-based Counting Models for Video Crowd Counting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35227-35237} }
FedAFD: Multimodal Federated Learning via Adversarial Fusion and Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Min and Ma, Junchao and Feng, Yinfu and Ding, Jiajun and Pan, Wenwen and Han, Tingting and Zheng, Qian and Kuang, Zhenzhong and Yu, Zhou}, title = {FedAFD: Multimodal Federated Learning via Adversarial Fusion and Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3400-3409} }
ConsisVLA-4D: Advancing Spatiotemporal Consistency in Efficient 3D-Perception and 4D-Reasoning for Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Wei and Liu, Jizhihui and Li, Yixing and Tong, Junwen and Shao, Rui and Nie, Liqiang}, title = {ConsisVLA-4D: Advancing Spatiotemporal Consistency in Efficient 3D-Perception and 4D-Reasoning for Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6706-6717} }
ActiveAD: Planning-Oriented Active Learning for End-to-End Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Han and Jia, Xiaosong and Xie, Yichen and Sun, Siyu and Liao, Wenlong and Yang, Xiaokang and Yan, Junchi}, title = {ActiveAD: Planning-Oriented Active Learning for End-to-End Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3656-3666} }
Plug-and-Play Incomplete Multi-View Clustering via Janus-Faced Affinity Learning with Topology Harmonization-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Shengju and Liu, Suyuan and Shao, Wenhao and Wang, Siwei and Liang, Ke and Yang, Xihong and Li, Tiejun and Liu, Xinwang}, title = {Plug-and-Play Incomplete Multi-View Clustering via Janus-Faced Affinity Learning with Topology Harmonization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3594-3603} }
SHOW3D: Capturing Scenes of 3D Hands and Objects in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rim_2026_CVPR, author = {Rim, Patrick and Harris, Kevin and Copple, Braden and Han, Shangchen and Xie, Xu and Shugurov, Ivan and An, Sizhe and Wen, He and Wong, Alex and Hodan, Tomas and He, Kun}, title = {SHOW3D: Capturing Scenes of 3D Hands and Objects in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7111-7120} }
MicroFM: Physics-guided Flow Matching for Isotropic Microscopy Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Zhan_2026_CVPR, author = {Zhan, Xingzu and Jiang, Runmin and Gupta, Vatsal and Swaminathan, Tanush and Wang, Yanwen and Zhang, Genpei and Wang, Haili and Xu, Min}, title = {MicroFM: Physics-guided Flow Matching for Isotropic Microscopy Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15639-15648} }
Cross-Instance Gaussian Splatting Registration via Geometry-Aware Feature-Guided Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Amoyal_2026_CVPR, author = {Amoyal, Roy and Freifeld, Oren and Baskin, Chaim}, title = {Cross-Instance Gaussian Splatting Registration via Geometry-Aware Feature-Guided Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4993-5002} }
MoReGen: Multi-Agent Motion-Reasoning Engine for Code-based Text-to-Video Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2026_CVPR, author = {Bai, Xiangyu and Liang, He and Galoaa, Bishoy and Nandi, Utsav and Moezzi, Shayda and He, Yuhang and Ostadabbas, Sarah}, title = {MoReGen: Multi-Agent Motion-Reasoning Engine for Code-based Text-to-Video Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7632-7642} }
DDiT: Dynamic Patch Scheduling for Efficient Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Dahye and Ghadiyaram, Deepti and Gadde, Raghudeep}, title = {DDiT: Dynamic Patch Scheduling for Efficient Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11459-11471} }
HyperGaussians: High-Dimensional Gaussian Splatting for High-Fidelity Animatable Face Avatars-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Serifi_2026_CVPR, author = {Serifi, Gent and Buehler, Marcel C.}, title = {HyperGaussians: High-Dimensional Gaussian Splatting for High-Fidelity Animatable Face Avatars}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25236-25247} }
Spot The Ball: A Benchmark for Visual Social Inference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Balamurugan_2026_CVPR, author = {Balamurugan, Neha and Wu, Sarah and Eyzaguirre, Cristobal and Gerstenberg, Tobias}, title = {Spot The Ball: A Benchmark for Visual Social Inference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30919-30928} }
Bridging the Modality Gap in Compositional Zero-Shot Learning via Sparse Alignment and Unimodal Memory Bank-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yang and Chi, Zhixiang and Yan, Xudong and Wang, Yang and Feng, Songhe}, title = {Bridging the Modality Gap in Compositional Zero-Shot Learning via Sparse Alignment and Unimodal Memory Bank}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5553-5563} }
SSM-Aware Token-Efficient VMamba via Adaptive Patch Pruning and Merging for Person Re-Identification-
[pdf]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Huiyuan and Yoon, Sang Min}, title = {SSM-Aware Token-Efficient VMamba via Adaptive Patch Pruning and Merging for Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4363-4372} }
CARI4D: Category Agnostic 4D Reconstruction of Human-Object Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Xianghui and Wen, Bowen and Chang, Yan and Rabeti, Hesam and Li, Jiefeng and Yuan, Ye and Pons-Moll, Gerard and Birchfield, Stan}, title = {CARI4D: Category Agnostic 4D Reconstruction of Human-Object Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14006-14016} }
SGI: Structured 2D Gaussians for Efficient and Compact Large Image Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Zixuan and Tang, Kaiyuan and Xia, Jun and Qin, Yifan and Gu, Lin and Wang, Chaoli and Chen, Jianxu and Shi, Yiyu}, title = {SGI: Structured 2D Gaussians for Efficient and Compact Large Image Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12162-12172} }
FocusUI: Efficient UI Grounding via Position-Preserving Visual Token Selection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ouyang_2026_CVPR, author = {Ouyang, Mingyu and Lin, Kevin Qinghong and Shou, Mike Zheng and Ng, Hwee Tou}, title = {FocusUI: Efficient UI Grounding via Position-Preserving Visual Token Selection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20313-20323} }
VisMem: Latent Vision Memory Unlocks Potential of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Xinlei and Xu, Chengming and Zhang, Guibin and Chen, Zhangquan and Zhang, Yudong and He, Yongbo and Jiang, Peng-Tao and Zhang, Jiangning and Hu, Xiaobin and Yan, Shuicheng}, title = {VisMem: Latent Vision Memory Unlocks Potential of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31544-31555} }
ExMesh: EXplicit Mesh Reconstruction with Topology Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Chuanjin and Wu, Lifan and Chang, Wenjie and Chang, Hanzhi and Yang, Wenfei and Zhang, Tianzhu}, title = {ExMesh: EXplicit Mesh Reconstruction with Topology Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27136-27145} }
Scalable Feature Matching via State Space Modeling and Sparse Correlation-
[pdf]
[bibtex]@InProceedings{Choo_2026_CVPR, author = {Choo, Sin Wai and Li, Bo}, title = {Scalable Feature Matching via State Space Modeling and Sparse Correlation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6685-6694} }
Real-World Point Tracking with Verifier-Guided Pseudo-Labeling-
[pdf]
[supp]
[bibtex]@InProceedings{Aydemir_2026_CVPR, author = {Aydemir, G{\"o}rkay and G{\"u}ney, Fatma and Xie, Weidi}, title = {Real-World Point Tracking with Verifier-Guided Pseudo-Labeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13553-13562} }
Multi-Modal Representation Learning via Semi-Supervised Rate Reduction for Generalized Category Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Wei and Meng, Xianghan and Huang, Zhiyuan and Qi, Xianbiao and Xiao, Rong and Li, Chun-Guang}, title = {Multi-Modal Representation Learning via Semi-Supervised Rate Reduction for Generalized Category Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39637-39646} }
ShotDirector: Directorially Controllable Multi-Shot Video Generation with Cinematographic Transitions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Xiaoxue and Chen, Xinyuan and Wang, Yaohui and Qiao, Yu}, title = {ShotDirector: Directorially Controllable Multi-Shot Video Generation with Cinematographic Transitions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2079-2089} }
4DWorldBench: A Comprehensive Evaluation Framework for 3D/4D World Generation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Yiting and Luo, Wei and Tu, Peiyan and Li, Haoran and Zhu, Hanxin and Yu, Zihao and Wang, Xingrui and Chen, Xinyi and Peng, Xinge and Li, Xin and Chen, Zhibo}, title = {4DWorldBench: A Comprehensive Evaluation Framework for 3D/4D World Generation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34322-34332} }
Dual-Prototype-Guided Multi-task Learning for Unsupervised Anomaly Detection and Classification-
[pdf]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Qianhao and Mi, Jiajia and Yan, Mingtao and Liu, JingSheng and Pang, ShuYang and Li, Weiling}, title = {Dual-Prototype-Guided Multi-task Learning for Unsupervised Anomaly Detection and Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14137-14146} }
TaskIT: Memory-Efficient Fine-Tuning of Multi-LoRA LLMs via Cross-Task Importance Transfer-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Cheng and Zhou, Zimu and Ma, Ke and Guo, Bin}, title = {TaskIT: Memory-Efficient Fine-Tuning of Multi-LoRA LLMs via Cross-Task Importance Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37010-37021} }
Towards Open-Vocabulary Industrial Defect Understanding with a Large-Scale Multimodal Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ni_2026_CVPR, author = {Ni, Tsai-Ching and Chen, Cheng-Chi and Yang, Yuan-Fu}, title = {Towards Open-Vocabulary Industrial Defect Understanding with a Large-Scale Multimodal Dataset}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13059-13068} }
Style-GRPO: Semantic-Aware Preference Optimization for Image Style Transfer Guided by Reward Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Jianbin and Feng, Chaoran and Yu, Miao and Li, Yingtao and Tang, Zhenyu and Yu, Wangbo and Zhao, Yian and Li, Xiaomin and Yuan, Li and Tian, Yonghong}, title = {Style-GRPO: Semantic-Aware Preference Optimization for Image Style Transfer Guided by Reward Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12818-12828} }
AXG-Reasoner: Error Detection and Explanation in Long Task Videos with Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Shih-Po and Elhamifar, Ehsan}, title = {AXG-Reasoner: Error Detection and Explanation in Long Task Videos with Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3421-3431} }
URICA: A Uniformity Region Affine Identifier Capture Algorithm for Arbitrary Region Retrieval in Pathology Images-
[pdf]
[supp]
[bibtex]@InProceedings{Su_2026_CVPR, author = {Su, Ri and Chen, Zhao and Cao, Caleb Chen and Chen, Lei}, title = {URICA: A Uniformity Region Affine Identifier Capture Algorithm for Arbitrary Region Retrieval in Pathology Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32777-32786} }
Mesh4D: 4D Mesh Reconstruction and Tracking from Monocular Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Zeren and Zheng, Chuanxia and Laina, Iro and Larlus, Diane and Vedaldi, Andrea}, title = {Mesh4D: 4D Mesh Reconstruction and Tracking from Monocular Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14524-14535} }
Learning to See Through a Baby's Eyes: Early Visual Diets Enable Robust Visual Intelligence in Humans and Machines-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Yusen and Lin, Qing and Nunna, Bhargava Satya and Zhang, Mengmi}, title = {Learning to See Through a Baby's Eyes: Early Visual Diets Enable Robust Visual Intelligence in Humans and Machines}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13766-13780} }
Conflict-Aware Adaptive Cross-Reconstruction for Multimodal Sentiment Analysis-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yan and Cao, Fuyuan and Zhao, Xingwang}, title = {Conflict-Aware Adaptive Cross-Reconstruction for Multimodal Sentiment Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15880-15889} }
Neurodynamics-Driven Coupled Neural P Systems for Multi-Focus Image Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Bo and Lei, Yunkuo and Bao, Tingting and Yan, Hang and Wang, Yaxian and Fu, Weiping and Zhang, Lingling and Liu, Jun}, title = {Neurodynamics-Driven Coupled Neural P Systems for Multi-Focus Image Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26541-26550} }
MV-RoMa: From Pairwise Matching into Multi-View Track Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Jongmin and Kang, Seungyeop and Yoo, Sungjoo}, title = {MV-RoMa: From Pairwise Matching into Multi-View Track Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7446-7456} }
EchoPOSE: 6D Pose Estimation of Sparse Echocardiograms for Left-Ventricular 3D Shape Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Iijima_2026_CVPR, author = {Iijima, Lucas and Luo, Yihao and Sesia, Dario and Kaura, Amit and Mayet, Jamil and Yap, Choon Hwai}, title = {EchoPOSE: 6D Pose Estimation of Sparse Echocardiograms for Left-Ventricular 3D Shape Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22753-22762} }
FedMOP: Achieving Enhanced Privacy and Performance in Federated Learning via Momentum Orthogonal Projection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yunlong and Deng, Xiaoheng and Xu, Hongyan and Qiu, Zhuohua and Hu, Xiaowen and You, Shan and Chen, Yi and Xu, Chang and Su, Xiu}, title = {FedMOP: Achieving Enhanced Privacy and Performance in Federated Learning via Momentum Orthogonal Projection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39425-39434} }
FlexAvatar: Learning Complete 3D Head Avatars with Partial Supervision-
[pdf]
[supp]
[bibtex]@InProceedings{Kirschstein_2026_CVPR, author = {Kirschstein, Tobias and Giebenhain, Simon and Nie{\ss}ner, Matthias}, title = {FlexAvatar: Learning Complete 3D Head Avatars with Partial Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18193-18203} }
AnyDoc: Enhancing Document Generation via Large-Scale HTML/CSS Data Synthesis and Height-Aware Reinforcement Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Jiawei and Zhu, Wanrong and Morariu, Vlad I. and Tensmeyer, Christopher}, title = {AnyDoc: Enhancing Document Generation via Large-Scale HTML/CSS Data Synthesis and Height-Aware Reinforcement Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {626-635} }
Breaking the 3D Dataset Bottleneck: Fast Scalable Generation of Aligned 3D Assets from Scratch for Category 6D Pose Estimation and Robotic Grasping-
[pdf]
[supp]
[bibtex]@InProceedings{Guillaume_2026_CVPR, author = {Duret, Guillaume and Mazurak, Danylo and Zara, Florence and Peters, Jan and Chen, Liming}, title = {Breaking the 3D Dataset Bottleneck: Fast Scalable Generation of Aligned 3D Assets from Scratch for Category 6D Pose Estimation and Robotic Grasping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1809-1818} }
Cross from Left to Right Brain: Adaptive Text Dreamer for Vision-and-Language Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Pingrui and Su, Yifei and Wu, Pengyuan and An, Dong and Zhang, Li and Wang, Zhigang and Wang, Dong and Zhao, Bin}, title = {Cross from Left to Right Brain: Adaptive Text Dreamer for Vision-and-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1006-1019} }
The Road Less Seen: Segment Exploration for Weakly Supervised Video Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Acharya_2026_CVPR, author = {Acharya, Anusha and Sapkota, Hitesh and Yu, Qi and Liu, Xumin}, title = {The Road Less Seen: Segment Exploration for Weakly Supervised Video Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14147-14156} }
SurgCoT: Advancing Spatiotemporal Reasoning in Surgical Videos through a Chain-of-Thought Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Gui and Zhou, YongSong and Deng, Kaijun and Cheah, Wooi Ping and Qu, Rong and Ren, Jianfeng and Shen, Linlin}, title = {SurgCoT: Advancing Spatiotemporal Reasoning in Surgical Videos through a Chain-of-Thought Benchmark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17011-17021} }
Easy3E: Feed-Forward 3D Asset Editing via Rectified Voxel Flow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Shimin and Wei, Yuanyi and Zha, Fei and Guo, Yudong and Zhang, Juyong}, title = {Easy3E: Feed-Forward 3D Asset Editing via Rectified Voxel Flow}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12730-12740} }
Scenes as Tokens: Multi-Scale Normal Distributions Transform Tokenizer for General 3D Vision-Language Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Yutao and Zhao, Cheng and Mittal, Gaurav and Kukkala, Rohith and Chellappa, Rama and Peng, Cheng and Chen, Mei}, title = {Scenes as Tokens: Multi-Scale Normal Distributions Transform Tokenizer for General 3D Vision-Language Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38584-38594} }
S$^2$AM3D: Scale-controllable Part Segmentation of 3D Point Clouds-
[pdf]
[supp]
[bibtex]@InProceedings{Su_2026_CVPR, author = {Su, Han and Huang, Tianyu and Wan, Zichen and Wu, Xiaohe and Zuo, Wangmeng}, title = {{S$^2$AM3D}: Scale-controllable Part Segmentation of 3D Point Clouds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14357-14366} }
Query2Uncertainty: Robust Uncertainty Quantification and Calibration for 3D Object Detection under Distribution Shift-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Beemelmanns_2026_CVPR, author = {Beemelmanns, Till and Nekrasov, Alexey and Vilceanu, Stefan and Steinhaus, Jonas and Woopen, Timo and Leibe, Bastian and Eckstein, Lutz}, title = {Query2Uncertainty: Robust Uncertainty Quantification and Calibration for 3D Object Detection under Distribution Shift}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4677-4686} }
V^2-SAM: Marrying SAM2 with Multi-Prompt Experts for Cross-View Object Correspondence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Jiancheng and Wang, Runze and Qian, Tianwen and Mahdi, Mohammad and Fu, Yanwei and Xue, Xiangyang and Huang, Xiaomeng and Van Gool, Luc and Paudel, Danda Pani and Fu, Yuqian}, title = {{V$^2$-SAM}: Marrying SAM2 with Multi-Prompt Experts for Cross-View Object Correspondence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16910-16919} }
PTC-Depth: Pose-Refined Monocular Depth Estimation with Temporal Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Leezy and Kim, Seunggyu and Shim, Dongseok and Lee, Hyeonbeom}, title = {PTC-Depth: Pose-Refined Monocular Depth Estimation with Temporal Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12617-12627} }
LumiX: Structured and Coherent Text-to-Intrinsic Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Xu and Zhang, Biao and Tang, Xiangjun and Li, Xianzhi and Wonka, Peter}, title = {LumiX: Structured and Coherent Text-to-Intrinsic Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21942-21952} }
Audio-sync Video Instance Editing with Granularity-Aware Mask Refiner-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Haojie and Weng, Shuchen and Liu, Jingqi and Yang, Siqi and Shi, Boxin and Wang, Xinlong}, title = {Audio-sync Video Instance Editing with Granularity-Aware Mask Refiner}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23150-23160} }
DiffusionFF: A Diffusion-based Framework for Joint Face Forgery Detection and Fine-Grained Artifact Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Siran and Zhang, Haoyuan and Gao, Li and Zhang, Tianshuo and Zhu, Xiangyu and Li, Bao and Zhao, Weisong and Lei, Zhen}, title = {DiffusionFF: A Diffusion-based Framework for Joint Face Forgery Detection and Fine-Grained Artifact Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14095-14105} }
ViterbiPlanNet: Injecting Procedural Knowledge via Differentiable Viterbi for Planning in Instructional Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seminara_2026_CVPR, author = {Seminara, Luigi and Moltisanti, Davide and Furnari, Antonino}, title = {ViterbiPlanNet: Injecting Procedural Knowledge via Differentiable Viterbi for Planning in Instructional Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31240-31249} }
CycleBEV: Regularizing View Transformation Networks via View Cycle Consistency for Bird's-Eye-View Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Hong_2026_CVPR, author = {Hong, Jeongbin and Choi, Dooseop and An, Taeg-Hyun and An, Kyounghwan and Min, Kyoung-Wook}, title = {CycleBEV: Regularizing View Transformation Networks via View Cycle Consistency for Bird's-Eye-View Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10986-10995} }
A Mixed Diet Makes DINO An Omnivorous Vision Encoder-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kabra_2026_CVPR, author = {Kabra, Rishabh and Ovsjanikov, Maks and Hudson, Drew A. and Xia, Ye and Koppula, Skanda and Araujo, Andre and Carreira, Joao and Mitra, Niloy J.}, title = {A Mixed Diet Makes DINO An Omnivorous Vision Encoder}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36850-36860} }
HiDRA: Hierarchical Degradation Representation and Adaptation with Generative Priors for Enhancing Infrared Vision-
[pdf]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zihang and Liu, Zhu and Yan, Changbo and Liu, Jinyuan and Liu, Risheng}, title = {HiDRA: Hierarchical Degradation Representation and Adaptation with Generative Priors for Enhancing Infrared Vision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37434-37444} }
Unlocking 3D Affordance Segmentation with 2D Semantic Knowledge-
[pdf]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Yu and Peng, Zelin and Wen, Changsong and Yang, Xiaokang and Shen, Wei}, title = {Unlocking 3D Affordance Segmentation with 2D Semantic Knowledge}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6399-6409} }
Omni-AD: A Large-scale and Versatile Benchmark for Industrial Anomaly Detection-
[pdf]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Dahu and He, Chengshen and Zhang, Shaochen and Qian, Bo and Quan, Xiaochen and Zhang, Wencong and Wei, Xing}, title = {Omni-AD: A Large-scale and Versatile Benchmark for Industrial Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14157-14166} }
SCE-Depth: A Spherical Compound Eye Framework for Wide FOV Depth Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Yi and Xiong, Hao and Xiao, Lin and Shi, Ranfeng and Gu, Qinying and Gu, Leilei}, title = {SCE-Depth: A Spherical Compound Eye Framework for Wide FOV Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26964-26973} }
GaussianDWM: 3D Gaussian Driving World Model for Unified Scene Understanding and Multi-Modal Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Tianchen and Chen, Xuefeng and Chen, Yi and Chen, Qu and Xu, Yuyao and Yang, Lijin and Xu, Le and Zhang, Yu and Zhang, Bo and Huang, Wuxiong and Wang, Hesheng}, title = {GaussianDWM: 3D Gaussian Driving World Model for Unified Scene Understanding and Multi-Modal Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10656-10667} }
Watch and Learn: Learning to Use Computers from Online Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Chan Hee and Song, Yiwen and Goyal, Palash and Su, Yu and Riva, Oriana and Palangi, Hamid and Pfister, Tomas}, title = {Watch and Learn: Learning to Use Computers from Online Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5421-5431} }
VRR-QA: Visual Relational Reasoning in Videos Beyond Explicit Cues-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Swetha_2026_CVPR, author = {Swetha, Sirnam and Gupta, Rohit and Kulkarni, Parth Parag and Shatwell, David G. and Chan Santiago, Jeffrey A. and Siddiqui, Nyle and Fioresi, Joseph and Shah, Mubarak}, title = {VRR-QA: Visual Relational Reasoning in Videos Beyond Explicit Cues}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32840-32849} }
ESAM++: Efficient Online 3D Perception on the Edge-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Qin and Aggarwal, Lavisha and Bandyopadhyay, Saptarashmi and Bahirwani, Vikas and Niethammer, Marc and Adeli, Ehsan and Colaco, Andrea}, title = {ESAM++: Efficient Online 3D Perception on the Edge}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39021-39030} }
CHAL: Causal-guided Hierarchical Anomaly-aware Learning for Moving Infrared Small Target Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Duan_2026_CVPR, author = {Duan, Weiwei and Ji, Luping and Lei, Shipeng and Zhu, Sicheng and Huang, Jianghong and Ye, Mao}, title = {CHAL: Causal-guided Hierarchical Anomaly-aware Learning for Moving Infrared Small Target Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21357-21366} }
Towards Highly-Constrained Human Motion Generation with Retrieval-Guided Diffusion Noise Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Hanchao and Zhang, Fang-Lue and Zhang, Shining and Mu, Tai-Jiang and Hu, Shi-Min}, title = {Towards Highly-Constrained Human Motion Generation with Retrieval-Guided Diffusion Noise Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38291-38301} }
DeepProtect: Proactive Face-Swapping Defense using Identity Blending and Attribute Distortion-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Eungi and Back, Seung-hyeok and Kim, Hyung-Il and Yoo, Seok Bong}, title = {DeepProtect: Proactive Face-Swapping Defense using Identity Blending and Attribute Distortion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6569-6579} }
R3-PCQA: Ray-Reprojection-Reinforcement for No-Reference 3D Point Cloud Quality Assessment-
[pdf]
[supp]
[bibtex]@InProceedings{Seo_2026_CVPR, author = {Seo, Junhyuk and Seo, Sanghyuk and Kim, Dawoon and Oh, Heeseok}, title = {R3-PCQA: Ray-Reprojection-Reinforcement for No-Reference 3D Point Cloud Quality Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9996-10005} }
Learning Anchor in Dual Orthogonal Space for Fast Multi-view Clustering-
[pdf]
[bibtex]@InProceedings{Qin_2026_CVPR, author = {Qin, Yalan and Wu, Hanzhou}, title = {Learning Anchor in Dual Orthogonal Space for Fast Multi-view Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1616-1626} }
History to Future: Evolving Agent with Experience and Thought for Zero-shot Vision-and-Language Navigation-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2026_CVPR, author = {Dai, Guangzhao and Wang, Shuo and Wang, Zihan and Xie, Guo-Sen and Yang, Yang and Pan, Jinshan and Sun, Qianru and Shu, Xiangbo}, title = {History to Future: Evolving Agent with Experience and Thought for Zero-shot Vision-and-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15177-15187} }
CTCal: Rethinking Text-to-Image Diffusion Models via Cross-Timestep Self-Calibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Xiefan and Ma, Xinzhu and Zhang, Haiyu and Huang, Di}, title = {CTCal: Rethinking Text-to-Image Diffusion Models via Cross-Timestep Self-Calibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43558-43567} }
Learning Spatial-Temporal Consistency for 3D Semantic Scene Completion-
[pdf]
[supp]
[bibtex]@InProceedings{Xue_2026_CVPR, author = {Xue, Yujie and Wang, Meng and Li, Ruihui and Wu, Fan and Liu, Zhizhong and Tang, Zhuo and Li, Kenli}, title = {Learning Spatial-Temporal Consistency for 3D Semantic Scene Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28567-28577} }
StereoWorld: Geometry-Aware Monocular-to-Stereo Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xing_2026_CVPR, author = {Xing, Ke and Li, Longfei and Yin, Yuyang and Liang, Hanwen and Luo, Guixun and Fang, Chen and Wang, Jue and Plataniotis, Konstantinos N. and Jin, Xiaojie and Zhao, Yao and Wei, Yunchao}, title = {StereoWorld: Geometry-Aware Monocular-to-Stereo Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40245-40255} }
Low-Rank Test-Time Training for Pre-Trained Point Cloud Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Ouyangzi and Shao, Feifei and Li, Kexin and Luo, Yawei and Song, Zikai and Liu, Ping and Zhang, Fengda and Wang, Hongwei and Xiao, Jun}, title = {Low-Rank Test-Time Training for Pre-Trained Point Cloud Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31472-31481} }
DriveMoE: Mixture-of-Experts for Vision-Language-Action Model in End-to-End Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zhenjie and Chai, Yilin and Jia, Xiaosong and Li, Qifeng and Shao, Yuqian and Zhu, Xuekai and Su, Haisheng and Yan, Junchi}, title = {DriveMoE: Mixture-of-Experts for Vision-Language-Action Model in End-to-End Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10678-10688} }
AgentDet: A Shared-Blackboard Multi-Agent Framework for Zero-/Few-Shot Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Haolin and Wang, Yaohua and Yan, Ze and Wen, Lijie and Huang, Biqing}, title = {AgentDet: A Shared-Blackboard Multi-Agent Framework for Zero-/Few-Shot Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41626-41635} }
Pixel2Phys: Distilling Governing Laws from Visual Dynamics-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Ruikun and Yao, Jun and Hua, Yingfan and Tang, Shixiang and Qi, Biqing and Liu, Bin and Ouyang, Wanli and Lu, Yan}, title = {Pixel2Phys: Distilling Governing Laws from Visual Dynamics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41426-41435} }
Deformable Gaussian Occupancy: Decoupling Rigid and Nonrigid Motion with Factorized Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Yang and Li, Wuyang and Luan, Po-Chien and Alahi, Alexandre}, title = {Deformable Gaussian Occupancy: Decoupling Rigid and Nonrigid Motion with Factorized Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28588-28598} }
Exposing Functional Fusion: A New Class of Strategic Backdoor in Dynamic Prompt Architectures-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Zeyao and Zhao, Zhendong and Chen, Xiaojun and Zhao, Xin and Xuan, Yuexin and Ji, Xiaoshuang}, title = {Exposing Functional Fusion: A New Class of Strategic Backdoor in Dynamic Prompt Architectures}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13376-13385} }
Are We Ready for RL in Text-to-3D Generation? A Progressive Investigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Yiwen and Guo, Zoey and Zhu, Kaixin and Zhang, Ray and Chen, Qizhi and Jiang, Dongzhi and Liu, Junli and Zeng, Bohan and Song, Haoming and Qu, Delin and Bai, Tianyi and Xu, Dan and Zhang, Wentao and Zhao, Bin}, title = {Are We Ready for RL in Text-to-3D Generation? A Progressive Investigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3197-3207} }
Modeling the Brain's Grammar: ROI-Guided fMRI Pretraining for Transferable and Interpretable Vision Decoding-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yulong and Xu, Hua and Cai, Yiyang and Jiang, Chunyang and Han, Sirui and Guo, Yike}, title = {Modeling the Brain's Grammar: ROI-Guided fMRI Pretraining for Transferable and Interpretable Vision Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6900-6909} }
Echoes Over Time: Unlocking Length Generalization in Video-to-Audio Generation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Simon_2026_CVPR, author = {Simon, Christian and Ishii, Masato and Wang, Wei-Yao and Saito, Koichi and Hayakawa, Akio and Shim, Dongseok and Zhong, Zhi and Cui, Shuyang and Shibuya, Takashi and Takahashi, Shusuke and Mitsufuji, Yuki}, title = {Echoes Over Time: Unlocking Length Generalization in Video-to-Audio Generation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15840-15849} }
Vision Foundation Models Can Be Good Tokenizers for Latent Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bi_2026_CVPR, author = {Bi, Tianci and Zhang, Xiaoyi and Lu, Yan and Zheng, Nanning}, title = {Vision Foundation Models Can Be Good Tokenizers for Latent Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43310-43319} }
Flowception: Temporally Expansive Flow Matching for Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ifriqi_2026_CVPR, author = {Ifriqi, Tariq Berrada and Nguyen, John and Alahari, Karteek and Verbeek, Jakob and Chen, Ricky T. Q.}, title = {Flowception: Temporally Expansive Flow Matching for Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16185-16195} }
OSPO: Object-Centric Self-Improving Preference Optimization for Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Oh_2026_CVPR, author = {Oh, Yoonjin and Kim, Yongjin and Kim, Hyomin and Chi, Donghwan and Kim, Sungwoong}, title = {OSPO: Object-Centric Self-Improving Preference Optimization for Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7620-7631} }
EVLF: Early Vision-Language Fusion for Generative Dataset Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Wenqi and Zou, Yawen and Li, Guang and Gu, Chunzhi and Zhang, Chao}, title = {EVLF: Early Vision-Language Fusion for Generative Dataset Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33953-33962} }
UFVideo: Towards Unified Fine-Grained Video Cooperative Understanding with Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Hewen and Wei, Cong and Liang, Dashuang and Huang, Zepeng and Gao, Pengfei and Zhou, Ziqi and Xue, Lulu and Yan, Pengfei and Wei, Xiaoming and Li, Minghui and Hu, Shengshan}, title = {UFVideo: Towards Unified Fine-Grained Video Cooperative Understanding with Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31833-31844} }
DeRVOS: Decoupling Consistent Trajectory Generation and Multimodal Understanding for Referring Video Object Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Wenxuan and Dai, Ming and Lu, Huimin and Yang, Wankou}, title = {DeRVOS: Decoupling Consistent Trajectory Generation and Multimodal Understanding for Referring Video Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24651-24662} }
Which Concepts to Forget and How to Refuse? Decomposing Concepts for Continual Unlearning in Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2026_CVPR, author = {Jin, Hyundong and Han, Dongyoon and Kim, Eunwoo}, title = {Which Concepts to Forget and How to Refuse? Decomposing Concepts for Continual Unlearning in Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32288-32298} }
Divide, then Ground: Adapting Frame Selection to Query Types for Long-Form Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jialuo and Li, Bin and Li, Jiahao and Lu, Yan}, title = {Divide, then Ground: Adapting Frame Selection to Query Types for Long-Form Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11369-11380} }
SR3R: Rethinking Super-Resolution 3D Reconstruction With Feed-Forward Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Xiang and Wang, Xiangbo and Zhong, Tieshi and Wang, Chengkai and Zhao, Yiting and Xu, Tianxiang and Kuang, Zhenzhong and Qin, Feiwei and Yin, Xuefei and Zhu, Yanming}, title = {SR3R: Rethinking Super-Resolution 3D Reconstruction With Feed-Forward Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33384-33393} }
Diffusion Guided Chain-of-Vision for Large Autoregressive Vision Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xinyang and Zheng, Kecheng and Zhu, Minfeng and Wu, Wei and Lu, Fan and Zhai, Wei and Chen, Wei}, title = {Diffusion Guided Chain-of-Vision for Large Autoregressive Vision Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2357-2368} }
Spatial Retrieval Augmented Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2026_CVPR, author = {Jia, Xiaosong and Zhang, Chenhe and Jiang, Yule and Wong, Songbur and Zhang, Zhiyuan and Chen, Chen and Zhang, Shaofeng and Zhou, Xuanhe and Yang, Xue and Yan, Junchi and Jiang, Yu-Gang}, title = {Spatial Retrieval Augmented Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17787-17797} }
WeatherCity: Urban Scene Reconstruction with Controllable Multi-Weather Transformation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Wenhua and Guan, Huai and Liu, Zhe and Wang, Hesheng}, title = {WeatherCity: Urban Scene Reconstruction with Controllable Multi-Weather Transformation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40949-40958} }
Protego: User-Centric Pose-Invariant Privacy Protection Against Face Recognition-Induced Digital Footprint Exposure-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Ziling and Yang, Shuya and Lu, Jialin and Chow, Ka-Ho}, title = {Protego: User-Centric Pose-Invariant Privacy Protection Against Face Recognition-Induced Digital Footprint Exposure}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10293-10302} }
RMIR: A Benchmark Dataset for Reasoning-Intensive Multimodal Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yijiang and Kotian, Kunal and Marjaninejad, Ali and Friedenberg, Meir and Pavani, Kaushik and Dasgupta, Sunny}, title = {RMIR: A Benchmark Dataset for Reasoning-Intensive Multimodal Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2725-2734} }
MeshWeaver: Sparse-Voxel-Guided Surface Weaving for Autoregressive Mesh Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Jiale and Zhao, Wang and Shan, Ying}, title = {MeshWeaver: Sparse-Voxel-Guided Surface Weaving for Autoregressive Mesh Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5912-5922} }
TM-BSN: Triangular-Masked Blind-Spot Network for Real-World Self-Supervised Image Denoising-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Junyoung and Oh, Youngjin and Cho, Nam Ik}, title = {TM-BSN: Triangular-Masked Blind-Spot Network for Real-World Self-Supervised Image Denoising}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29877-29886} }
ReMoT: Reinforcement Learning with Motion Contrast Triplets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wan_2026_CVPR, author = {Wan, Cong and Guo, Zeyu and Li, Jiangyang and Dong, Songlin and Bai, Yifan and Peng, Lin and Ma, Zhiheng and Gong, Yihong}, title = {ReMoT: Reinforcement Learning with Motion Contrast Triplets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5487-5498} }
LLMind: Bio-inspired Training-free Adaptive Visual Representations for Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Debnath_2026_CVPR, author = {Debnath, Soumyaratna and Manh, Bui Duc and Liu, Zinan and Wang, Lin}, title = {LLMind: Bio-inspired Training-free Adaptive Visual Representations for Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3133-3142} }
TextOVSR: Text-Guided Real-World Opera Video Super-Resolution-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chang_2026_CVPR, author = {Chang, Hua and Xu, Xin and Liu, Wei and Wu, Jiayi and Jiang, Kui and Ma, Fei and Tian, Qi}, title = {TextOVSR: Text-Guided Real-World Opera Video Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2156-2165} }
Garments2Look: A Multi-Reference Dataset for High-Fidelity Outfit-Level Virtual Try-On with Clothing and Accessories-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Junyao and Cheng, Zhongwei and Wong, Waikeung and Zou, Xingxing}, title = {Garments2Look: A Multi-Reference Dataset for High-Fidelity Outfit-Level Virtual Try-On with Clothing and Accessories}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1123-1133} }
Decompose, Mix, Adapt: A Unified Framework for Parameter-Efficient Neural Network Recombination and Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tasnim_2026_CVPR, author = {Tasnim, Nazia and Prabhumoye, Shrimai and Plummer, Bryan A.}, title = {Decompose, Mix, Adapt: A Unified Framework for Parameter-Efficient Neural Network Recombination and Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19380-19392} }
Text-Driven 3D Hand Motion Generation from Sign Language Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bensabath_2026_CVPR, author = {Bensabath, L\'eore and Petrovich, Mathis and Varol, G\"ul}, title = {Text-Driven 3D Hand Motion Generation from Sign Language Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23095-23105} }
Adaptive Capacity Autoregressive Visual Tracking-
[pdf]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Tong and Bai, Yifan and Liang, Shiyi and Niu, Ruigang and Wei, Xing}, title = {Adaptive Capacity Autoregressive Visual Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13574-13583} }
Stable Mean Flow: Lyapunov-Inspired One-Step Flow Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Guangxun and Haberle, Mason and Geiger, Davi}, title = {Stable Mean Flow: Lyapunov-Inspired One-Step Flow Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9223-9232} }
Incentivizing Generative Zero-Shot Learning via Outcome-Reward Reinforcement Learning with Visual Cues-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hou_2026_CVPR, author = {Hou, Wenjin and Sun, Xiaoxiao and Fan, Hehe}, title = {Incentivizing Generative Zero-Shot Learning via Outcome-Reward Reinforcement Learning with Visual Cues}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5499-5510} }
Steering Where to Diffuse: Generative Modeling of Phenotypic Response Simulation with Steered Diffusion Bridge-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Rongchao and Li, Chengxin and Lou, Yiwei and Shi, Yuling and Wang, Hanpin and Huang, Yu}, title = {Steering Where to Diffuse: Generative Modeling of Phenotypic Response Simulation with Steered Diffusion Bridge}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27367-27377} }
SoPE: Spherical Coordinate-Based Positional Embedding for Enhancing Spatial Perception of 3D LVLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Yip_2026_CVPR, author = {Yip, Koonting and Zhao, Qiyan and Yu, Wenhao and Yuen, Liangyu and Li, Mingkai and Zhang, Xiaofeng and Ji, Jianmin and Zhang, Yanyong and Jiang, Qing and Yuen, Ka-Veng}, title = {SoPE: Spherical Coordinate-Based Positional Embedding for Enhancing Spatial Perception of 3D LVLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33714-33726} }
3D-Fixer: Coarse-to-Fine In-place Completion for 3D Scenes from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Ze-Xin and Liu, Liu and Wang, Xinjie and Sui, Wei and Su, Zhizhong and Yang, Jian and Xie, Jin}, title = {3D-Fixer: Coarse-to-Fine In-place Completion for 3D Scenes from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12753-12763} }
EduDiag: A Benchmark for Educational Diagnostic Reasoning with Error Tracing and Correction on Large Multimodal Models-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Jiali and Xue, Yuqi and Hei, Xusen and Fu, DingBa and Wei, Yuancheng and Xie, Jiayuan and Cai, Yi}, title = {EduDiag: A Benchmark for Educational Diagnostic Reasoning with Error Tracing and Correction on Large Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15890-15901} }
ColaVLA: Leveraging Cognitive Latent Reasoning for Hierarchical Parallel Trajectory Planning in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Qihang and Chen, Xuesong and Yang, Chenye and Shi, Shaoshuai and Li, Hongsheng}, title = {ColaVLA: Leveraging Cognitive Latent Reasoning for Hierarchical Parallel Trajectory Planning in Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17809-17819} }
Breaking the Illusion: When Positive Meets Negative in Multimodal Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Yubo and An, Yitong and Yang, Xin and Wuerkaixi, Abudukelimu and Cheng, Xuxin and Xie, Fengying and Jiang, Zhiguo and Liu, Cao and Zeng, Ke and Zhang, Haopeng}, title = {Breaking the Illusion: When Positive Meets Negative in Multimodal Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4210-4220} }
UniPixie: Unified and Probabilistic 3D Physics Learning via Flow Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Qilin and Huynh, Quynh Anh and Le, Long and Wang, Chen and Chen, Chuhao and Lucas, Ryan and Eaton, Eric and Liu, Lingjie}, title = {UniPixie: Unified and Probabilistic 3D Physics Learning via Flow Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19907-19916} }
Forecasting 3D Scanpaths in Egocentric Video-
[pdf]
[supp]
[bibtex]@InProceedings{Ryan_2026_CVPR, author = {Ryan, Fiona and Ananthabhotla, Ishwarya and Qian, Yijun and Hoffman, Judy and Rehg, James M. and Ithapu, Vamsi Krishna and Murdock, Calvin}, title = {Forecasting 3D Scanpaths in Egocentric Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42824-42835} }
SimLBR: Learning to Detect Fake Images by Learning to Detect Real Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dhakal_2026_CVPR, author = {Dhakal, Aayush and Khanal, Subash and Sastry, Srikumar and Arndt, Jacob and Dias, Philipe and Lunga, Dalton and Jacobs, Nathan}, title = {SimLBR: Learning to Detect Fake Images by Learning to Detect Real Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35472-35482} }
$L^{2}DGS$: Low-Light Dynamic Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Kumar_2026_CVPR, author = {Kumar, Ashish and Ambasamduram, Rajagopalan N}, title = {{$L^{2}DGS$}: Low-Light Dynamic Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19096-19106} }
Boosting Visual Reprogramming for CLIP with Dual Granularity Alignment-
[pdf]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Jiayang and Chen, Xinyang and Lv, Ke and Guan, Weili}, title = {Boosting Visual Reprogramming for CLIP with Dual Granularity Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29347-29356} }
CrowdGaussian: Reconstructing High-Fidelity 3D Gaussians for Human Crowd from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Yizheng and Zhuang, Yiyu and Xu, Qipeng and Wang, Haixiang and Zhu, Jiahe and Tian, Jing and Zhu, Siyu and Zhu, Hao}, title = {CrowdGaussian: Reconstructing High-Fidelity 3D Gaussians for Human Crowd from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11006-11016} }
Granulon: Awakening Pixel-Level Visual Encoders with Adaptive Multi-Granularity Semantics for MLLM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Junyuan and Li, Qiankun and Meng, Linghao and He, Zhicheng and Zhou, Xinliang and Wang, Kun and Liu, Yang and Jin, Yueming}, title = {Granulon: Awakening Pixel-Level Visual Encoders with Adaptive Multi-Granularity Semantics for MLLM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26317-26327} }
AtomicVLA: Unlocking the Potential of Atomic Skill Learning in Robots-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Likui and Tang, Tao and Zhan, Zhihao and Chen, Xiuwei and Chen, Zisheng and Han, Jianhua and Zhu, Jiangtong and Xu, Pei and Xu, Hang and Wu, Hefeng and Lin, Liang and Liang, Xiaodan}, title = {AtomicVLA: Unlocking the Potential of Atomic Skill Learning in Robots}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20743-20754} }
Phased DMD: Few-step Distribution Matching Distillation via Score Matching within Subintervals-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Xiangyu and Qiu, Zesong and Wu, Zhuguanyu and Wang, Fanzhou and Lin, Zhiqian and Ren, Tianxiang and Lin, Dahua and Gong, Ruihao and Yang, Lei}, title = {Phased DMD: Few-step Distribution Matching Distillation via Score Matching within Subintervals}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41667-41676} }
EffectErase: Joint Video Object Removal and Insertion for High-Quality Effect Erasing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2026_CVPR, author = {Fu, Yang and Zheng, Yike and Dai, Ziyun and Ding, Henghui}, title = {EffectErase: Joint Video Object Removal and Insertion for High-Quality Effect Erasing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2005-2014} }
SOUPLE: Enhancing Audio-Visual Localization and Segmentation with Learnable Prompt Contexts-
[pdf]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Khanh Binh and Park, Chae Jung}, title = {SOUPLE: Enhancing Audio-Visual Localization and Segmentation with Learnable Prompt Contexts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8674-8683} }
ReDirector: Creating Any-Length Video Retakes with Rotary Camera Encoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Byeongjun and Kim, Byung-Hoon and Chung, Hyungjin and Ye, Jong Chul}, title = {ReDirector: Creating Any-Length Video Retakes with Rotary Camera Encoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11163-11173} }
Heuristic-inspired Reasoning Priors Facilitate Data-Efficient Referring Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xu and Chen, Zhe and Zhang, Jing and Tao, Dacheng}, title = {Heuristic-inspired Reasoning Priors Facilitate Data-Efficient Referring Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10082-10092} }
GaussianFluent: Gaussian Simulation for Dynamic Scenes with Mixed Materials-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Bei and Chen, Yixin and Lu, Ruijie and Zeng, Gang and Zha, Hongbin and Pei, Yuru and Huang, Siyuan}, title = {GaussianFluent: Gaussian Simulation for Dynamic Scenes with Mixed Materials}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21583-21593} }
SoC: Semantic Orthogonal Calibration for Test-Time Prompt Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fillioux_2026_CVPR, author = {Fillioux, Leo and Chakraborty, Omprakash and Ben Ayed, Ismail and Courn\`ede, Paul-Henry and Christodoulidis, Stergios and Vakalopoulou, Maria and Dolz, Jose}, title = {SoC: Semantic Orthogonal Calibration for Test-Time Prompt Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4772-4782} }
Learning a Unified Latent Action Space from Videos with Action-centric Cycle Consistency-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Guangyan and Shao, Qi and Cui, Te and Zhou, Zichen and Mao, Weixin and Yang, Luojie and Wang, Meiling and Yang, Yi and Chen, Hua and Yue, Yufeng}, title = {Learning a Unified Latent Action Space from Videos with Action-centric Cycle Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12871-12880} }
FaceCam: Portrait Video Camera Control via Scale-Aware Conditioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lyu_2026_CVPR, author = {Lyu, Weijie and Yang, Ming-Hsuan and Shu, Zhixin}, title = {FaceCam: Portrait Video Camera Control via Scale-Aware Conditioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30607-30617} }
Language-Free Generative Editing from One Visual Example-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Elezabi_2026_CVPR, author = {Elezabi, Omar and Zamfir, Eduard and Wu, Zongwei and Timofte, Radu}, title = {Language-Free Generative Editing from One Visual Example}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1078-1088} }
ORION: ORthonormal Text Encoding for Universal VLM AdaptatION-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chakraborty_2026_CVPR, author = {Chakraborty, Omprakash and Dolz, Jose and Ben Ayed, Ismail}, title = {ORION: ORthonormal Text Encoding for Universal VLM AdaptatION}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31556-31565} }
Mobile-VTON: High-Fidelity On-Device Virtual Try-On-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wan_2026_CVPR, author = {Wan, Zhenchen and Chen, Ce and Lin, Runqi and Huang, Jiaxin and Chen, Tianxi and Xu, Yanwu and Liu, Tongliang and Gong, Mingming}, title = {Mobile-VTON: High-Fidelity On-Device Virtual Try-On}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38081-38090} }
EmoTaG: Emotion-Aware Talking Head Synthesis on Gaussian Splatting with Few-Shot Personalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Haolan and Cheng, Keli and Wang, Lei and Bi, Ning and Liu, Xiaoming}, title = {EmoTaG: Emotion-Aware Talking Head Synthesis on Gaussian Splatting with Few-Shot Personalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10921-10931} }
RINO: Rotation-Invariant Non-Rigid Correspondences-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Maolin and Hu-Chen, Shao Jie and Deng, Congyue and Marin, Riccardo and Guibas, Leonidas and Cremers, Daniel}, title = {RINO: Rotation-Invariant Non-Rigid Correspondences}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34683-34693} }
Native and Compact Structured Latents for 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2026_CVPR, author = {Xiang, Jianfeng and Chen, Xiaoxue and Xu, Sicheng and Wang, Ruicheng and Lv, Zelong and Deng, Yu and Zhu, Hongyuan and Dong, Yue and Zhao, Hao and Yuan, Nicholas Jing and Yang, Jiaolong}, title = {Native and Compact Structured Latents for 3D Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14419-14429} }
IP-Adapter Is All You Need: Towards Fine-Tuning-Free Diffusion-Based Talking Face Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Hao and Luo, Xiangyang and Wang, Hao and Zhang, Jiawei and Zhang, Yi and Wang, Jinwei}, title = {IP-Adapter Is All You Need: Towards Fine-Tuning-Free Diffusion-Based Talking Face Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32967-32977} }
DualReg: Dual-Space Filtering and Reinforcement for Rigid Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jiayi and Yao, Yuxin and Lu, Qiuhang and Zhang, Juyong}, title = {DualReg: Dual-Space Filtering and Reinforcement for Rigid Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39031-39041} }
UFO: Unifying Feed-Forward and Optimization-based Methods for Large Driving Scene Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Kaiyuan and Shen, Yingying and Zhu, Ziyue and Tu, Mingfei and Zhu, Haohui and Sun, Haiyang and Wang, Bing and Chen, Guang and Ye, Hangjun}, title = {UFO: Unifying Feed-Forward and Optimization-based Methods for Large Driving Scene Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21849-21859} }
AdaSFormer: Adaptive Serialized Transformers for Monocular Semantic Scene Completion from Indoor Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xuzhi and Wu, Xinran and Wang, Song and Kong, Lingdong and Zhao, Ziping}, title = {AdaSFormer: Adaptive Serialized Transformers for Monocular Semantic Scene Completion from Indoor Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34148-34159} }
Plant Taxonomy Meets Plant Counting: A Fine-Grained, Taxonomic Dataset for Counting Hundreds of Plant Species-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Jinyu and Hu, Tianqi and Hu, Xiaonan and Zhou, Letian and Cao, Songliang and Zhang, Meng and Lu, Hao}, title = {Plant Taxonomy Meets Plant Counting: A Fine-Grained, Taxonomic Dataset for Counting Hundreds of Plant Species}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {167-177} }
Heterogeneous Decentralized Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Zhiying and Seraj, Raihan and Villagra, Marcos and Roy, Bidhan}, title = {Heterogeneous Decentralized Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2391-2400} }
XSeg: A Large-scale X-ray Contraband Segmentation Benchmark For Real-World Security Screening-
[pdf]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Hongxia and Chen, Yixin and Wen, Jiali and Li, Litao and Liu, Qianyun and Zhang, Kaijie}, title = {XSeg: A Large-scale X-ray Contraband Segmentation Benchmark For Real-World Security Screening}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24950-24959} }
Zoo3D: Zero-Shot 3D Object Detection at Scene Level-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lemeshko_2026_CVPR, author = {Lemeshko, Andrey and Gabdullin, Bulat and Drozdov, Nikita and Konushin, Anton and Rukhovich, Danila and Kolodiazhnyi, Maksim}, title = {Zoo3D: Zero-Shot 3D Object Detection at Scene Level}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25820-25829} }
TimeViper: A Hybrid Mamba-Transformer Vision-Language Model for Efficient Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Boshen and Xiao, Zihan and Li, Jiaze and Ju, Jianzhong and Luo, Zhenbo and Luan, Jian and Jin, Qin}, title = {TimeViper: A Hybrid Mamba-Transformer Vision-Language Model for Efficient Long Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40479-40493} }
FilterGS: Traversal-Free Parallel Filtering and Adaptive Shrinking for Large-Scale LoD 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yixian and Yu, Haolin and Tang, Jiadong and Gao, Yu and Wang, Xihan and Yue, Yufeng and Yang, Yi}, title = {FilterGS: Traversal-Free Parallel Filtering and Adaptive Shrinking for Large-Scale LoD 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26052-26061} }
Goal-Driven Reward by Video Diffusion Models for Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Qi and Wu, Mian and Zhang, Yuyang and Yuan, Mingqi and Zhang, Wenyao and You, Haoxiang and Wang, Yunbo and Jin, Xin and Yang, Xiaokang and Zeng, Wenjun}, title = {Goal-Driven Reward by Video Diffusion Models for Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8077-8086} }
Unsupervised Multi-Scale Segmentation of 3D Subcellular World with Stable Diffusion Foundation Model-
[pdf]
[supp]
[bibtex]@InProceedings{Uddin_2026_CVPR, author = {Uddin, Mostofa Rafid and Tabib, HM Shadman and Nguyen, Thanh-Huy and Gandhi, Kashish and Xu, Min}, title = {Unsupervised Multi-Scale Segmentation of 3D Subcellular World with Stable Diffusion Foundation Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22744-22752} }
Enhancing Hands in 3D Whole-Body Pose Estimation with Conditional Hands Modulator-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Moon_2026_CVPR, author = {Moon, Gyeongsik}, title = {Enhancing Hands in 3D Whole-Body Pose Estimation with Conditional Hands Modulator}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8891-8900} }
LiDAR-to-4DRadar Diffusion Bridge via Cross-Modal Alignment and Translation in Latent Space-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2026_CVPR, author = {Shen, Dazhong and Gu, Jingjing and Zhou, Qiang and Zhao, Meng and Sun, Ying}, title = {LiDAR-to-4DRadar Diffusion Bridge via Cross-Modal Alignment and Translation in Latent Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17110-17120} }
GeniNav: Generative Model Driven Image-Goal Navigation via Imagination-Guided Consistency Flow Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yuqi and Gao, Junjie and Pan, Yongzhou and Song, Siyuan and Zhang, Zixuan and Xiao, Jiaping and Feroskhan, Mir}, title = {GeniNav: Generative Model Driven Image-Goal Navigation via Imagination-Guided Consistency Flow Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {996-1005} }
CineSRD: Leveraging Visual, Acoustic, and Linguistic Cues for Open-World Visual Media Speaker Diarization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Liangbin and Liao, Xiaohua and Cui, Chaoqun and Wang, Shijing and Huang, Zhaolong and Du, Yanlong and Mao, Wenji}, title = {CineSRD: Leveraging Visual, Acoustic, and Linguistic Cues for Open-World Visual Media Speaker Diarization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8836-8845} }
Learning Personalized Photographic Style from Pairwise User Preferences-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Jinwoo and Yoo, Jihye and Kim, Seon Joo}, title = {Learning Personalized Photographic Style from Pairwise User Preferences}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1134-1144} }
Reparameterized Tensor Ring Functional Decomposition for Multi-Dimensional Data Recovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Yangyang and Ke, Junbo and Wen, You-Wei and Wang, Chao}, title = {Reparameterized Tensor Ring Functional Decomposition for Multi-Dimensional Data Recovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26389-26398} }
Semi-Supervised Conformal Prediction With Unlabeled Nonconformity Score-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Xuanning and Shi, Zihao and Zeng, Hao and Xia, Xiaobo and Jing, Bingyi and Wei, Hongxin}, title = {Semi-Supervised Conformal Prediction With Unlabeled Nonconformity Score}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17704-17713} }
EfficientVPR: Toward Efficient Visual Place Recognition via Scene-Aware Prompt Tuning and Adaptive Feature Enhancement-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Wenjing and Yang, Chuanguang and An, Zhulin and Huang, Libo and Diao, Boyu and Xu, Yongjun}, title = {EfficientVPR: Toward Efficient Visual Place Recognition via Scene-Aware Prompt Tuning and Adaptive Feature Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33738-33748} }
DA-Mamba: Learning Domain-Aware State Space Model for Global-Local Alignment in Domain Adaptive Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Haochen and Zhang, Rui and Yao, Hantao and Zhang, Xin and Hao, Yifan and Peng, Shaohui and Zhao, Yongwei and Li, Ling}, title = {DA-Mamba: Learning Domain-Aware State Space Model for Global-Local Alignment in Domain Adaptive Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8000-8010} }
MU-GeNeRF: Multi-view Uncertainty-guided Generalizable Neural Radiance Fields for Distractor-aware Scene-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mu_2026_CVPR, author = {Mu, Wenjie and Li, Zhan and Su, Chuanzhou and Shen, Xuanyi and Liu, Ziniu and Lu, Fan and Mo, Yujian and Zhao, Junqiao and Feng, Tiantian and Ye, Chen and Chen, Guang}, title = {MU-GeNeRF: Multi-view Uncertainty-guided Generalizable Neural Radiance Fields for Distractor-aware Scene}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38049-38059} }
Material Magic Wand: Material-Aware Grouping of 3D Parts in Untextured Meshes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jain_2026_CVPR, author = {Jain, Umangi and Kim, Vladimir and Gadelha, Matheus and Gilitschenski, Igor and Chen, Zhiqin}, title = {Material Magic Wand: Material-Aware Grouping of 3D Parts in Untextured Meshes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6377-6387} }
VQRAE: Representation Quantization Autoencoders for Multimodal Understanding, Generation and Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2026_CVPR, author = {Du, Sinan and Guo, Jiahao and Li, Bo and Cui, Shuhao and Xu, Zhengzhuo and Luo, Yifu and Wei, Yongxian and Gai, Kun and Wang, Xinggang and Wu, Kai and Yuan, Chun}, title = {VQRAE: Representation Quantization Autoencoders for Multimodal Understanding, Generation and Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30322-30334} }
Sparse-LaViDa: Sparse Multimodal Discrete Diffusion Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Shufan and Gu, Jiuxiang and Liu, Kangning and Lin, Zhe and Wei, Zijun and Grover, Aditya and Kuen, Jason}, title = {Sparse-LaViDa: Sparse Multimodal Discrete Diffusion Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36103-36114} }
Test-Time Training for LiDAR Semantic Segmentation under Corruption via Geometric Inlier Discrimination-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Hyeonseong and Jang, Hyun-Kurl and Yoon, Kuk-Jin}, title = {Test-Time Training for LiDAR Semantic Segmentation under Corruption via Geometric Inlier Discrimination}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24206-24216} }
LEADER: Learning Reliable Local-to-Global Correspondences for LiDAR Relocalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Jianshi and Zhu, Minghang and Liu, Dunqiang and Li, Wen and Ao, Sheng and Shen, Siqi and Wen, Chenglu and Wang, Cheng}, title = {LEADER: Learning Reliable Local-to-Global Correspondences for LiDAR Relocalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9932-9942} }
Thinking Beyond Labels: Vocabulary-Free Fine-Grained Recognition using Reasoning-Augmented LMMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Demidov_2026_CVPR, author = {Demidov, Dmitry and Zaheer, Muhammad Zaigham and Han, Zongyan and Thawakar, Omkar and Anwer, Rao}, title = {Thinking Beyond Labels: Vocabulary-Free Fine-Grained Recognition using Reasoning-Augmented LMMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16855-16864} }
SinGeo: Unlock Single Model's Potential for Robust Cross-View Geo-Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yang and Chen, Xieyuanli and Li, Junxiang and Tang, Jie and Wu, Tao}, title = {SinGeo: Unlock Single Model's Potential for Robust Cross-View Geo-Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19403-19412} }
JarvisEvo: Towards a Self-Evolving Photo Editing Agent with Synergistic Editor-Evaluator Optimization-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Yunlong and Wang, Linqing and Lin, Kunjie and Lin, Zixu and Gong, Kaixiong and Li, Wenbo and Lin, Bin and Li, Zhenxi and Zhang, Shiyi and Peng, Yuyang and Dai, Wenxun and Ding, Xinghao and Wang, Chunyu and Lu, Qinglin}, title = {JarvisEvo: Towards a Self-Evolving Photo Editing Agent with Synergistic Editor-Evaluator Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27291-27302} }
SyncDreamer: Controllable and Expressive Avatar Generation Beyond the Talking Head-
[pdf]
[supp]
[bibtex]@InProceedings{Nazarieh_2026_CVPR, author = {Nazarieh, Fatemeh and Feng, Zhenhua and Kanojia, Diptesh and Kittler, Josef and Awais, Muhammad}, title = {SyncDreamer: Controllable and Expressive Avatar Generation Beyond the Talking Head}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25121-25130} }
FusionRegister: Every Infrared and Visible Image Fusion Deserves Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bian_2026_CVPR, author = {Bian, Congcong and Ma, Haolong and Li, Hui and Shen, Zhongwei and Luo, Xiaoqing and Song, Xiaoning and Wu, Xiao-jun}, title = {FusionRegister: Every Infrared and Visible Image Fusion Deserves Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41551-41561} }
SwiftVLA: Unlocking Spatiotemporal Dynamics for Lightweight VLA Models at Minimal Overhead-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ni_2026_CVPR, author = {Ni, Chaojun and Chen, Cheng and Wang, Xiaofeng and Zhu, Zheng and Zheng, Wenzhao and Wang, Boyuan and Chen, Tianrun and Zhao, Guosheng and Li, Haoyun and Dong, Zhehao and Zhang, Qiang and Ye, Yun and Wang, Yang and Huang, Guan and Mei, Wenjun}, title = {SwiftVLA: Unlocking Spatiotemporal Dynamics for Lightweight VLA Models at Minimal Overhead}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13474-13485} }
SpatiaLQA: A Benchmark for Evaluating Spatial Logical Reasoning in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Yuechen and Zhang, Xiaoyan and Shan, Yicheng and Hao, Zhu and Tang, Rui and Wei, Rong and Song, Mingli and Wan, Yuanyu and Song, Jie}, title = {SpatiaLQA: A Benchmark for Evaluating Spatial Logical Reasoning in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2645-2657} }
Robustness Under Data Scarcity: Few-Shot Continual Adversarial Training for Evolving Threats-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Wenxuan and Wang, Chenglei and Yan, Chengzhi and Qian, Xuelin and Zhang, Yanning}, title = {Robustness Under Data Scarcity: Few-Shot Continual Adversarial Training for Evolving Threats}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34908-34917} }
H^2A^2: Homogeneity-Aware and Heterogeneity-Aware Feature Perception for Unified Indoor 3D Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Tao and An, Tao and Liu, Feng and Jin, Wensheng and Li, Zhengyu and Zhao, Lijun and Li, Ruifeng}, title = {H{\textasciicircum}2A{\textasciicircum}2: Homogeneity-Aware and Heterogeneity-Aware Feature Perception for Unified Indoor 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40717-40726} }
Omni-MMSI: Toward Identity-attributed Social Interaction Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xinpeng and Lai, Bolin and Chen, Hardy and Deng, Shijian and Xie, Cihang and Zhou, Yuyin and Rehg, James M. and Tian, Yapeng}, title = {Omni-MMSI: Toward Identity-attributed Social Interaction Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8684-8696} }
Mirai: Autoregressive Visual Generation Needs Foresight-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Yonghao and Huang, Lang and Wang, Zerun and Li, Runyi and Yamasaki, Toshihiko}, title = {Mirai: Autoregressive Visual Generation Needs Foresight}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30511-30520} }
SIMPACT: Simulation-Enabled Action Planning using Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Haowen and Yao, Shaoxiong and Chen, Haonan and Gao, Jiawei and Mao, Jiayuan and Huang, Jia-Bin and Du, Yilun}, title = {SIMPACT: Simulation-Enabled Action Planning using Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20790-20801} }
EMMA: Concept Erasure Benchmark with Comprehensive Semantic Metrics and Diverse Categories-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Lu and Nakashima, Yuta and Garcia, Noa}, title = {EMMA: Concept Erasure Benchmark with Comprehensive Semantic Metrics and Diverse Categories}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37247-37257} }
Unsupervised 3d Motion Estimation Using Event Camera-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Han and Zhai, Wei and Zhao, Tiesong and Li, Bin and Cao, Yang and Zha, Zheng-jun}, title = {Unsupervised 3d Motion Estimation Using Event Camera}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8067-8076} }
What's Wrong with Synthetic Data for Scene Text Recognition? A Strong Synthetic Engine with Diverse Simulations and Self-Evolution-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Xingsong and Du, Yongkun and Zhang, JiaXin and Li, Chen and LYU, Jing and Chen, Zhineng}, title = {What's Wrong with Synthetic Data for Scene Text Recognition? A Strong Synthetic Engine with Diverse Simulations and Self-Evolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16645-16654} }
Fed-ADE: Adaptive Learning Rate for Federated Post-adaptation under Distribution Shift-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Heewon and Joe, Mugon and Kim, Miru and Im, Kyungjin and Kwon, Minhae}, title = {Fed-ADE: Adaptive Learning Rate for Federated Post-adaptation under Distribution Shift}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24587-24597} }
Beyond Duality: A Hybrid Framework of Leveraging Shared and Private Features for RGB-Event Object Detection-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Keyao and Liu, Shuai and Shi, Hengda and Shi, Lukui and Chen, Haiyong}, title = {Beyond Duality: A Hybrid Framework of Leveraging Shared and Private Features for RGB-Event Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4415-4424} }
Solving a Nonlinear Blind Inverse Problem for Tagged MRI with Physics and Deep Generative Priors-
[pdf]
[arXiv]
[bibtex]@InProceedings{Bian_2026_CVPR, author = {Bian, Zhangxing and Wei, Shuwen and Remedios, Samuel W. and Chen, Junyu and Carass, Aaron and Dewey, Blake and Prince, Jerry L}, title = {Solving a Nonlinear Blind Inverse Problem for Tagged MRI with Physics and Deep Generative Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41730-41740} }
HierUQ: Hierarchical Uncertainty Quantification with Adaptive Granularity Reconciliation for Degraded Image Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Chu_2026_CVPR, author = {Chu, Yang and Yang, Xiaomeng and Deng, Keli and Qian, Yuntao}, title = {HierUQ: Hierarchical Uncertainty Quantification with Adaptive Granularity Reconciliation for Degraded Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11240-11249} }
Spatiotemporal Pyramid Flow Matching for Climate Emulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Irvin_2026_CVPR, author = {Irvin, Jeremy A. and Han, Jiaqi and Wang, Zikui and Alharbi, Abdulaziz and Zhao, Yufei and Bayarsaikhan, Nomin-Erdene and Visioni, Daniele and Ng, Andrew Y. and Watson-Parris, Duncan}, title = {Spatiotemporal Pyramid Flow Matching for Climate Emulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42180-42190} }
PhaSR: Generalized Image Shadow Removal with Physically Aligned Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Chia-Ming and Lin, Yu-Fan and Hsiao, Yu-Jou and Jiang, Jin-Hui and Liu, Yu-Lun and Hsu, Chih-Chung}, title = {PhaSR: Generalized Image Shadow Removal with Physically Aligned Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22679-22688} }
Stable Spike: Dual Consistency Optimization via Bitwise AND Operations for Spiking Neural Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2026_CVPR, author = {Ding, Yongqi and Yang, Kunshan and Li, Linze and Zhang, Yiyang and Jing, Mengmeng and Zuo, Lin}, title = {Stable Spike: Dual Consistency Optimization via Bitwise AND Operations for Spiking Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {892-902} }
H-Sets: Hessian-Guided Discovery of Set-Level Feature Interactions in Image Classifiers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mehrotra_2026_CVPR, author = {Mehrotra, Ayushi and Bhusal, Dipkamal and Clifford, Michael and Rastogi, Nidhi}, title = {H-Sets: Hessian-Guided Discovery of Set-Level Feature Interactions in Image Classifiers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17067-17076} }
Factorized Context Aggregation for Robust Cancer Risk Estimation via Soft Re-Ranked Retrieval and Hierarchical Anchors-
[pdf]
[supp]
[bibtex]@InProceedings{Moghadam_2026_CVPR, author = {Moghadam, Puria Azadi and Mirabadi, Ali Khajegili and Maneshgar, Behnam and Farahani, Hossein and Bashashati, Ali}, title = {Factorized Context Aggregation for Robust Cancer Risk Estimation via Soft Re-Ranked Retrieval and Hierarchical Anchors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34095-34105} }
Relightable Holoported Characters: Capturing and Relighting Dynamic Human Performance from Sparse Views-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Singh_2026_CVPR, author = {Singh, Kunwar Maheep and Chen, Jianchun and Golyanik, Vladislav and Garbin, Stephan J. and Beeler, Thabo and Dabral, Rishabh and Habermann, Marc and Theobalt, Christian}, title = {Relightable Holoported Characters: Capturing and Relighting Dynamic Human Performance from Sparse Views}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28882-28892} }
GenBreak: Red Teaming Text-to-Image Generation Using Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zilong and Zheng, Xiang and Wang, Xiaosen and Wang, Bo and Ma, Xingjun}, title = {GenBreak: Red Teaming Text-to-Image Generation Using Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15730-15739} }
Selfi: Self-improving Reconstruction Engine via 3D Geometric Feature Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Youming and Peng, Songyou and Zhang, Junyi and Heal, Kathryn and Sun, Tiancheng and Flynn, John and Marschner, Steve and Chai, Lucy}, title = {Selfi: Self-improving Reconstruction Engine via 3D Geometric Feature Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7351-7361} }
Token Reduction via Local and Global Contexts Optimization for Efficient Video Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jinlong and Jiang, Liyuan and Zhang, Haonan and Sebe, Nicu}, title = {Token Reduction via Local and Global Contexts Optimization for Efficient Video Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10451-10461} }
Match-and-Fuse: Consistent Generation from Unstructured Image Sets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feingold_2026_CVPR, author = {Feingold, Kate and Kaduri, Omri and Dekel, Tali}, title = {Match-and-Fuse: Consistent Generation from Unstructured Image Sets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30490-30499} }
Denoise and Align: Towards Source-Free UDA for Robust Panoramic Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chang_2026_CVPR, author = {Chang, Yaowen and Cao, Zhen and Zheng, Xu and Mi, Xiaoxin and Dong, Zhen}, title = {Denoise and Align: Towards Source-Free UDA for Robust Panoramic Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32226-32235} }
Decoupling Defense Strategies for Robust Image Watermarking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Jiahui and Deng, Zehang and Zhang, Zeyu and Li, Chaoyang and Jia, Lianchen and Sun, Lifeng}, title = {Decoupling Defense Strategies for Robust Image Watermarking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3316-3325} }
What Your Features Reveal: Data-Efficient Black-Box Feature Inversion Attack for Split DNNs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2026_CVPR, author = {Ren, Zhihan and He, Lijun and Liang, Jiaxi and Fu, Xinzhu and Bi, Haixia and Li, Fan}, title = {What Your Features Reveal: Data-Efficient Black-Box Feature Inversion Attack for Split DNNs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13366-13375} }
OmniFM: Toward Modality-Robust and Task-Agnostic Federated Learning for Heterogeneous Medical Imaging-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Meilin and Wang, Jiaying and Shan, Jing}, title = {OmniFM: Toward Modality-Robust and Task-Agnostic Federated Learning for Heterogeneous Medical Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21099-21109} }
tttLRM: Test-Time Training for Long Context and Autoregressive 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Chen and Tan, Hao and Yifan, Wang and Chen, Zhiqin and Liu, Yuheng and Sunkavalli, Kalyan and Bi, Sai and Liu, Lingjie and Hu, Yiwei}, title = {tttLRM: Test-Time Training for Long Context and Autoregressive 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36582-36592} }
Orthogonal Spatial-Aware Multi-View Anchor Graph Clustering for Incomplete Remote Sensing Data-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yongshan and Lin, Xiaohuan and Zhang, Lefei and Cai, Zhihua}, title = {Orthogonal Spatial-Aware Multi-View Anchor Graph Clustering for Incomplete Remote Sensing Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20532-20541} }
Designing Instance-Level Sampling Schedules via REINFORCE with James-Stein Shrinkage-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Peiyu and Kothawade, Suraj and Xie, Sirui and Wu, Ying Nian and Fei, Hongliang}, title = {Designing Instance-Level Sampling Schedules via REINFORCE with James-Stein Shrinkage}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36040-36050} }
MVInverse: Feed-forward Multiview Inverse Rendering in Seconds-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Xiangzuo and Ren, Chengwei and Zhou, Jun and Li, Xiu and Liu, Yuan},
  title     = {{MVInverse}: Feed-forward Multiview Inverse Rendering in Seconds},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37343--37357},
}
MM-ACT: Learn from Multimodal Parallel Generation to Act-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Haotian and Chen, Xinyi and Wang, Bin and Chen, Mingkang and Liu, Yitian and Zhang, Yuhao and Chen, Zanxin and Yang, Tianshuo and Chen, Yilun and Pang, Jiangmiao and Liu, Dong and Yang, Xiaokang and Mu, Yao and Shao, Wenqi and Luo, Ping},
  title     = {{MM-ACT}: Learn from Multimodal Parallel Generation to Act},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35080--35090},
}
Rethinking Cross-Modal Anchor Alignment for Mitigating Error Accumulation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Bin and Sun, Wei and Wang, Qianqian and Feng, Wei and Chen, Yijie and Zhang, Haixi},
  title     = {Rethinking Cross-Modal Anchor Alignment for Mitigating Error Accumulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8664--8673},
}
CountGD++: Generalized Prompting for Open-World Counting-
[pdf]
[supp]
[bibtex]@InProceedings{Amini-Naieni_2026_CVPR,
  author    = {Amini-Naieni, Niki and Zisserman, Andrew},
  title     = {{CountGD++}: Generalized Prompting for Open-World Counting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37725--37734},
}
Training-free Detection of Generated Videos via Spatial-Temporal Likelihoods-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ben_Hayun_2026_CVPR,
  author    = {Ben Hayun, Omer and Betser, Roy and Levi, Meir Yossef and Kassel, Levi and Gilboa, Guy},
  title     = {Training-free Detection of Generated Videos via Spatial-Temporal Likelihoods},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {16299--16310},
}
PRISM: Video Dataset Condensation with Progressive Refinement and Insertion for Sparse Motion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Jaehyun and Hur, Jiwan and Han, Gyojin and Yu, Jaemyung and Kim, Junmo},
  title     = {{PRISM}: Video Dataset Condensation with Progressive Refinement and Insertion for Sparse Motion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26348--26357},
}
Interpretable Debiasing of Vision-Language Models for Social Fairness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{An_2026_CVPR,
  author    = {An, Na Min and Jang, Yoonna and Hirota, Yusuke and Hachiuma, Ryo and Augenstein, Isabelle and Shim, Hyunjung},
  title     = {Interpretable Debiasing of Vision-Language Models for Social Fairness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {39326--39337},
}
D3D-VLP: Dynamic 3D Vision-Language-Planning Model for Embodied Grounding and Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zihan and Lee, Seungjun and Dai, Guangzhao and Lee, Gim Hee},
  title     = {{D3D-VLP}: Dynamic {3D} Vision-Language-Planning Model for Embodied Grounding and Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32463--32474},
}
WorldReel: 4D Video Generation with Consistent Geometry and Motion Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Shaoheng and Jiang, Hanwen and Bai, Yunpeng and Mitra, Niloy J. and Huang, Qixing},
  title     = {{WorldReel}: {4D} Video Generation with Consistent Geometry and Motion Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11195--11206},
}
Property-Informed Diffusion-Based Text-to-Microstructure Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2026_CVPR,
  author    = {Dai, Bingxuan and Wang, Hongsong and Gui, Jie},
  title     = {Property-Informed Diffusion-Based Text-to-Microstructure Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36758--36768},
}
Off The Grid: Detection of Primitives for Feed-Forward 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Moreau_2026_CVPR,
  author    = {Moreau, Arthur and Shaw, Richard and Nazarczuk, Michal and Shin, Jisu and Tanay, Thomas and Zhang, Zhensong and Xu, Songcen and P{\'e}rez-Pellitero, Eduardo},
  title     = {Off The Grid: Detection of Primitives for Feed-Forward {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11756--11766},
}
Improving Diffusion Generalization with Weak-to-Strong Segmented Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2026_CVPR,
  author    = {Yuan, Liangyu and Huang, Yufei and Lei, Mingkun and Zhao, Tong and Wang, Ruoyu and Chi, Changxi and Wang, Yiwei and Zhang, Chi},
  title     = {Improving Diffusion Generalization with Weak-to-Strong Segmented Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43697--43706},
}
IDperturb: Enhancing Variation in Synthetic Face Generation via Angular Perturbations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Boutros_2026_CVPR,
  author    = {Boutros, Fadi and Caldeira, Eduarda and Chettaoui, Tahar and Damer, Naser},
  title     = {{IDperturb}: Enhancing Variation in Synthetic Face Generation via Angular Perturbations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {40119--40129},
}
DMGD: Train-Free Dataset Distillation with Semantic-Distribution Matching in Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Qichao and Lu, Yunhong and Cao, Hengyuan and Zhang, Junyi and Zhang, Min},
  title     = {{DMGD}: Train-Free Dataset Distillation with Semantic-Distribution Matching in Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12417--12427},
}
CamPI: Physical Adversarial Examples through Camera Power Signal Injection-
[pdf]
[supp]
[bibtex]@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Yanze and Lv, Mingyuan and Jiang, Qinhong and Jiang, Yan and Yan, Chen and Ji, Xiaoyu and Xu, Wenyuan},
  title     = {{CamPI}: Physical Adversarial Examples through Camera Power Signal Injection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6611--6620},
}
FlowHijack: A Dynamics-Aware Backdoor Attack on Flow-Matching Vision-Language-Action Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{An_2026_CVPR,
  author    = {An, Xinyuan and Luo, Tao and Peng, Gengyun and Wang, Yaobing and Ren, Kui and Wang, Dongxia},
  title     = {{FlowHijack}: A Dynamics-Aware Backdoor Attack on Flow-Matching Vision-Language-Action Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22879--22888},
}
Learning Compact 3D Representations from Feed-Forward Novel View Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{An_2026_CVPR,
  author    = {An, Honggyu and Jung, Jaewoo and Kim, Mungyeom and Kim, Chaehyun and Jeon, Minkyeong and Han, Jisang and Fukuda, Kazumi and Narihira, Takuya and Ko, Hyunah and Kim, Junsu and Hong, Sunghwan and Mitsufuji, Yuki and Kim, Seungryong},
  title     = {Learning Compact {3D} Representations from Feed-Forward Novel View Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {363--373},
}
ShowUI-p: Flow-based Generative Models as GUI Dexterous Hands-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Siyuan and Lin, Kevin Qinghong and Shou, Mike Zheng},
  title     = {{ShowUI-p}: Flow-based Generative Models as {GUI} Dexterous Hands},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {8130--8140},
}
How Far Can We Go With Synthetic Data for Audio-Visual Sound Source Localization?-
[pdf]
[supp]
[bibtex]@InProceedings{Senocak_2026_CVPR,
  author    = {Senocak, Arda and Park, Sooyoung and Oh, Tae-Hyun and Chung, Joon Son},
  title     = {How Far Can We Go With Synthetic Data for Audio-Visual Sound Source Localization?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22931--22940},
}
AMusE: Audio-Visual Benchmark and Alignment Framework for Agentic Multi-Speaker Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chowdhury_2026_CVPR,
  author    = {Chowdhury, Sanjoy and Yang, Karren D and Liu, Xudong and Faghri, Fartash and Vasu, Pavan Kumar Anasosalu and Tuzel, Oncel and Manocha, Dinesh and Li, Chun-Liang and Vemulapalli, Raviteja},
  title     = {{AMusE}: Audio-Visual Benchmark and Alignment Framework for Agentic Multi-Speaker Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22998--23009},
}
Global-Graph Guided and Local-Graph Weighted Contrastive Learning for Unified Clustering on Incomplete and Noise Multi-View Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR,
  author    = {He, Hongqing and Xu, Jie and Yang, Wenyuan and Zhu, Yonghua and Wen, Guoqiu and Zhu, Xiaofeng},
  title     = {Global-Graph Guided and Local-Graph Weighted Contrastive Learning for Unified Clustering on Incomplete and Noise Multi-View Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24813--24822},
}
GeoDiT: A Diffusion-based Vision-Language Model for Geospatial Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jiaqi and Fu, Ronghao and Liu, Haoran and Sun, Lang and Wang, Qipeng and Yang, Bo},
  title     = {{GeoDiT}: A Diffusion-based Vision-Language Model for Geospatial Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {20608--20618},
}
ReFlow: Self-correction Motion Learning for Dynamic Scene Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Yanzhe and Zhu, Ruijie and Chang, Hanzhi and Li, Zhuoyuan and Lu, Jiahao and Zhang, Tianzhu},
  title     = {{ReFlow}: Self-correction Motion Learning for Dynamic Scene Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29803--29813},
}
Is Bin Generation Indispensable? A Bin-Generation-Free Dataset Quantization via Semantic Perspective-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Maijie and Li, Yuhua and Zou, Yixiong and Wu, Yao and Ma, Chenru},
  title     = {Is Bin Generation Indispensable? A Bin-Generation-Free Dataset Quantization via Semantic Perspective},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33642--33651},
}
CaTok: Taming Mean Flows for One-Dimensional Causal Image Tokenization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yitong and Wu, Zuxuan and Qiu, Xipeng and Jiang, Yu-Gang},
  title     = {{CaTok}: Taming Mean Flows for One-Dimensional Causal Image Tokenization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {23161--23171},
}
TrajRAG: Retrieving Geometric-Semantic Experience for Zero-Shot Object Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yiyao and Zhang, Sixian and Zhang, Keming and Song, Xinhang and Du, Songjie and Jiang, Shuqiang},
  title     = {{TrajRAG}: Retrieving Geometric-Semantic Experience for Zero-Shot Object Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15166--15176},
}
SimRecon: SimReady Compositional Scene Reconstruction from Real Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Chong and Zhu, Kai and Wang, Zizhuo and Liu, Fangfu and Zhang, Zhizheng and Duan, Yueqi},
  title     = {{SimRecon}: {SimReady} Compositional Scene Reconstruction from Real Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {42452--42463},
}
FontCrafter: High-Fidelity Element-Driven Artistic Font Creation with Visual In-Context Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Wuyang and Tan, Chengkai and Ge, Chang and Hong, Binye and Yang, Su and Ma, Yongjiu},
  title     = {{FontCrafter}: High-Fidelity Element-Driven Artistic Font Creation with Visual In-Context Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {583--593},
}
From Corners to Fiducial Tags: Revisiting Checkerboard Calibration for Event Cameras-
[pdf]
[supp]
[bibtex]@InProceedings{Ryu_2026_CVPR,
  author    = {Ryu, Taehun and Kang, Changwoo and Joo, Kyungdon},
  title     = {From Corners to Fiducial Tags: Revisiting Checkerboard Calibration for Event Cameras},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29557--29566},
}
Bridging Brain and Semantics: A Hierarchical Framework for Semantically Enhanced fMRI-to-Video Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Yujie and Ma, Chenglong and Gao, Jianxiong and Wang, Chenhui and Zhang, Shiwei and Gong, Biao and Tan, Shuai and Yuan, Hangjie and Shan, Hongming},
  title     = {Bridging Brain and Semantics: A Hierarchical Framework for Semantically Enhanced {fMRI}-to-Video Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28211--28223},
}
SceneTok: A Compressed, Diffusable Token Space for 3D Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Asim_2026_CVPR,
  author    = {Asim, Mohammad and Wewer, Christopher and Lenssen, Jan Eric},
  title     = {{SceneTok}: A Compressed, Diffusable Token Space for {3D} Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5870--5880},
}
See and Fix the Flaws: Enabling VLMs and Diffusion Models to Comprehend Visual Artifacts via Agentic Data Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR,
  author    = {Park, Jaehyun and Ahn, Minyoung and Kim, Minkyu and Lee, Jonghyun and Lee, Jae-Gil and Park, Dongmin},
  title     = {See and Fix the Flaws: Enabling {VLMs} and Diffusion Models to Comprehend Visual Artifacts via Agentic Data Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35810--35820},
}
DRM: Diffusion-based Reward Model With Step-wise Guidance-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jaxon and Yang, Binxin and Yin, Hubery and Li, Chen and LYU, Jing},
  title     = {{DRM}: Diffusion-based Reward Model With Step-wise Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12764--12774},
}
DreamShot: Personalized Storyboard Synthesis with Video Diffusion Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Junjia and Yang, Binbin and Yan, Pengxiang and Liu, Jiyang and Xia, Bin and Wang, Zhao and Wang, Yitong and Lin, Liang and Li, Guanbin},
  title     = {{DreamShot}: Personalized Storyboard Synthesis with Video Diffusion Prior},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36736--36746},
}
Gallant: Voxel Grid-based Humanoid Locomotion and Local-navigation across 3-D Constrained Terrains-
[pdf]
[supp]
[bibtex]@InProceedings{Ben_2026_CVPR,
  author    = {Ben, Qingwei and Xu, Botian and Li, Kailin and Jia, Feiyu and Zhang, Wentao and Wang, Jingping and Wang, Jingbo and Lin, Dahua and Pang, Jiangmiao},
  title     = {{Gallant}: Voxel Grid-based Humanoid Locomotion and Local-navigation across {3-D} Constrained Terrains},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28086--28095},
}
UniM: A Unified Any-to-Any Interleaved Multimodal Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Yanlin and Guo, Minghui and Zhang, Kaiwen and Zhang, Shize and Zhao, Yiran and Li, Haodong and Zhou, Congyue and Zheng, Weijie and Yan, Yushen and Wu, Shengqiong and Ji, Wei and Cui, Lei and Wei, Furu and Fei, Hao and Lee, Mong-Li and Hsu, Wynne},
  title     = {{UniM}: A Unified Any-to-Any Interleaved Multimodal Benchmark},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15902--15911},
}
A Closer Look at Cross-Domain Few-Shot Object Detection: Fine-Tuning Matters and Parallel Decoder Helps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Xuanlong and Sha, Youyang and Liu, Longfei and Shen, Xi and Yang, Di},
  title     = {A Closer Look at Cross-Domain Few-Shot Object Detection: Fine-Tuning Matters and Parallel Decoder Helps},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26593--26603},
}
Do Vision-Language Models Measure Up? Benchmarking Visual Measurement Reading with MeasureBench-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Fenfen and Liu, Yesheng and Xu, Haiyu and Chen, Yue and He, Zheqi and Zhao, Mingxuan and Chen, Miguel Hu and Yao, Jin-Ge and Yang, Xi},
  title     = {Do Vision-Language Models Measure Up? Benchmarking Visual Measurement Reading with {MeasureBench}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38544--38553},
}
TruckDrive: Long-Range Autonomous Highway Driving Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ghilotti_2026_CVPR,
  author    = {Ghilotti, Filippo and Palladin, Edoardo and Brucker, Samuel and Sigal, Adam and Bijelic, Mario and Heide, Felix},
  title     = {{TruckDrive}: Long-Range Autonomous Highway Driving Dataset},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10587--10598},
}
Spe-BEVHead: Rethinking the Detection Head Design for Bird's-Eye-View Object Detection-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Junshu and Zhao, Sicheng and Zhao, Xin and Yang, Fan and Chen, Ruike and Han, Jungong and Ding, Guiguang},
  title     = {{Spe-BEVHead}: Rethinking the Detection Head Design for Bird's-Eye-View Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25799--25809},
}
MVP: Multiple View Prediction Improves GUI Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yunzhu and Pan, Zeyu and Zeng, Zhengwen and Shen, Shuheng and Meng, Changhua and Zhu, Linchao},
  title     = {{MVP}: Multiple View Prediction Improves {GUI} Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27482--27492},
}
Towards GUI Agents: Vision-Language Diffusion Models for GUI Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kumbhar_2026_CVPR,
  author    = {Kumbhar, Shrinidhi and Liao, Haofu and Appalaraju, Srikar and Singh, Kunwar Yashraj},
  title     = {Towards {GUI} Agents: Vision-Language Diffusion Models for {GUI} Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27493--27502},
}
Compressed-Domain-Aware Online Video Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yuhang and Li, Hai and Hou, Shujuan and Dong, Zhetao and Yang, Xiaoyao},
  title     = {Compressed-Domain-Aware Online Video Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {33621--33631},
}
Multimodal Causality-Driven Representation Learning for Generalizable Medical Image Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Xusheng and Zhou, Lihua and Li, Nianxin and Xu, Miao and Song, Ziyang and Yi, Dong and Wu, Jinlin and Ma, Jiawei and Liu, Hongbin and Lei, Zhen and Luo, Jiebo},
  title     = {Multimodal Causality-Driven Representation Learning for Generalizable Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13670--13679},
}
Efficient Hybrid SE(3)-Equivariant Visuomotor Flow Policy via Spherical Harmonics for Robot Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Qinglun and Cheng, Shen and Dan, Tian and Fan, Haoqiang and Liu, Guanghui and Liu, Shuaicheng},
  title     = {Efficient Hybrid {SE(3)}-Equivariant Visuomotor Flow Policy via Spherical Harmonics for Robot Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27989--27998},
}
PAD-Hand: Physics-Aware Diffusion for Hand Motion Recovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ismayilzada_2026_CVPR,
  author    = {Ismayilzada, Elkhan and Zhang, Yufei and Cui, Zijun},
  title     = {{PAD-Hand}: Physics-Aware Diffusion for Hand Motion Recovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28358--28368},
}
SVBench: Evaluation of Video Generation Models on Social Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Wenshuo and Wang, Gongxuan and Yang, Tianmeng and Li, Chuanhao and Xu, Xiaojie and He, Hui and Zhang, Kaipeng},
  title     = {{SVBench}: Evaluation of Video Generation Models on Social Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {32872--32881},
}
GeoFlow: Real-Time Fine-Grained Cross-View Geolocalization via Iterative Flow Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Abu_Lehyeh_2026_CVPR,
  author    = {Abu Lehyeh, Ayesh and Zhang, Xiaohan and Arrabi, Ahmad and Sultani, Waqas and Chen, Chen and Wshah, Safwan},
  title     = {{GeoFlow}: Real-Time Fine-Grained Cross-View Geolocalization via Iterative Flow Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5369--5378},
}
Scaling Agentic Reinforcement Learning for Tool-Integrated Reasoning in VLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Meng and Xu, Ran and Fang, Yi and Zhang, Wenxuan and Yu, Yue and Srivastava, Gaurav and Zhuang, Yuchen and Elhoseiny, Mohamed and Fleming, Charles and Yang, Carl and Tu, Zhengzhong and Xie, Yang and Xiao, Guanghua and Jin, Di and Shi, Wenqi and Wang, Xuan},
  title     = {Scaling Agentic Reinforcement Learning for Tool-Integrated Reasoning in {VLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26518--26529},
}
TESO: Online Tracking of Essential Matrix by Stochastic Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Moravec_2026_CVPR,
  author    = {Moravec, Jaroslav and Sara, Radim and Sugimoto, Akihiro},
  title     = {{TESO}: Online Tracking of Essential Matrix by Stochastic Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {28819--28827},
}
AGENTSAFE: Benchmarking the Safety of Embodied Agents on Hazardous Instructions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ying_2026_CVPR,
  author    = {Ying, Zonghao and Wang, Le and Xiao, Yisong and Wang, Jiakai and Ma, Yuqing and Guo, Jinyang and Yin, Zhenfei and Zhang, Mingchuan and Liu, Aishan and Liu, Xianglong},
  title     = {{AGENTSAFE}: Benchmarking the Safety of Embodied Agents on Hazardous Instructions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37664--37673},
}
RealBirdID: Benchmarking Bird Species Identification in the Era of MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lawrence_2026_CVPR,
  author    = {Lawrence, Logan and Saha, Oindrila and Daroya, Rangel and Chasmai, Mustafa and Liu, Wuao and Hamilton, Max and Sun, Aaron and Jeong, Seoyun and Delattre, Fabien and Maji, Subhransu and Van Horn, Grant},
  title     = {{RealBirdID}: Benchmarking Bird Species Identification in the Era of {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2445--2456},
}
Diff4Splat: Repurposing Video Diffusion Models for Dynamic Scene Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Panwang and Lin, Chenguo and Li, Chenxin and Zhao, Jingjing and Lin, Yuchen and Li, Haopeng and Lin, Yunlong and Wen, Kairun and Yuan, Yixuan and MU, Yadong},
  title     = {{Diff4Splat}: Repurposing Video Diffusion Models for Dynamic Scene Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {4232--4244},
}
DiT-Distill: Open-Set Fine-Grained Retrieval via Generative Curriculum Knowledge-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Xin and Tang, Hao and Cao, Meiqi and Gao, Junyao and Shen, Fei and Li, Zechao},
  title     = {{DiT-Distill}: Open-Set Fine-Grained Retrieval via Generative Curriculum Knowledge},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38753--38762},
}
Unleashing VLA Potentials in Autonomous Driving via Explicit Learning from Failures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Yuechen and Li, Fang and Chen, Qimao and Xu, Shaoqing and Liu, Jiaxin and Song, Ziying and Yang, Zhi-xin and Wen, Fuxi},
  title     = {Unleashing {VLA} Potentials in Autonomous Driving via Explicit Learning from Failures},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {24833--24842},
}
X-WIN: Building Chest Radiograph World Model via Predictive Sensing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zefan and Wang, Ge and Hendler, James and Kalra, Mannudeep K. and Yan, Pingkun},
  title     = {{X-WIN}: Building Chest Radiograph World Model via Predictive Sensing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {6920--6930},
}
OPRO: Orthogonal Panel-Relative Operators for Panel-Aware In-Context Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Sanghyeon and Lee, Minwoo and Shin, Euijin and Kim, Kangyeol and Choi, Seunghwan and Choo, Jaegul},
  title     = {{OPRO}: Orthogonal Panel-Relative Operators for Panel-Aware In-Context Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9233--9242},
}
Submodel Extraction for Efficient and Personalized Federated Learning via Optimal Transport-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Zheng and He, Nan and Chen, Yiming and Sun, Lifeng},
  title     = {Submodel Extraction for Efficient and Personalized Federated Learning via Optimal Transport},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {3369--3378},
}
Improving Controllable Generation: Faster Training and Better Performance via x0-Supervision-
[pdf]
[supp]
[bibtex]@InProceedings{Sangare_2026_CVPR,
  author    = {Sangare, Amadou S. and Maglo, Adrien and Chaouch, Mohamed and Luvison, Bertrand},
  title     = {Improving Controllable Generation: Faster Training and Better Performance via {x0}-Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {9106--9115},
}
Rethinking Visual Rearrangement from A Diffusion Perspective-
[pdf]
[supp]
[bibtex]@InProceedings{Qi_2026_CVPR,
  author    = {Qi, Tianliang and Song, Xinhang and Liu, Yuyi and Jiang, Shuqiang},
  title     = {Rethinking Visual Rearrangement from A Diffusion Perspective},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15222--15231},
}
ClimaOoD: Improving Anomaly Segmentation via Physically Realistic Synthetic Data-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yuxing and Li, Zheng and Liang, Huanhuan and Zhang, Ji and Sun, Zeyu and Liu, Yong},
  title     = {{ClimaOoD}: Improving Anomaly Segmentation via Physically Realistic Synthetic Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {17853--17862},
}
Multi-View Hierarchical Alignment Learning for Spatial Transcriptomics-
[pdf]
[bibtex]@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Zhengzhong and Liu, Liangjin and Zhou, Pei and Min, Shiquan and Zhu, Jiangping},
  title     = {Multi-View Hierarchical Alignment Learning for Spatial Transcriptomics},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {26783--26792},
}
AdaIAT: Adaptively Increasing Attention to Generated Text to Alleviate Hallucinations in LVLM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2026_CVPR,
  author    = {Zhong, Li'an and He, Ziqiang and Zheng, Jibin and Li, Jin and Wang, Z. Jane and Kang, Xiangui},
  title     = {{AdaIAT}: Adaptively Increasing Attention to Generated Text to Alleviate Hallucinations in {LVLM}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {11076--11085},
}
Visual-RRT: Finding Paths toward Visual-Goals via Differentiable Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Sebin and Lee, Jumin and Kim, Taeyeon and Na, Youngju and Im, Woobin and Yoon, Sung-Eui},
  title     = {{Visual-RRT}: Finding Paths toward Visual-Goals via Differentiable Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13486--13495},
}
Action-Sketcher: From Reasoning to Action via Visual Sketches for Robotic Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Huajie and Co, Peterson and Xu, Yijie and Rong, Shanyu and Ji, Yuheng and Chi, Cheng and Chen, Xiansheng and Zhao, Zhongxia and Wang, Pengwei and Wang, Zhongyuan and Zhang, Shanghang}, title = {Action-Sketcher: From Reasoning to Action via Visual Sketches for Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22433-22444} }
Mixture-of-Experts based Feature Decoupling for Open Vocabulary Scene Graph Generation-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yiming and You, Sisi and Bao, Bing-Kun}, title = {Mixture-of-Experts based Feature Decoupling for Open Vocabulary Scene Graph Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32206-32215} }
FAPE-IR: Frequency-Aware Planning and Execution Framework for All-in-One Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Jingren and Xu, Shuning and Yang, Qirui and Wang, Yun and Chen, Xiangyu and Ji, Zhong}, title = {FAPE-IR: Frequency-Aware Planning and Execution Framework for All-in-One Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15561-15573} }
GP-4DGS: Probabilistic 4D Gaussian Splatting from Monocular Video via Variational Gaussian Processes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Mijeong and Kim, Jungtaek and Han, Bohyung}, title = {GP-4DGS: Probabilistic 4D Gaussian Splatting from Monocular Video via Variational Gaussian Processes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33394-33403} }
Asking like Socrates: Socrates helps VLMs understand remote sensing images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2026_CVPR, author = {Shao, Run and Li, Ziyu and Zhang, Zhaoyang and Xu, Linrui and He, Xinran and Yuan, Hongyuan and He, Bolei and Dai, Yongxing and Yan, Yiming and Chen, Yijun and Guo, Wang and Li, Haifeng}, title = {Asking like Socrates: Socrates helps VLMs understand remote sensing images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26465-26475} }
EgoSound: Benchmarking Sound Understanding in Egocentric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Bingwen and Fu, Yuqian and Dong, Qiaole and Sun, Guolei and Qian, Tianwen and Wu, Yuzheng and Paudel, Danda Pani and Fu, Yanwei and Xue, Xiangyang}, title = {EgoSound: Benchmarking Sound Understanding in Egocentric Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25589-25598} }
STiTch: Semantic Transition and Transportation in Collaboration for Training-Free Zero-Shot Composed Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Miaoge and Wang, Dongsheng and Sun, Zening and Zhang, Jinsen and Luo, Wenhan and Guo, Jingcai}, title = {STiTch: Semantic Transition and Transportation in Collaboration for Training-Free Zero-Shot Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12374-12384} }
Language-guided Frequency Modulation for Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ouyang_2026_CVPR, author = {Ouyang, Shuyi and Fang, Gongfan and Ma, Xinyin and Chen, Yen-Wei and Lin, Lanfen and Wang, Xinchao}, title = {Language-guided Frequency Modulation for Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39175-39185} }
Masked Representation Modeling for Domain-Adaptive Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Wenlve and Zhou, Zhiheng and Xian, Tiantao and Zhai, Yikui and Wu, Weibin and Ma, Biyun}, title = {Masked Representation Modeling for Domain-Adaptive Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36999-37009} }
Test-Time Attention Purification for Backdoored Large Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zhifang and Yang, Bojun and He, Shuo and Chen, Weitong and Zhang, Wei Emma and Maennel, Olaf and Feng, Lei and Xu, Miao}, title = {Test-Time Attention Purification for Backdoored Large Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22826-22835} }
SCoRe: Salience-Coverage Reduction for Vision Token Pruning in Vision-Language Models-
[pdf]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Tong and Shi, Hailong and Gao, Xingyu}, title = {SCoRe: Salience-Coverage Reduction for Vision Token Pruning in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24686-24695} }
Hear What Matters! Text-conditioned Selective Video-to-Audio Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Junwon and Nam, Juhan and Lee, Jiyoung}, title = {Hear What Matters! Text-conditioned Selective Video-to-Audio Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36680-36690} }
Cross-Modal Guided Visual Synthesis for Data-Efficient Multimodal Depression Recognition-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Shanliang and Wang, Xiaoxiao}, title = {Cross-Modal Guided Visual Synthesis for Data-Efficient Multimodal Depression Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15933-15943} }
Exploring 6D Object Pose Estimation with Deformation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Zhiqiang and Song, Rui and Duanmu, Chuangqi and Li, Jiaojiao and Ferstl, David and Hu, Yinlin}, title = {Exploring 6D Object Pose Estimation with Deformation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33078-33087} }
Role-SynthCLIP: A Role-Play Driven Diverse Synthetic Data Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huangfu_2026_CVPR, author = {Huangfu, Yuanxiang and Wang, Chaochao and Wang, Weilei}, title = {Role-SynthCLIP: A Role-Play Driven Diverse Synthetic Data Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10142-10151} }
MAPo: Motion-Aware Partitioning of Deformable 3D Gaussian Splatting for High-Fidelity Dynamic Scene Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiao_2026_CVPR, author = {Jiao, Han and Sun, Jiakai and Xu, Yexing and Zhao, Lei and Xing, Wei and Lin, Huaizhong}, title = {MAPo: Motion-Aware Partitioning of Deformable 3D Gaussian Splatting for High-Fidelity Dynamic Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11767-11776} }
D-Prism: Differentiable Primitives for Structured Dynamic Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Xingyuan and Li, Yijin and Zeng, Chong and Ming, Yuhang and Bao, Hujun and Zhang, Guofeng}, title = {D-Prism: Differentiable Primitives for Structured Dynamic Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7556-7566} }
Perceiving the Near, Reasoning the Distant: Coherent Long-Horizon Trajectory Prediction for Autonomous Driving-
[pdf]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Hua and Zhou, Zikang and Zhou, Qian and Wen, Zihao and Hu, Junjie and Chen, Xinhong and Jiang, Zhengmin and Li, Yung-Hui and Wang, Jianping}, title = {Perceiving the Near, Reasoning the Distant: Coherent Long-Horizon Trajectory Prediction for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24875-24884} }
DRS-GUI: Dynamic Region Search for Training-Free GUI Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yichao and Shen, Huawen and Yu, Liu and Liu, Shiyu and Chen, Zeyu and Zhou, Yu}, title = {DRS-GUI: Dynamic Region Search for Training-Free GUI Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34606-34616} }
DiverseDiT: Towards Diverse Representation Learning in Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Mengping and Tan, Zhiyu and Li, Binglei and Yang, Xiaomeng and Chen, Hesen and Li, Hao}, title = {DiverseDiT: Towards Diverse Representation Learning in Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40591-40601} }
SCIEval: Evaluating and Benchmarking the Faithfulness of Scientific Image Generation and Interpretation with Large Multimodal Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Guanghui and Zhao, Huan and Zhao, Zhixue and Ma, Tengfei and Wang, Kehan and Eger, Steffen and Jiang, Zhihua}, title = {SCIEval: Evaluating and Benchmarking the Faithfulness of Scientific Image Generation and Interpretation with Large Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29760-29770} }
LiDAS: Lighting-driven Dynamic Active Sensing for Nighttime Perception-
[pdf]
[supp]
[bibtex]@InProceedings{de_Moreau_2026_CVPR, author = {de Moreau, Simon and Bursuc, Andrei and El Idrissi, Hafid and Moutarde, Fabien}, title = {LiDAS: Lighting-driven Dynamic Active Sensing for Nighttime Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14188-14197} }
RoboAgent: Chaining Basic Capabilities for Embodied Task Planning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Peiran and Zheng, Jiaqi and Mu, Yadong}, title = {RoboAgent: Chaining Basic Capabilities for Embodied Task Planning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15276-15290} }
Unsupervised Monocular 3D Keypoint Discovery from Multi-View Diffusion Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeon_2026_CVPR, author = {Jeon, Subin and Cho, In and Hong, Junyoung and Cho, Woong Oh and Kim, Seon Joo}, title = {Unsupervised Monocular 3D Keypoint Discovery from Multi-View Diffusion Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17132-17142} }
AVA-VLA: Improving Vision-Language-Action models with Active Visual Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Lei and Li, Jifeng and Gao, Juntao and Ye, Feiyang and Jin, Yan and Qian, Jingjing and Zhang, Jing and Wu, Yong and Yu, Xiaoyuan}, title = {AVA-VLA: Improving Vision-Language-Action models with Active Visual Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13453-13463} }
URScenes: A Multi-scenario Dataset for Unstructured Road Environments-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Runsen and Baoerhan, Aizemaitijiang and Wang, Zhangyu and Wang, Jie and Cui, Jinghao and Yu, Guizhen and Yang, Songyue and Sun, WanCheng and Tang, Mingjun and Hua, Zhanbo and Luo, Wenwen}, title = {URScenes: A Multi-scenario Dataset for Unstructured Road Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17874-17883} }
EgoControl: Controllable Egocentric Video Generation via 3D Full-Body Poses-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pallotta_2026_CVPR, author = {Pallotta, Enrico and Azar, Sina Mokhtarzadeh and Doorenbos, Lars and Ozsoy, Serdar and Iqbal, Umar and Gall, Juergen}, title = {EgoControl: Controllable Egocentric Video Generation via 3D Full-Body Poses}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4269-4279} }
Act2See: Emergent Active Visual Perception for Video Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Martin Q. and Qu, Yuxiao and Agrawal, Aditya and Guo, Willis and Liang, Paul Pu and Salakhutdinov, Ruslan and Morency, Louis-Philippe}, title = {Act2See: Emergent Active Visual Perception for Video Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5455-5464} }
SemLayer: Semantic-aware Generative Segmentation and Layer Construction for Abstract Icons-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Haiyang and Wu, Ronghuan and Wei, Li-Yi and Zhao, Nanxuan and Liu, Chenxi and Nguyen, Cuong and Tu, Zhuowen and Wang, Zhaowen}, title = {SemLayer: Semantic-aware Generative Segmentation and Layer Construction for Abstract Icons}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42082-42092} }
From Indoor to Open World: Revealing the Spatial Reasoning Gap in MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Mingrui and Wang, Zhaozhi and Wang, Fangjinhua and Yang, Jiaolong and Pollefeys, Marc and Zhang, Tong}, title = {From Indoor to Open World: Revealing the Spatial Reasoning Gap in MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16789-16799} }
Noise-Aware Few-Shot Learning through Bi-directional Multi-View Prompt Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Niu_2026_CVPR, author = {Niu, Lu and Xue, Cheng}, title = {Noise-Aware Few-Shot Learning through Bi-directional Multi-View Prompt Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41646-41656} }
RaGS: Unleashing 3D Gaussian Splatting from 4D Radar and Monocular Cue for 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2026_CVPR, author = {Bai, Xiaokai and Zhou, Chenxu and Zheng, Lianqing and Liu, Jianan and Cao, Si-Yuan and Zhang, Xiaohan and Li, Yiming and Zhang, Zhengzhuang and Shen, Hui-Liang}, title = {RaGS: Unleashing 3D Gaussian Splatting from 4D Radar and Monocular Cue for 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4983-4992} }
MARSS: Radar Semantic Segmentation via Modular Attention and State Space Models-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Fengyu and Tan, Tiao and Li, Teng and Quan, Yuantian and Liao, Qingmin}, title = {MARSS: Radar Semantic Segmentation via Modular Attention and State Space Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17959-17968} }
GH-NAF: Grid-Adaptive Hash-Level-Attended Neural Attenuation Fields for Discrepancy-Aware CBCT-
[pdf]
[supp]
[bibtex]@InProceedings{Oh_2026_CVPR, author = {Oh, Seong Je and Lee, Ju Hwan and Lim, Chae Yeon and Lee, Donghwan and Chung, Myung Jin and Kim, Kyungsu}, title = {GH-NAF: Grid-Adaptive Hash-Level-Attended Neural Attenuation Fields for Discrepancy-Aware CBCT}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41762-41772} }
Towards Generalizable AI-Generated Image Detection via Image-Adaptive Prompt Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yiheng and Tan, Zichang and Xu, Guoqing and Lei, Zhen and Zhou, Xu and Yang, Yang}, title = {Towards Generalizable AI-Generated Image Detection via Image-Adaptive Prompt Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21262-21272} }
WAM-Flow: Parallel Coarse-to-Fine Motion Planning via Discrete Flow Matching for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Yifang and Cui, Jiahao and Zhu, Zhihao and Shang, Hanlin and Luan, Shan and Xu, Mingwang and Cai, Feipeng and Zhang, Neng and Li, Yaoyi and Cai, Jia and Zhu, Siyu}, title = {WAM-Flow: Parallel Coarse-to-Fine Motion Planning via Discrete Flow Matching for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24918-24928} }
Parameter-efficient Continual Learning for Enhancing Plasticity without Forgetting under Limited Model Capacity-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yitian and Zhang, Shigeng and Liu, Xuan and Lu, Mingming and Chen, Kai and Zhu, Hongye and Chen, Xinning}, title = {Parameter-efficient Continual Learning for Enhancing Plasticity without Forgetting under Limited Model Capacity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10789-10798} }
ActionMesh: Animated 3D Mesh Generation with Temporal 3D Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sabathier_2026_CVPR, author = {Sabathier, Remy and Novotny, David and Mitra, Niloy J. and Monnier, Tom}, title = {ActionMesh: Animated 3D Mesh Generation with Temporal 3D Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34312-34321} }
Towards Intrinsic-Aware Monocular 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zhihao and Kumar, Abhinav and Liu, Xiaoming}, title = {Towards Intrinsic-Aware Monocular 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40739-40750} }
Universal-to-Specific: Dynamic Knowledge-Guided Multiple Instance Learning for Few-Shot Whole Slide Image Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Junjian and Kuang, Hulin and Liu, Jin and Yue, Hailin and He, Mengshen and Wang, Jianxin}, title = {Universal-to-Specific: Dynamic Knowledge-Guided Multiple Instance Learning for Few-Shot Whole Slide Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26614-26623} }
From Few-way to Many-way: Rethinking Few-shot Fine-grained Image Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Li-Jun and Chen, Zhen-Duo and Luo, Xin and Xu, Xin-Shun}, title = {From Few-way to Many-way: Rethinking Few-shot Fine-grained Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12364-12373} }
Boosting Vision-Language-Action Finetuning with Feasible Action Neighborhood Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Niu_2026_CVPR, author = {Niu, Haochen and Zhang, Kanyu and Yin, Shuyu and Guo, Qinghai and Liu, Peilin and Wen, Fei}, title = {Boosting Vision-Language-Action Finetuning with Feasible Action Neighborhood Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27956-27966} }
High-Precision Dichotomous Image Segmentation via Depth Integrity-Prior and Fine-Grained Patch Strategy-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xianjie and Fu, Keren and Zhao, Qijun}, title = {High-Precision Dichotomous Image Segmentation via Depth Integrity-Prior and Fine-Grained Patch Strategy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6357-6366} }
Multimodal RewardBench 2: Evaluating Omni Reward Models for Interleaved Text and Image-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Yushi and Askari-Hemmat, Reyhane and Hall, Melissa and Dinan, Emily and Zettlemoyer, Luke and Ghazvininejad, Marjan}, title = {Multimodal RewardBench 2: Evaluating Omni Reward Models for Interleaved Text and Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36904-36915} }
Ani3DHuman: Photorealistic 3D Human Animation with Self-guided Stochastic Sampling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Qi and Wang, Can and Shang, Jiaxiang and Liu, Yingchun and Liao, Jing}, title = {Ani3DHuman: Photorealistic 3D Human Animation with Self-guided Stochastic Sampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12651-12662} }
VideoITG: Multimodal Video Understanding with Instructed Temporal Grounding-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Shihao and Chen, Guo and Huang, De-An and Li, Zhiqi and Li, Minghan and Liu, Guilin and Kautz, Jan and Alvarez, Jose M. and Zhang, Lei and Yu, Zhiding}, title = {VideoITG: Multimodal Video Understanding with Instructed Temporal Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24640-24650} }
KAMP: Knowledge-Anchored Multimodal Pretraining Framework for Medical Image Representation-
[pdf]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Feiyu and Li, Jia and Chen, Zhao and Wu, Yang and Cao, Caleb Chen and Chen, Lei}, title = {KAMP: Knowledge-Anchored Multimodal Pretraining Framework for Medical Image Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21067-21077} }
Accelerating Diffusion-based Video Editing via Heterogeneous Caching: Beyond Full Computing at Sampled Denoising Timestep-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Tianyi and Lu, Ye and Zhang, Linfeng and Cai, Chen and Gao, Jianjun and Wang, Yi and Yap, Kim-Hui and Chau, Lap-Pui}, title = {Accelerating Diffusion-based Video Editing via Heterogeneous Caching: Beyond Full Computing at Sampled Denoising Timestep}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35800-35809} }
Saliency-R1: Enforcing Interpretable and Faithful Vision-language Reasoning via Saliency-map Alignment Reward-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gong_2026_CVPR, author = {Gong, Shizhan and Hu, Minda and Zhang, Qiyuan and Ma, Chen and Dou, Qi}, title = {Saliency-R1: Enforcing Interpretable and Faithful Vision-language Reasoning via Saliency-map Alignment Reward}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24095-24106} }
Plug-and-Play PDE Optimization for 3D Gaussian Splatting: Toward High-Quality Rendering and Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mo_2026_CVPR, author = {Mo, Yifan and Cai, Youcheng and Liu, Ligang}, title = {Plug-and-Play PDE Optimization for 3D Gaussian Splatting: Toward High-Quality Rendering and Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33333-33342} }
A Debiased Reconstruction-based Framework for Training-Free Detection of AI-Generated Images-
[pdf]
[supp]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, Sungik and Lee, Hankook and Lee, Jaehoon and Kim, Robin and Choi, Stanley Jungkyu and Lee, Moontae}, title = {A Debiased Reconstruction-based Framework for Training-Free Detection of AI-Generated Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3254-3263} }
UniMERNet: A Universal Network for Real-World Mathematical Expression Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2026_CVPR, author = {Gu, Zhuangcheng and Liang, Guang and Wang, Bin and Zhao, Zhiyuan and Zhang, Qintong and Li, Weijia and Xu, Chao and Zhang, Bo and Shi, Botian and Wu, Jiang and Zhang, Wentao and He, Conghui}, title = {UniMERNet: A Universal Network for Real-World Mathematical Expression Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34106-34115} }
Open-Vocabulary Domain Generalization in Urban-Scene Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Dong and Zang, Qi and Pu, Nan and Li, Wenjing and Sebe, Nicu and Zhong, Zhun}, title = {Open-Vocabulary Domain Generalization in Urban-Scene Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20388-20398} }
Fourier Angle Alignment for Oriented Object Detection in Remote Sensing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2026_CVPR, author = {Gu, Changyu and Chen, Linwei and Gu, Lin and Fu, Ying}, title = {Fourier Angle Alignment for Oriented Object Detection in Remote Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42225-42235} }
SimScale: Learning to Drive via Real-World Simulation at Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2026_CVPR, author = {Tian, Haochen and Li, Tianyu and Liu, Haochen and Yang, Jiazhi and Qiu, Yihang and Li, Guang and Wang, Junli and Gao, Yinfeng and Zhang, Zhang and Wang, Liang and Ye, Hangjun and Chen, Long and Li, Hongyang}, title = {SimScale: Learning to Drive via Real-World Simulation at Scale}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36365-36374} }
StyleTextGen: Style-Conditioned Multilingual Scene Text Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zeyu and Zhao, Fangmin and Shu, Yan and Liu, Yichao and Yu, Liu and Zhou, Yu}, title = {StyleTextGen: Style-Conditioned Multilingual Scene Text Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7643-7653} }
Single-step Diffusion-based Video Coding with Semantic-Temporal Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2026_CVPR, author = {Xue, Naifu and Jia, Zhaoyang and Li, Jiahao and Li, Bin and Zheng, Zihan and Zhang, Yuan and Lu, Yan}, title = {Single-step Diffusion-based Video Coding with Semantic-Temporal Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9752-9761} }
Unified Customized Generation by Disentangled Reward Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Shaojin and Huang, Mengqi and Cheng, Yufeng and Wu, Wenxu and Tian, Jiahe and Luo, Yiming and Ding, Fei and He, Qian}, title = {Unified Customized Generation by Disentangled Reward Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34418-34427} }
AeroGS: Scale-Aware Gaussian Splatting for Pose-Free Dynamic UAV Scene Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Tingyun and Liu, Xinyi and Zhang, Yongjun and Wan, Yi and Liu, Xiaoan and Fan, Weiwei and Liu, Jiahao}, title = {AeroGS: Scale-Aware Gaussian Splatting for Pose-Free Dynamic UAV Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40907-40917} }
Neural Collapse in Test-Time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xiao and Du, Zhongjing and Huang, Jiazhen and Jiang, Xu and Lu, Li and Jiang, Jingyan and Wang, Zhi}, title = {Neural Collapse in Test-Time Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10567-10576} }
Learning to Control Physically-simulated 3D Characters via Generating and Mimicking 2D Motions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jianan and Chen, Xiao and Huang, Tao and Wong, Tien-Tsin}, title = {Learning to Control Physically-simulated 3D Characters via Generating and Mimicking 2D Motions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38302-38312} }
DuoMo: Dual Motion Diffusion for World-Space Human Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yufu and Ng, Evonne and Shin, Soyong and Khirodkar, Rawal and Dong, Yuan and Su, Zhaoen and Park, Jinhyung and Kitani, Kris and Richard, Alexander and Prada, Fabian and Zollh{\"o}fer, Michael}, title = {DuoMo: Dual Motion Diffusion for World-Space Human Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42777-42788} }
Towards Decompositional Human Motion Generation with Energy-Based Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jianrong and Fan, Hehe and Yang, Yi}, title = {Towards Decompositional Human Motion Generation with Energy-Based Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30650-30660} }
HiFi-BRep: High-Fidelity Latent Representation for Robust B-Rep Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Hou_2026_CVPR, author = {Hou, Junhao and Luo, Chenqi and Wang, Pufan and Lu, Jiaying and Liu, Yusheng and Qin, Feiwei and Fang, Meie and Zhou, Kun}, title = {HiFi-BRep: High-Fidelity Latent Representation for Robust B-Rep Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27199-27208} }
ManifoldGD: Training-Free Hierarchical Manifold Guidance for Diffusion-Based Dataset Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Roy_2026_CVPR, author = {Roy, Ayush and Lee, Wei-Yang Alex and Chakraborty, Rudrasis and Lokhande, Vishnu Suresh}, title = {ManifoldGD: Training-Free Hierarchical Manifold Guidance for Diffusion-Based Dataset Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12406-12416} }
Flash-DMD: Towards High-Fidelity Few-Step Image Generation with Efficient Distillation and Joint Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Guanjie and Huang, Shirui and Sun, Yifu and Liu, Kai and Zhu, Jianchen and Qu, Xiaoye and Cheng, Yu and Chen, Peng}, title = {Flash-DMD: Towards High-Fidelity Few-Step Image Generation with Efficient Distillation and Joint Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6010-6020} }
TAP: A Token-Adaptive Predictor Framework for Training-Free Diffusion Acceleration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Haowei and Huang, Tingxuan and Wang, Xing and Zhao, Tianyu and Wang, Jiexi and Chen, Weifeng and Peng, Xurui and Chen, Fangmin and Yong, Junhai and Wang, Bin}, title = {TAP: A Token-Adaptive Predictor Framework for Training-Free Diffusion Acceleration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36082-36091} }
Debiased Sample Selection for Learning with Noisy Labels-
[pdf]
[supp]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Weiran and Wei, Wei and Xie, Wenfeng}, title = {Debiased Sample Selection for Learning with Noisy Labels}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32047-32057} }
SpiralDiff: Spiral Diffusion with LoRA for RGB-to-RAW Conversion Across Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yue_2026_CVPR, author = {Yue, Huanjing and Xie, Shangbin and Cao, Cong and Wu, Qian and Zhang, Lei and Zhao, Lei and Yang, Jingyu}, title = {SpiralDiff: Spiral Diffusion with LoRA for RGB-to-RAW Conversion Across Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38453-38463} }
Inter-Photon-Limited Videography-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Andrew and Du, Dongyu and Nousias, Sotiris and Lindell, David B. and Kutulakos, Kiriakos N.}, title = {Inter-Photon-Limited Videography}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34006-34015} }
MambaSIC: Mamba-based Stereo Image Compression with Bi-directional Multi-reference Entropy Model-
[pdf]
[supp]
[bibtex]@InProceedings{Qin_2026_CVPR, author = {Qin, Shiyu and Zhang, Xinjie and Liu, Zhening and Wang, Jinpeng and Chen, Bin and Li, Jiawei and Ren, Yifan and Xia, Shu-Tao and Zhang, Jun}, title = {MambaSIC: Mamba-based Stereo Image Compression with Bi-directional Multi-reference Entropy Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5306-5315} }
GeodesicNVS: Probability Density Geodesic Flow Matching for Novel View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xuqin and Wu, Tao and Zhang, Yanfeng and Liu, Lu and Sun, Mingwei and Wang, Yongliang and Zeller, Niclas and Cremers, Daniel}, title = {GeodesicNVS: Probability Density Geodesic Flow Matching for Novel View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40316-40326} }
Text-Printed Image: Bridging the Image-Text Modality Gap for Text-centric Training of Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yamabe_2026_CVPR, author = {Yamabe, Shojiro and Waseda, Futa and Shiono, Daiki and Takahashi, Tsubasa}, title = {Text-Printed Image: Bridging the Image-Text Modality Gap for Text-centric Training of Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17270-17281} }
HBridge: H-Shape Bridging of Heterogeneous Experts for Unified Multimodal Understanding and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xiang and Zhang, Zhifei and Zhang, He and Lin, Zhe and Zhou, Yuqian and Liu, Qing and Zhang, Shiwei and Li, Yijun and Liu, Shaoteng and Zheng, Haitian and Kuen, Jason and Wang, Yuehuan and Gao, Changxin and Sang, Nong}, title = {HBridge: H-Shape Bridging of Heterogeneous Experts for Unified Multimodal Understanding and Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14767-14778} }
InsCal: Calibrated Multi-Source Fully Test-Time Prompt Tuning for Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Que_2026_CVPR, author = {Que, Xiaofan and Wang, Dingrong and Liu, Xumin and Yu, Qi}, title = {InsCal: Calibrated Multi-Source Fully Test-Time Prompt Tuning for Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36936-36946} }
ActivePolicy: Active Gaussian Reconstruction and Optimization Strategy Based on Global-Local Information Gain-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yingzhao and Liu, Yanjie and Zhao, Lijun}, title = {ActivePolicy: Active Gaussian Reconstruction and Optimization Strategy Based on Global-Local Information Gain}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5003-5013} }
Unified Number-Free Text-to-Motion Generation Via Flow Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Guanhe and Celiktutan, Oya}, title = {Unified Number-Free Text-to-Motion Generation Via Flow Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23570-23580} }
Thinking Diffusion: Penalize and Guide Visual-Grounded Reasoning in Diffusion Multimodal Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Keuntae and Kang, Mingyu and Choi, Yong Suk}, title = {Thinking Diffusion: Penalize and Guide Visual-Grounded Reasoning in Diffusion Multimodal Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5154-5164} }
Mixture of States: Routing Token-Level Dynamics for Multimodal Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Haozhe and Liu, Ding and Zhuge, Mingchen and Zhou, Zijian and Xie, Tian and He, Sen and Yang, Yukang and Liu, Shuming and Cong, Yuren and Guo, Jiadong and Xu, Hongyu and Xu, Ke and Ng, Kam-Woh and Perez, Juan C. and Perez-Rua, Juan-Manuel and Xiang, Tao and Liu, Wei and Liu, Shikun and Schmidhuber, J{\"u}rgen}, title = {Mixture of States: Routing Token-Level Dynamics for Multimodal Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36781-36792} }
Bidirectional Multimodal Prompt Learning with Scale-Aware Training for Few-Shot Multi-Class Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Yujin and Kim, Sewon and Moon, Daeun and Jang, Seoyoon and Yoon, Hyunsoo}, title = {Bidirectional Multimodal Prompt Learning with Scale-Aware Training for Few-Shot Multi-Class Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35577-35586} }
Visual-Aware CoT: Achieving High-Fidelity Visual Consistency in Unified Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Zixuan and Liu, Quande and Wei, Cong and Zhang, Yuanxing and Wang, Xintao and Wan, Pengfei and Gai, Kun and Luo, Wenhan}, title = {Visual-Aware CoT: Achieving High-Fidelity Visual Consistency in Unified Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9116-9126} }
LaDy: Lagrangian-Dynamic Informed Network for Skeleton-based Action Segmentation via Spatial-Temporal Modulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2026_CVPR, author = {Ji, Haoyu and Liu, Xueting and Gao, Yu and Huang, Wenze and Yang, Zhihao and Ren, Weihong and Wang, Zhiyong and Liu, Honghai}, title = {LaDy: Lagrangian-Dynamic Informed Network for Skeleton-based Action Segmentation via Spatial-Temporal Modulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34449-34459} }
GenTract: Generative Global Tractography-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sargood_2026_CVPR, author = {Sargood, Alec and Puglisi, Lemuel and Thompson, Elinor and Musolesi, Mirco and Alexander, Daniel C.}, title = {GenTract: Generative Global Tractography}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35291-35300} }
VQ-VA World: Towards High-Quality Visual Question-Visual Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gou_2026_CVPR, author = {Gou, Chenhui and Chen, Zilong and Wang, Zeyu and Li, Feng and Zhu, Deyao and Duan, Zicheng and Li, Kunchang and Deng, Chaorui and Yuan, Hongyi and Fan, Haoqi and Xie, Cihang and Cai, Jianfei and Rezatofighi, Hamid}, title = {VQ-VA World: Towards High-Quality Visual Question-Visual Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18837-18847} }
From Contrast to Consistency: Rethinking Event-based Continuous-Time Optical Flow Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Rui and Wu, Song and Yang, Wen and Wu, Jinjian}, title = {From Contrast to Consistency: Rethinking Event-based Continuous-Time Optical Flow Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15125-15134} }
ReCALL: Recalibrating Capability Degradation for MLLM-based Composed Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Tianyu and He, ChenWei and Hao, Xiangzhao and Wang, Tianyue and Guo, Jiarui and Guo, Haiyun and Qu, Leigang and Wang, Jinqiao and Chua, Tat-Seng}, title = {ReCALL: Recalibrating Capability Degradation for MLLM-based Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38763-38773} }
Adaptive Learned Image Compression with Graph Neural Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yunuo and He, Bing and Lyu, Zezheng and Hu, Hongwei and Gu, Qunshan and Tian, Yuan and Lu, Guo}, title = {Adaptive Learned Image Compression with Graph Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12150-12161} }
Wan-Weaver: Interleaved Multi-modal Generation via Decoupled Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xing_2026_CVPR, author = {Xing, Jinbo and Jiang, Zeyinzi and Tuo, Yuxiang and Mao, Chaojie and Gai, Xiaotang and Chen, Xi and Zhang, Jingfeng and Pan, Yulin and Han, Zhen and Xiao, Jie and Yan, Keyu and Xie, Chenwei and Zhong, Chongyang and Zhu, Kai and Shen, Tong and Huang, Lianghua and Liu, Yu and Yang, Yujiu}, title = {Wan-Weaver: Interleaved Multi-modal Generation via Decoupled Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36267-36278} }
TINA: Text-Free Inversion Attack for Unlearned Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2026_CVPR, author = {Xiang, Qianlong and Zhang, Miao and Zhang, Haoyu and Wang, Kun and Hou, Junhui and Nie, Liqiang}, title = {TINA: Text-Free Inversion Attack for Unlearned Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30076-30086} }
ChordEdit: One-Step Low-Energy Transport for Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Liangsi and Chen, Xuhang and Guo, Minzhe and Li, Shichu and Wang, Jingchao and Shi, Yang}, title = {ChordEdit: One-Step Low-Energy Transport for Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14398-14407} }
MagicFuse: Single Image Fusion for Visual and Semantic Reinforcement-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Hao and Zha, Yanping and Li, Zizhuo and Gong, Meiqi and Ma, Jiayi}, title = {MagicFuse: Single Image Fusion for Visual and Semantic Reinforcement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26551-26560} }
MUSE: Harnessing Precise and Diverse Semantics for Few-Shot Whole Slide Image Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Jiahao and Huang, Sheng and Zhang, Xin and Nan, Zhixiong and Dong, Jiajun and Mu, Nankun}, title = {MUSE: Harnessing Precise and Diverse Semantics for Few-Shot Whole Slide Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33911-33921} }
RobotSeg: A Model and Dataset for Segmenting Robots in Image and Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mei_2026_CVPR, author = {Mei, Haiyang and Huang, Qiming and Ci, Hai and Shou, Mike Zheng}, title = {RobotSeg: A Model and Dataset for Segmenting Robots in Image and Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14346-14356} }
Self-supervised Dynamic Heterogeneous Degradation Modeling for Unified Zero-Shot Image Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, XiaoWan and Yang, Jing and Liu, HeNan and Li, HuaQiu and Xu, Mai}, title = {Self-supervised Dynamic Heterogeneous Degradation Modeling for Unified Zero-Shot Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22658-22668} }
Tutor-Student Reinforcement Learning: A Dynamic Curriculum for Robust Deepfake Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2026_CVPR, author = {Lei, Zhanhe and Wang, Zhongyuan and Cheng, Jikang and Huang, Baojin and Yang, Yuhong and Han, Zhen and Liang, Chao and Ye, Dengpan}, title = {Tutor-Student Reinforcement Learning: A Dynamic Curriculum for Robust Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41436-41445} }
WalkGPT: Grounded Vision-Language Conversation with Depth-Aware Segmentation for Pedestrian Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ibn_Sultan_2026_CVPR, author = {Ibn Sultan, Rafi and Zhu, Hui and Zhou, Xiangyu and Li, Chengyin and Khanduri, Prashant and Brocanelli, Marco and Zhu, Dongxiao}, title = {WalkGPT: Grounded Vision-Language Conversation with Depth-Aware Segmentation for Pedestrian Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40085-40095} }
Learning to Learn Weight Generation via Local Consistency Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guan_2026_CVPR, author = {Guan, Yunchuan and Liu, Yu and Zhou, Ke and Shen, Zhiqi and Hwang, Jenq-Neng and Li, Lei}, title = {Learning to Learn Weight Generation via Local Consistency Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19623-19633} }
ProOOD: Prototype-Guided Out-of-Distribution 3D Occupancy Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yuheng and Duan, Mengfei and Peng, Kunyu and Wang, Yuhang and Wen, Di and Paudel, Danda Pani and Van Gool, Luc and Yang, Kailun}, title = {ProOOD: Prototype-Guided Out-of-Distribution 3D Occupancy Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14241-14252} }
ZeroIDIR: Zero-Reference Illumination Degradation Image Restoration with Perturbed Consistency Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Hai and Liu, Zhen and Lei, Yinjie and Han, Songchen and Zeng, Bing and Liu, Shuaicheng}, title = {ZeroIDIR: Zero-Reference Illumination Degradation Image Restoration with Perturbed Consistency Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1320-1330} }
HiFi-Inpaint: Towards High-Fidelity Reference-Based Inpainting for Generating Detail-Preserving Human-Product Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yichen and Zhou, Donghao and Wang, Jie and Gao, Xin and Liu, Guisheng and Li, Jiatong and Zhang, Quanwei and Lyu, Qiang and Guo, Lanqing and Wen, Shilei and Wang, Weiqiang and Heng, Pheng-Ann}, title = {HiFi-Inpaint: Towards High-Fidelity Reference-Based Inpainting for Generating Detail-Preserving Human-Product Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1994-2004} }
3D-LATTE: Latent Space 3D Editing from Textual Instructions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Parelli_2026_CVPR, author = {Parelli, Maria and Oechsle, Michael and Niemeyer, Michael and Tombari, Federico and Geiger, Andreas}, title = {3D-LATTE: Latent Space 3D Editing from Textual Instructions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14377-14386} }
NI-Tex: Non-isometric Image-based Garment Texture Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shan_2026_CVPR, author = {Shan, Hui and Li, Ming and Yang, Haitao and Zheng, Kai and Zheng, Sizhe and Fu, Yanwei and Huang, Xiangru}, title = {NI-Tex: Non-isometric Image-based Garment Texture Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19884-19893} }
Active Intelligence in Video Avatars via Closed-loop World Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Xuanhua and Yang, Tianyu and Cao, Ke and Wu, Ruiqi and Meng, Cheng and Zhang, Yong and Kang, Zhuoliang and Wei, Xiaoming and Chen, Qifeng}, title = {Active Intelligence in Video Avatars via Closed-loop World Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27239-27248} }
DLVP-CLIP: Enhancing Fine-Grained Zero-Shot Anomaly Detection via Dynamic Local Visual Prompting-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Gaowei and Zhang, Lihe}, title = {DLVP-CLIP: Enhancing Fine-Grained Zero-Shot Anomaly Detection via Dynamic Local Visual Prompting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35524-35533} }
Minimal Constraint Relaxation for Multiview Autocalibration-
[pdf]
[supp]
[bibtex]@InProceedings{Kosaka_2026_CVPR, author = {Kosaka, Norio and Duff, Timothy and Pajdla, Tomas}, title = {Minimal Constraint Relaxation for Multiview Autocalibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28937-28946} }
SODA: Sensitivity-Oriented Dynamic Acceleration for Diffusion Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2026_CVPR, author = {Shao, Tong and Fu, Yusen and Sun, Guoying and Kong, Jingde and Tian, Zhuotao and Su, Jingyong}, title = {SODA: Sensitivity-Oriented Dynamic Acceleration for Diffusion Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33012-33021} }
ClusterMark: Towards Robust Watermarking for Autoregressive Image Generators with Visual Token Clustering-
[pdf]
[supp]
[bibtex]@InProceedings{Lukovnikov_2026_CVPR, author = {Lukovnikov, Denis and M{\"u}ller, Andreas and Quiring, Erwin and Fischer, Asja}, title = {ClusterMark: Towards Robust Watermarking for Autoregressive Image Generators with Visual Token Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9213-9222} }
PhysHO: Physics-Based Dynamic 3D Gaussian Human and Object from Monocular Video-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Suyi and Lee, Gim Hee}, title = {PhysHO: Physics-Based Dynamic 3D Gaussian Human and Object from Monocular Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32507-32517} }
CaST-Bench: Benchmarking Causal Chain-Grounded Spatio-Temporal Reasoning for Video Question Answering-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Mingfang and Pan, Jingjing and Kumar, Ashutosh and Saini, Rajat and Erdogan, Mustafa and Yang, Hsuan-Kung and Kang, Caixin and Huang, Yifei and Sato, Yoichi and Kong, Quan}, title = {CaST-Bench: Benchmarking Causal Chain-Grounded Spatio-Temporal Reasoning for Video Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31856-31866} }
The Coherence Trap: When MLLM-Crafted Narratives Exploit Manipulated Visual Contexts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yuchen and Wang, Yaxiong and Wu, Yujiao and Wu, Lianwei and Zhu, Li and Zheng, Zhedong}, title = {The Coherence Trap: When MLLM-Crafted Narratives Exploit Manipulated Visual Contexts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8760-8769} }
Generative Diffusion Priors for 3D Mapping of the Dark Universe-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Brandon and Scognamiglio, Diana and Dor{\'e}, Olivier and Bouman, Katherine L.}, title = {Generative Diffusion Priors for 3D Mapping of the Dark Universe}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23581-23590} }
PlanaReLoc: Camera Relocalization in 3D Planar Primitives via Region-Based Structure Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Hanqiao and Liu, Yuzhou and Liu, Yangdong and Shen, Shuhan}, title = {PlanaReLoc: Camera Relocalization in 3D Planar Primitives via Region-Based Structure Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26409-26421} }
Generalizable Knowledge Distillation from Vision Foundation Models for Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lv_2026_CVPR, author = {Lv, Chonghua and Zhao, Dong and Wang, Shuang and Quan, Dou and Huyan, Ning and Sebe, Nicu and Zhong, Zhun}, title = {Generalizable Knowledge Distillation from Vision Foundation Models for Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26656-26666} }
Coverage Optimization for Camera View Selection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Timothy and Dai, Adam and Adang, Maximilian and Gao, Grace and Schwager, Mac}, title = {Coverage Optimization for Camera View Selection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19443-19451} }
Customized Fusion: A Closed-Loop Dynamic Network for Adaptive Multi-Task-Aware Infrared-Visible Image Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zengyi and Liu, Yu and Cheng, Juan and Zhu, Zhiqin and Zhang, Yafei and Li, Huafeng}, title = {Customized Fusion: A Closed-Loop Dynamic Network for Adaptive Multi-Task-Aware Infrared-Visible Image Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {188-198} }
StableMaterials: Enhancing Diversity in Material Generation via Semi-Supervised Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vecchio_2026_CVPR, author = {Vecchio, Giuseppe}, title = {StableMaterials: Enhancing Diversity in Material Generation via Semi-Supervised Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19665-19675} }
MEMO: Human-like Crisp Edge Detection Using Masked Edge Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Jiaxin and Wu, Yue and Zhou, Yicong}, title = {MEMO: Human-like Crisp Edge Detection Using Masked Edge Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27740-27749} }
TTRV: Test-Time Reinforcement Learning for Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Singh_2026_CVPR, author = {Singh, Akshit and Marjit, Shyam and Lin, Wei and Gavrikov, Paul and Yeung-Levy, Serena and Kuehne, Hilde and Feris, Rogerio and Doveh, Sivan and Glass, James and Mirza, M. Jehanzeb}, title = {TTRV: Test-Time Reinforcement Learning for Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33153-33163} }
CaT-GS: Efficient 3DGS Rendering for Large-Scale Scenes with Inter-frame Caching and Tile Scheduling-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Tingjia and Chen, Bo and Liu, Shengzhong and Wu, Fan and Chen, Guihai}, title = {CaT-GS: Efficient 3DGS Rendering for Large-Scale Scenes with Inter-frame Caching and Tile Scheduling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37391-37400} }
FUSER: Feed-Forward Multiview 3D Registration Transformer and SE(3)$^N$ Diffusion Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Haobo and Xie, Jin and Yang, Jian and Yu, Liang and Zheng, Jianmin}, title = {FUSER: Feed-Forward Multiview 3D Registration Transformer and {SE(3)$^N$} Diffusion Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7393-7403} }
SpatialScore: Towards Comprehensive Evaluation for Spatial Intelligence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Haoning and Huang, Xiao and Chen, Yaohui and Zhang, Ya and Wang, Yanfeng and Xie, Weidi}, title = {SpatialScore: Towards Comprehensive Evaluation for Spatial Intelligence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31029-31041} }
PointCSP: Cross-Sample Semantic Propagation and Stability Preservation in Self-Supervised Point Cloud Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Xinxing and Liu, Ajian and Qiang, Sunyuan and Ma, Hui and Yang, Liying and Wang, Yuzhong and Rao, Zhi and Liang, Yanyan}, title = {PointCSP: Cross-Sample Semantic Propagation and Stability Preservation in Self-Supervised Point Cloud Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10016-10026} }
Enhance-then-Balance Modality Collaboration for Robust Multimodal Sentiment Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Kang and Ding, Yuzhe and Wang, Xinrong and Li, Fei and Teng, Chong and Ji, Donghong}, title = {Enhance-then-Balance Modality Collaboration for Robust Multimodal Sentiment Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30183-30193} }
SynMotion: Semantic-Visual Adaptation for Motion Customized Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Shuai and Gong, Biao and Wei, Yujie and Zhang, Shiwei and Liu, Zhuoxin and Ma, Ke and Wang, Yan and Zheng, Kecheng and Zhu, Xing and Shen, Yujun and Zhao, Hengshuang}, title = {SynMotion: Semantic-Visual Adaptation for Motion Customized Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30477-30489} }
UniSER: A Foundation Model for Unified Soft Effects Removal-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jingdong and Zhang, Lingzhi and Liu, Qing and Chiu, Mang Tik and Barnes, Connelly and Wang, Yizhou and You, Haoran and Liu, Xiaoyang and Zhou, Yuqian and Lin, Zhe and Shechtman, Eli and Amirghodsi, Sohrab and Li, Xin and Wang, Wenping and Zhan, Xiaohang}, title = {UniSER: A Foundation Model for Unified Soft Effects Removal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16096-16107} }
EvObj: Learning Evolving Object-centric Representations for 3D Instance Segmentation without Scene Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Jiahao and Zhang, Zihui and Yang, Yafei and Li, Jinxi and Wei, Shenxing and Sun, Zhixuan and Yang, Bo}, title = {EvObj: Learning Evolving Object-centric Representations for 3D Instance Segmentation without Scene Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39817-39826} }
Joint Learning of General and Diverse Patterns with Mixture of Memory Experts for Weakly-Supervised Video Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Bo and Chen, Junxi and Wu, Zhe and Gao, Feng and Yang, Fan and Su, Li and Wang, Yaowei}, title = {Joint Learning of General and Diverse Patterns with Mixture of Memory Experts for Weakly-Supervised Video Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35638--35647} }
Efficient Frame Selection for Long Video Understanding via Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Qin_2026_CVPR, author = {Qin, Yaxuan and Li, Hefei and Mu, Wenqi and He, Yancheng}, title = {Efficient Frame Selection for Long Video Understanding via Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {16944--16953} }
Mask to Align, Weight to Disambiguate: Reliable Unsupervised Cross-Modal Hashing with Masked-Weight Contrast-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Fan and Zhao, Yuanzhi and Zhao, Haimei and Zhao, Yudong and Xu, Haikun}, title = {Mask to Align, Weight to Disambiguate: Reliable Unsupervised Cross-Modal Hashing with Masked-Weight Contrast}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {30151--30161} }
Neighbor GRPO: Contrastive ODE Policy Optimization Aligns Flow Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Dailan and Feng, Guanlin and Ge, Xingtong and Niu, Yazhe and Zhang, Yi and Ma, Bingqi and Song, Guanglu and Liu, Yu and Li, Hongsheng}, title = {Neighbor GRPO: Contrastive ODE Policy Optimization Aligns Flow Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6033--6042} }
CustomTex: High-fidelity Indoor Scene Texturing via Multi-Reference Customization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Weilin and Rao, Jiahao and Wang, Wenhao and Li, Xinyang and Cheng, Xuan and Cao, Liujuan}, title = {CustomTex: High-fidelity Indoor Scene Texturing via Multi-Reference Customization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4280--4290} }
PIX-TAB: Efficient PIXel-Precise TABle Structure Recognition Approach with Speculative Decoding and Region-Based Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zaytsev_2026_CVPR, author = {Zaytsev, Viktor and Vynokurova, Olena and Tytarchuk, Pavlo and Kozii, Dmytro and Pohribnyi, Vitalii and Radyvonenko, Olga and Shcherbina, Artem}, title = {PIX-TAB: Efficient PIXel-Precise TABle Structure Recognition Approach with Speculative Decoding and Region-Based Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {23912--23921} }
SeeGroup: Multi-Layer Depth Estimation of Transparent Surfaces via Self-Determined Grouping-
[pdf]
[supp]
[bibtex]@InProceedings{Wen_2026_CVPR, author = {Wen, Hongyu and Deng, Jia}, title = {SeeGroup: Multi-Layer Depth Estimation of Transparent Surfaces via Self-Determined Grouping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7299--7309} }
Any2Any 3D Diffusion Models with Knowledge Transfer: A Radiotherapy Planning Study-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yuhan and Li, Zihan and Liu, Han and Arberet, Simon and Kraus, Martin and Zhou, Yuyin and Ghesu, Florin-Cristian and Comaniciu, Dorin and Kamen, Ali and Gao, Riqiang}, title = {Any2Any 3D Diffusion Models with Knowledge Transfer: A Radiotherapy Planning Study}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {16520--16530} }
CrossVL: Complexity-Aware Feature Routing and Paired Curriculum for Cross-View Vision-Language Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Zhipeng and Luo, Chunbo}, title = {CrossVL: Complexity-Aware Feature Routing and Paired Curriculum for Cross-View Vision-Language Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {10116--10125} }
Grounding Everything in Tokens for Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2026_CVPR, author = {Ren, Xiangxuan and Wang, Zhongdao and Hou, Liping and Tang, Pin and Wang, Guoqing and Ma, Chao}, title = {Grounding Everything in Tokens for Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {41171--41181} }
GFRRN: Explore the Gaps in Single Image Reflection Removal-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yu and He, Zewei and Liu, Xingyu and Chen, Zixuan and Lu, Zhe-Ming}, title = {GFRRN: Explore the Gaps in Single Image Reflection Removal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5690--5699} }
VLM-PTQ: Efficient Post-Training Quantization for Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Juncan and Huang, Kejie}, title = {VLM-PTQ: Efficient Post-Training Quantization for Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {24696--24705} }
MoVieS: Motion-Aware 4D Dynamic View Synthesis in One Second-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Chenguo and Lin, Yuchen and Pan, Panwang and Yu, Yifan and Hu, Tao and Yan, Honglei and Fragkiadaki, Katerina and Mu, Yadong}, title = {MoVieS: Motion-Aware 4D Dynamic View Synthesis in One Second}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {295--306} }
Affostruction: 3D Affordance Grounding with Generative Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Chunghyun and Lee, Seunghyeon and Cho, Minsu}, title = {Affostruction: 3D Affordance Grounding with Generative Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7435--7445} }
PointCNN++: Performant Convolution on Native Points-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Lihan and Zhong, Haofeng and Bu, Rui and Sun, Mingchao and Chen, Wenzheng and Chen, Baoquan and Li, Yangyan}, title = {PointCNN++: Performant Convolution on Native Points}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {24151--24161} }
Mind the Generative Details: Direct Localized Detail Preference Optimization for Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Zitong and Zhang, Kaidong and Ding, Yukang and Gao, Chao and Ding, Rui and Chen, Ying and Zuo, Wangmeng}, title = {Mind the Generative Details: Direct Localized Detail Preference Optimization for Video Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {35998--36008} }
High-Fidelity Diffusion Face Swapping with ID-Constrained Facial Conditioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Dailan and Wang, Xiahong and Wang, Shulun and Shao, Hao and Ma, Bingqi and Song, Guanglu and Liu, Yu and Li, Hongsheng}, title = {High-Fidelity Diffusion Face Swapping with ID-Constrained Facial Conditioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {25767--25776} }
Is Parameter Isolation Better for Prompt-Based Continual Learning?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jiangyang and Ding, Chenhao and Dong, SongLin and Wang, Qiang and Zhao, Jianchao and He, Yuhang and Gong, Yihong}, title = {Is Parameter Isolation Better for Prompt-Based Continual Learning?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3887--3897} }
Weakly Supervised Video Anomaly Detection with Anomaly-Connected Components and Intention Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yu and Zhao, Shengjie}, title = {Weakly Supervised Video Anomaly Detection with Anomaly-Connected Components and Intention Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {28545--28556} }
Duala: Dual-Level Alignment of Subjects and Stimuli for Cross-Subject fMRI Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Shumeng and Guo, Jintao and Zhang, Jian and Zhou, Yulin and Cao, Luyang and Shi, Yinghuan}, title = {Duala: Dual-Level Alignment of Subjects and Stimuli for Cross-Subject fMRI Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42722--42731} }
MatPedia: A Universal Generative Foundation for High-Fidelity Material Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Di and Yang, Shuhui and Yang, Mingxin and Lu, Jiawei and Tang, Yixuan and Han, Xintong and Chen, Zhuo and Wang, Beibei and Guo, Chunchao}, title = {MatPedia: A Universal Generative Foundation for High-Fidelity Material Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {8943--8953} }
Affordance Field Intervention: Enabling VLAs to Escape Memory Traps in Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Siyu and Wang, Zijian and Wang, Yunke and Xia, Chenghao and Huang, Tao and Xu, Chang}, title = {Affordance Field Intervention: Enabling VLAs to Escape Memory Traps in Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {37206--37215} }
CSF: Black-box Fingerprinting via Compositional Semantics for Text-to-Image Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Junhoo and Koo, Mijin and Kwak, Nojun}, title = {CSF: Black-box Fingerprinting via Compositional Semantics for Text-to-Image Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {16572--16582} }
CAR-SAM: Cross-Attention Reconstruction for Post-Training Quantization of the Segment Anything Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2026_CVPR, author = {Wen, Houji and Yu, Jiangyong and Yang, Dawei and Li, Jun}, title = {CAR-SAM: Cross-Attention Reconstruction for Post-Training Quantization of the Segment Anything Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {33632--33641} }
Vision Transformers Need More Than Registers-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Cheng and Yu, Yizhou and Yang, Sibei}, title = {Vision Transformers Need More Than Registers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {26328--26337} }
Evidential Neural Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Duan_2026_CVPR, author = {Duan, Ruxiao and Wong, Alex}, title = {Evidential Neural Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {28632--28641} }
ThinkingViT: Matryoshka Thinking Vision Transformer for Elastic Inference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hojjat_2026_CVPR, author = {Hojjat, Ali and Haberer, Janek and Pirk, S{\"o}ren and Landsiedel, Olaf}, title = {ThinkingViT: Matryoshka Thinking Vision Transformer for Elastic Inference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {41923--41933} }
Progress by Pieces: Test-Time Scaling for Autoregressive Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Joonhyung and Jang, Hyeongwon and Kim, Joowon and Yang, Eunho}, title = {Progress by Pieces: Test-Time Scaling for Autoregressive Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38091--38100} }
Cluster-Aware Neural Collapse Prompt Tuning for Long-Tailed Generalization of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Boyang and Li, Liang and Peng, Lin and Gao, Yuhan and Sheng, Xichun and Yan, Chenggang}, title = {Cluster-Aware Neural Collapse Prompt Tuning for Long-Tailed Generalization of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {3122--3132} }
Prompt Yourself: Awakening Textual Semantics in 1D Visual Tokenizers-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Hualiang and Fu, Siming and Jia, Weinan and Lu, Yuning and Liu, Mu and Jiang, Jidong and Li, Xiaomeng}, title = {Prompt Yourself: Awakening Textual Semantics in 1D Visual Tokenizers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {14864--14874} }
Gaussian-Mixture Latent Flow for Stochastic 3D Human Motion Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Yue and Li, Frederick W. B. and Liang, Xiaohui}, title = {Gaussian-Mixture Latent Flow for Stochastic 3D Human Motion Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7131--7141} }
DreamStyle: A Unified Framework for Video Stylization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Mengtian and Chen, Jinshu and Zhao, Songtao and Feng, Wanquan and Tu, Pengqi and He, Qian}, title = {DreamStyle: A Unified Framework for Video Stylization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {36019--36029} }
Uncertainty-Aware Modality Fusion for Unaligned RGB-T Salient Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Mianzhao and Shi, Fan and Cheng, Xu and Jia, Chen and Chen, Shengyong}, title = {Uncertainty-Aware Modality Fusion for Unaligned RGB-T Salient Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {41489--41498} }
MD2E: Modeling Depth-to-Edge Cues for Monocular Metric Depth Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Ning_2026_CVPR, author = {Ning, Chao and Shen, Minghe and Yokoya, Naoto}, title = {MD2E: Modeling Depth-to-Edge Cues for Monocular Metric Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5772--5782} }
Texvent: Asynchronous Event Data Simulation via Text Prompt-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Ruofei and Duan, Peiqi and Cheung, Ka Chun and See, Simon and Shi, Boxin and Wan, Renjie}, title = {Texvent: Asynchronous Event Data Simulation via Text Prompt}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {36375--36384} }
GenSplat: Bridging the Generalization Gap in 3DGS Language Comprehension-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Fang and Liu, Yuhao and Xu, Ke and Hancke, Gerhard Petrus and Lau, Rynson W. H.}, title = {GenSplat: Bridging the Generalization Gap in 3DGS Language Comprehension}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {5221--5231} }
Coupled Diffusion Sampling for Training-Free Multi-View Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Alzayer_2026_CVPR, author = {Alzayer, Hadi and Zhang, Yunzhi and Geng, Chen and Huang, Jia-Bin and Wu, Jiajun}, title = {Coupled Diffusion Sampling for Training-Free Multi-View Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {43686--43696} }
Video Panels for Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Doorenbos_2026_CVPR, author = {Doorenbos, Lars and Spurio, Federico and Gall, Juergen}, title = {Video Panels for Long Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {31293--31303} }
PhyGaP: Physically-Grounded Gaussians with Polarization Cues-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Jiale and Bai, Xiaoyang and He, Zongqi and Xu, Weiwei and Peng, Yifan}, title = {PhyGaP: Physically-Grounded Gaussians with Polarization Cues}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7278--7288} }
Distilling Balanced Knowledge from a Biased Teacher-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Seonghak}, title = {Distilling Balanced Knowledge from a Biased Teacher}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {18032--18041} }
Learning Transferable Temporal Primitives for Video Reasoning via Synthetic Videos-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Songtao and Song, Sibo and Zhou, Chenyi and Wang, Yuan and Chen, Ruizhe and Guan, Tongkun and Luo, Ruilin and Zhang, Yan and Tang, Zhihang and Sun, Yuchong and Zhang, Hang and Yang, Zhibo and Bai, Shuai and Lin, Junyang and Liu, Zuozhu}, title = {Learning Transferable Temporal Primitives for Video Reasoning via Synthetic Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {31283--31292} }
Omni-3DEdit: Generalized Versatile 3D Editing in One-Pass-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Liyi and Wang, Pengfei and Zhang, Guowen and Ma, Zhiyuan and Zhang, Lei}, title = {Omni-3DEdit: Generalized Versatile 3D Editing in One-Pass}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {12640--12650} }
UnReflectAnything: RGB-Only Highlight Removal by Rendering Synthetic Specular Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rota_2026_CVPR, author = {Rota, Alberto and Kiray, Mert and Karaoglu, Mert Asim and Ruhkamp, Patrick and De Momi, Elena and Navab, Nassir and Busam, Benjamin}, title = {UnReflectAnything: RGB-Only Highlight Removal by Rendering Synthetic Specular Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {241--250} }
Understanding, Accelerating, and Improving MeanFlow Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Jin-Young and Go, Hyojun and Bogensperger, Lea and Erbach, Julius and Kalischek, Nikolai and Tombari, Federico and Schindler, Konrad and Narnhofer, Dominik}, title = {Understanding, Accelerating, and Improving MeanFlow Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {37992--38003} }
FOZO: Forward-Only Zeroth-Order Prompt Optimization for Test-Time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xingyu and Wang, Tao}, title = {FOZO: Forward-Only Zeroth-Order Prompt Optimization for Test-Time Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {7936--7945} }
VisualOverload: Probing Visual Understanding of VLMs in Really Dense Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gavrikov_2026_CVPR, author = {Gavrikov, Paul and Lin, Wei and Mirza, M. Jehanzeb and Jahagirdar, Soumya and Huzaifa, Muhammad and Doveh, Sivan and Glass, James and Yeung-Levy, Serena and Kuehne, Hilde}, title = {VisualOverload: Probing Visual Understanding of VLMs in Really Dense Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {40833--40844} }
STRNet: Visual Navigation with Spatio-Temporal Representation through Dynamic Graph Aggregation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2026_CVPR, author = {Ren, Hao and Bi, Zetong and Zeng, Yiming and Wan, Zhaoliang and Qi, Lu and Cheng, Hui}, title = {STRNet: Visual Navigation with Spatio-Temporal Representation through Dynamic Graph Aggregation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42464--42473} }
AffordMatcher: Affordance Learning in 3D Scenes from Visual Signifiers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vu_2026_CVPR, author = {Vu, Nghia and Do, Tuong and Nguyen, Khang and Huang, Baoru and Le, Nhat and Nguyen, Binh Xuan and Tjiputra, Erman and Tran, Quang D. and Prakash, Ravi and Chiu, Te-Chuan and Nguyen, Anh}, title = {AffordMatcher: Affordance Learning in 3D Scenes from Visual Signifiers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {2635--2644} }
From 2D Alignment to 3D Plausibility: Unifying Heterogeneous 2D Priors and Penetration-Free Diffusion for Occlusion-Robust Two-Hand Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Gaoge and Cheng, Yongkang and Chen, Zhe and Huang, Shaoli and Liu, Tongliang}, title = {From 2D Alignment to 3D Plausibility: Unifying Heterogeneous 2D Priors and Penetration-Free Diffusion for Occlusion-Robust Two-Hand Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {42800--42809} }
Language-Grounded Decoupled Action Representation for Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Weng_2026_CVPR, author = {Weng, Wuding and Wu, Tongshu and Chen, Liucheng and Xie, Siyu and Wang, Zheng and Xu, Xing and Song, Jingkuan and Shen, Heng Tao}, title = {Language-Grounded Decoupled Action Representation for Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {6770--6780} }
Parameterized Prompt for Incremental Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{An_2026_CVPR, author = {An, Zijia and Diao, Boyu and Liu, Ruiqi and Huang, Libo and Yang, Chuanguang and Wang, Fei and An, Zhulin and Xu, Yongjun}, title = {Parameterized Prompt for Incremental Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {27600--27610} }
PixelDiT: Pixel Diffusion Transformers for Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Yongsheng and Xiong, Wei and Nie, Weili and Sheng, Yichen and Liu, Shiqiu and Luo, Jiebo}, title = {PixelDiT: Pixel Diffusion Transformers for Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {14273--14282} }
Video-CoE: Reinforcing Video Event Prediction via Chain of Events-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2026_CVPR, author = {Su, Qile and Tang, Jing and Chen, Rui and Sun, Lei and Chu, Xiangxiang}, title = {Video-CoE: Reinforcing Video Event Prediction via Chain of Events}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {32818--32828} }
FiDeSR: High-Fidelity and Detail-Preserving One-Step Diffusion Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Aro and Jang, Myeongjin and Moon, Chaewon and Shin, Youngjin and Jeong, Jinwoo and Park, Sang-hyo}, title = {FiDeSR: High-Fidelity and Detail-Preserving One-Step Diffusion Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {38270--38280} }
QuantVLA: Scale-Calibrated Post-Training Quantization for Vision-Language-Action Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jingxuan and Hsieh, Yunta and Wan, Zhongwei and Lin, Haokun and Wang, Xin and Wang, Ziqi and Lei, Yingtie and Zhang, Mi}, title = {QuantVLA: Scale-Calibrated Post-Training Quantization for Vision-Language-Action Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {39539--39549} }
BrepGaussian: CAD reconstruction from Multi-View Images with Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Jiaxing and Ren, Dongyang and Xu, Hangyu and Yang, Zhouyuxiao and Li, Yuanqi and Guo, Jie and Zhou, Zhengkang and Guo, Yanwen}, title = {BrepGaussian: CAD reconstruction from Multi-View Images with Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {26104--26113} }
HiF-VLA: Hindsight, Insight and Foresight through Motion Representation for Vision-Language-Action Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Minghui and Ding, Pengxiang and Wang, Shu and Zhuang, Zifeng and Liu, Yang and Tong, Xinyang and Song, Wenxuan and Lyu, Shangke and Huang, Siteng and Wang, Donglin}, title = {HiF-VLA: Hindsight, Insight and Foresight through Motion Representation for Vision-Language-Action Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {20732--20742} }
VLM4RSDet: Collaborative Optimization with Vision-Language Model for Enhancing Remote Sensing Object Detection-
[pdf]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Shuohao and Fang, Qiang and Xu, Xin}, title = {VLM4RSDet: Collaborative Optimization with Vision-Language Model for Enhancing Remote Sensing Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {18450--18460} }
Exact-GS: Mathematically Rigorous and Accurate 3D Gaussian Splatting for 3D X-ray Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Guangpu and Kie{\ss}, Steffen and Luo, Hanxiang and Liu, Xingyu and Simon, Sven}, title = {Exact-GS: Mathematically Rigorous and Accurate 3D Gaussian Splatting for 3D X-ray Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4902--4911} }
SDGS: Spatial Difference Guided Gaussian Splatting for Simultaneous Localization and 3D Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Tian_2026_CVPR, author = {Tian, Yijian and Ou, Mingtao and Pan, Zijian and Ji, Xinglong}, title = {SDGS: Spatial Difference Guided Gaussian Splatting for Simultaneous Localization and 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {4860--4869} }
Global Structure-from-Motion Meets Feedforward Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Linfei and Sch{\"o}nberger, Johannes and Pollefeys, Marc}, title = {Global Structure-from-Motion Meets Feedforward Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {21880--21890} }
TUNA: Taming Unified Visual Representations for Native Unified Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Zhiheng and Ren, Weiming and Liu, Haozhe and Zhou, Zijian and Chen, Shoufa and Qiu, Haonan and Huang, Xiaoke and An, Zhaochong and Yang, Fanny and Patel, Aditya and Atliha, Viktar and Ng, Tony and Han, Xiao and Zhu, Chuyan and Zhang, Chenyang and Liu, Ding and Perez-Rua, Juan-Manuel and He, Sen and Schmidhuber, J{\"u}rgen and Chen, Wenhu and Luo, Ping and Liu, Wei and Xiang, Tao and Schult, Jonas and Cong, Yuren}, title = {TUNA: Taming Unified Visual Representations for Native Unified Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {15740--15751} }
Compositional Transformation Reasoning for Composed Video Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Sihong and Wu, Jiaxin and Jiang, Dongmei and Cai, Yi and Wang, Yaowei and Wei, Xiaoyong}, title = {Compositional Transformation Reasoning for Composed Video Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {25644--25653} }
ImmerIris: A Large-Scale Dataset and Benchmark for Off-Axis and Unconstrained Iris Recognition in Immersive Applications-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mi_2026_CVPR, author = {Mi, Yuxi and Yuan, Qiuyang and Zhong, Zhizhou and Zhao, Xuan and Zhou, Jiaogen and Zhu, Fubao and Guan, Jihong and Zhou, Shuigeng}, title = {ImmerIris: A Large-Scale Dataset and Benchmark for Off-Axis and Unconstrained Iris Recognition in Immersive Applications}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {28838--28847} }
Foundation Model Priors Enhance Object Focus in Feature Space for Source-Free Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{VCR_2026_CVPR, author = {VCR, Sairam and Lalla, Rishabh and Dayal, Aveen and Kulkarni, Tejal and Lalla, Anuj and Balasubramanian, Vineeth N. and Khan, Muhammad Haris}, title = {Foundation Model Priors Enhance Object Focus in Feature Space for Source-Free Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2026}, pages = {29431--29440} }
ShreddingNet: Coarse-to-Fine Restoration for Multi-Source Shredded Manuscripts-
[pdf]
[supp]
[bibtex]@InProceedings{Cui_2026_CVPR, author = {Cui, Haoyang and Jiang, Hao and Mu, Yadong}, title = {ShreddingNet: Coarse-to-Fine Restoration for Multi-Source Shredded Manuscripts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8247-8256} }
Reading or Reasoning? Format Decoupled Reinforcement Learning for Document OCR-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2026_CVPR, author = {Zhong, Yufeng and Chen, Lei and Zeng, Zhixiong and Zhao, Xuanle and Jiang, Deyang and Zheng, Liming and Huang, Jing and Qiu, Haibo and Shi, Peng and Yang, Siqi and Ma, Lin}, title = {Reading or Reasoning? Format Decoupled Reinforcement Learning for Document OCR}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33164-33173} }
IGen: Scalable Data Generation for Robot Learning from Open-World Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2026_CVPR, author = {Gu, Chenghao and Kang, Haolan and Lin, Junchao and Wang, Jinghe and Wu, Duo and Xie, Shuzhao and Huang, Fanding and Ge, Junchen and Gong, Ziyang and Li, Letian and Zheng, Hongying and Lv, Changwei and Wang, Zhi}, title = {IGen: Scalable Data Generation for Robot Learning from Open-World Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28111-28122} }
RegFormer: Transferable Relational Grounding for Efficient Weakly-Supervised Human-Object Interaction Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Jihwan and Yang, Chanhyeong and Park, Jinyoung and Song, Taehoon and Kim, Hyunwoo J.}, title = {RegFormer: Transferable Relational Grounding for Efficient Weakly-Supervised Human-Object Interaction Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10387-10396} }
MotionScale: Reconstructing Appearance, Geometry, and Motion of Dynamic Scenes with Scalable 4D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Haoran and Lee, Gim Hee}, title = {MotionScale: Reconstructing Appearance, Geometry, and Motion of Dynamic Scenes with Scalable 4D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11860-11870} }
$\phi$-DPO: Fairness Direct Preference Optimization Approach to Continual Learning in Large Multimodal Models-
[pdf]
[supp]
[bibtex]@InProceedings{Truong_2026_CVPR, author = {Truong, Thanh-Dat and Tran, Huu-Thien and Cothren, Jackson and Raj, Bhiksha and Luu, Khoa}, title = {{$\phi$}-DPO: Fairness Direct Preference Optimization Approach to Continual Learning in Large Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39922-39934} }
PhyCo: Learning Controllable Physical Priors for Generative Motion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Narayanan_2026_CVPR, author = {Narayanan, Sriram and Jiang, Ziyu and Narasimhan, Srinivasa and Chandraker, Manmohan}, title = {PhyCo: Learning Controllable Physical Priors for Generative Motion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41892-41902} }
Better than Average: Spatially-Aware Aggregation of Segmentation Uncertainty Improves Downstream Performance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guarino_2026_CVPR, author = {Guarino, Vanessa Emanuela and Winklmayr, Claudia and Franzen, Jannik and Rumberger, Josef Lorenz and Pfeuffer, Manuel and Greven, Sonja and Maier-Hein, Klaus and Kainmueller, Dagmar and Karg, Christoph and L{\"u}th, Carsten T.}, title = {Better than Average: Spatially-Aware Aggregation of Segmentation Uncertainty Improves Downstream Performance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13145-13156} }
Real-Time Multimodal Fingertip Contact Detection via Depth and Motion Fusion for Vision-Based Human-Computer Interaction-
[pdf]
[supp]
[bibtex]@InProceedings{Toshpulatov_2026_CVPR, author = {Toshpulatov, Mukhiddin and Lee, Wookey and Lee, Suan and Lee, Geehyuk}, title = {Real-Time Multimodal Fingertip Contact Detection via Depth and Motion Fusion for Vision-Based Human-Computer Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1819-1828} }
Weight Space Representation Learning via Neural Field Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zhuoqian and Salzmann, Mathieu and S{\"u}sstrunk, Sabine}, title = {Weight Space Representation Learning via Neural Field Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17734-17743} }
Bringing Your Portrait to 3D Presence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jiawei and Chu, Lei and Li, Jiahao and Zang, Zhenyu and Li, Chong and Li, Xiao and Cao, Xun and Zhu, Hao and Lu, Yan}, title = {Bringing Your Portrait to 3D Presence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28468-28480} }
Learning Straight Flows: Variational Flow Matching for Efficient Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Chenrui and Xiao, Xi and Wang, Tianyang and Wang, Xiao and Shen, Yanning}, title = {Learning Straight Flows: Variational Flow Matching for Efficient Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38154-38164} }
OpenMMReasoner: Pushing the Frontiers in Multimodal Reasoning with an Open and General Recipe-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Kaichen and Wu, Keming and Yang, Zuhao and Li, Bo and Hu, Kairui and Wang, Bin and Li, Xingxuan and Bing, Lidong}, title = {OpenMMReasoner: Pushing the Frontiers in Multimodal Reasoning with an Open and General Recipe}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19276-19286} }
MMGait: Towards Multi-Modal Gait Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Chenye and Cai, Qingyuan and Hou, Saihui and Li, Aoqi and Huang, Yongzhen}, title = {MMGait: Towards Multi-Modal Gait Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1726-1736} }
Differentiable Adaptive 4D Structured Illumination for Joint Capture of Shape and Reflectance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ding_2026_CVPR, author = {Ding, Huakeng and Chen, Yaowen and Zhou, Kun and Wu, Hongzhi}, title = {Differentiable Adaptive 4D Structured Illumination for Joint Capture of Shape and Reflectance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12448-12457} }
DCoAR: Deep Concept Injection into Unified Autoregressive Models for Personalized Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Fangtai and Liu, Mushui and He, Weijie and Wang, Zhao and Yu, Yunlong}, title = {DCoAR: Deep Concept Injection into Unified Autoregressive Models for Personalized Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29265-29274} }
Efficient Real-Time Raw-to-Raw Denoising for Extreme Low-Light Ultra HD Video on Mobile Devices-
[pdf]
[supp]
[bibtex]@InProceedings{Pochimireddy_2026_CVPR, author = {Pochimireddy, Charantej and Sahoo, Subhasmita and Verma, Apoorva and Shyam, Palavalli and Malviya, Swapnil and Sarvesh, Sarvesh and Gadde, Raj}, title = {Efficient Real-Time Raw-to-Raw Denoising for Extreme Low-Light Ultra HD Video on Mobile Devices}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1376-1385} }
OneSparse: A Unified Framework for Sparse Activation Layers in Vision Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Xingkui and Liang, Dingkang and Chen, Cheng and Zhang, Daoxin and Lv, Hanxiang and Xu, Zhe and Hu, Yao and Bai, Xiang}, title = {OneSparse: A Unified Framework for Sparse Activation Layers in Vision Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12084-12094} }
Dynamic Visual SLAM using a General 3D Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2026_CVPR, author = {Zhong, Xingguang and Jin, Liren and Popovic, Marija and Behley, Jens and Stachniss, Cyrill}, title = {Dynamic Visual SLAM using a General 3D Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21805-21815} }
W2W: Language-Model-Based Trajectory Prediction with Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Zirui and Yang, Biao and Ni, Rongrong and Zhou, Zhongkai and Shen, Shaobo}, title = {W2W: Language-Model-Based Trajectory Prediction with Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23538-23548} }
Thinking with Video: Video Generation as a Promising Multimodal Reasoning Paradigm-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tong_2026_CVPR, author = {Tong, Jingqi and Mou, Yurong and Li, Hangcheng and Li, Mingzhe and Yang, Yongzhuo and Zhang, Ming and Chen, Qiguang and Liang, Tianyi and Hu, Xiaomeng and Zheng, Yining and Chen, Xinchi and Zhao, Jun and Huang, Xuanjing and Qiu, Xipeng}, title = {Thinking with Video: Video Generation as a Promising Multimodal Reasoning Paradigm}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41121-41129} }
RAID: Retrieval-Augmented Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Mingxiu and Zhang, Zhe and Wu, Gaochang and Chai, Tianyou and Zhu, Xiatian}, title = {RAID: Retrieval-Augmented Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21367-21378} }
ID-Crafter: VLM-Grounded Online RL for Compositional Multi-Subject Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Panwang and Zhao, Jingjing and Lin, Yuchen and Lin, Chenguo and Li, Chenxin and Liu, Hengyu and Shen, Tingting and Mu, Yadong}, title = {ID-Crafter: VLM-Grounded Online RL for Compositional Multi-Subject Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36627-36637} }
Content-Aware Frequency Encoding for Implicit Neural Representations with Fourier-Chebyshev Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ke_2026_CVPR, author = {Ke, Junbo and Xu, Yangyang and Wang, Chao and Wen, You-Wei}, title = {Content-Aware Frequency Encoding for Implicit Neural Representations with Fourier-Chebyshev Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3646-3655} }
ShowTable: Unlocking Creative Table Visualization with Collaborative Reflection and Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Zhihang and Bao, Xiaoyi and Li, Pandeng and Zhou, Junjie and Liao, Zhaohe and He, Yefei and Jiang, Kaixun and Xie, Chen-Wei and Zheng, Yun and Xie, Hongtao}, title = {ShowTable: Unlocking Creative Table Visualization with Collaborative Reflection and Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24405-24416} }
Order Matters: 3D Shape Generation from Sequential VR Sketches-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yizi and Wu, Sidi and Xiao, Tianyi and Wiedemann, Nina and Landrieu, Loic}, title = {Order Matters: 3D Shape Generation from Sequential VR Sketches}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34259-34269} }
PinPoint: Evaluation of Composed Image Retrieval with Explicit Negatives, Multi-Image Queries, and Paraphrase Testing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Mahadev_2026_CVPR, author = {Mahadev, Rohan and Yuan, Joyce and Poirson, Patrick and Xue, David and Wu, Hao-Yu and Kislyuk, Dmitry}, title = {PinPoint: Evaluation of Composed Image Retrieval with Explicit Negatives, Multi-Image Queries, and Paraphrase Testing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9742-9751} }
Physical Object Understanding with a Physically Controllable World Model-
[pdf]
[supp]
[bibtex]@InProceedings{Venkatesh_2026_CVPR, author = {Venkatesh, Rahul and Kotar, Klemen and Chen, Lilian Naing and Lee, Wanhee and Ancone, Gia and Kim, Seungwoo and Wheeler, Luca Thomas and Watrous, Jared and Chen, Honglin and Bear, Daniel and Stojanov, Stefan and Yamins, Daniel L. K.}, title = {Physical Object Understanding with a Physically Controllable World Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2593-2602} }
Hybrid Agents for Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Bingchen and Li, Xin and Lu, Yiting and Chen, Zhibo}, title = {Hybrid Agents for Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22636-22647} }
Same or Not? Enhancing Visual Perception in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Marsili_2026_CVPR, author = {Marsili, Damiano and Mehta, Aditya and Lin, Ryan Y. and Gkioxari, Georgia}, title = {Same or Not? Enhancing Visual Perception in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17303-17315} }
Turbo-GS: Accelerating 3D Gaussian Fitting for High-Resolution Radiance Fields-
[pdf]
[supp]
[bibtex]@InProceedings{Dhiman_2026_CVPR, author = {Dhiman, Ankit and Lu, Tao and Srinath, R and Arslan, Emre and Xing, Angela and Xiangli, Yuanbo and Radhakrishnan, Venkatesh Babu and Sridhar, Srinath}, title = {Turbo-GS: Accelerating 3D Gaussian Fitting for High-Resolution Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15454-15464} }
DF^2-VB: Dual-level Fuzzy Fusion with View-specific Boosting for Multi-view Multi-label Classification-
[pdf]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Yuena and Cai, Haichun and Shan, Yi and Wei, Hao and Deng, Yongjian and Yang, Zhen and Lyu, Gengyu}, title = {{DF$^2$-VB}: Dual-level Fuzzy Fusion with View-specific Boosting for Multi-view Multi-label Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33859-33868} }
EmbodMocap: In-the-Wild 4D Human-Scene Reconstruction for Embodied Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Wenjia and Pan, Liang and Pi, Huaijin and Lou, Yuke and Ren, Xuqian and Wu, Yifan and Liao, Zhouyingcheng and Yang, Lei and Dabral, Rishabh and Theobalt, Christian and Komura, Taku}, title = {EmbodMocap: In-the-Wild 4D Human-Scene Reconstruction for Embodied Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28424-28434} }
Learning Effective Sign Features without Text for Gloss-free Sign Language Translation-
[pdf]
[supp]
[bibtex]@InProceedings{Gan_2026_CVPR, author = {Gan, Shiwei and Liu, Xiao and Yin, Yafeng and Liu, Nan and Liu, Kuizhuang and Tuerdaken, Desibieer and Jiang, Zhiwei and Xie, Lei and Lu, Sanglu and Wen, Hongkai}, title = {Learning Effective Sign Features without Text for Gloss-free Sign Language Translation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9827-9836} }
Reconstruction-Guided Slot Curriculum: Addressing Object Over-Fragmentation in Video Object-Centric Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Moon_2026_CVPR, author = {Moon, WonJun and Seong, Hyun Seok and Heo, Jae-Pil}, title = {Reconstruction-Guided Slot Curriculum: Addressing Object Over-Fragmentation in Video Object-Centric Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25001-25010} }
GeoSURGE: Geo-localization using Semantic Fusion with Hierarchy of Geographic Embeddings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Daruna_2026_CVPR, author = {Daruna, Angel and Meegan, Nicholas and Chiu, Han-Pang and Samarasekera, Supun and Kumar, Rakesh}, title = {GeoSURGE: Geo-localization using Semantic Fusion with Hierarchy of Geographic Embeddings}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41396-41405} }
Towards Foundation Models for 3D Scene Understanding: Instance-Aware Self-Supervised Learning for Point Clouds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Bin and Abdelsamad, Mohamed and Zhang, Miao and Condurache, Alexandru Paul}, title = {Towards Foundation Models for 3D Scene Understanding: Instance-Aware Self-Supervised Learning for Point Clouds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2938-2947} }
Building a Precise Video Language with Human-AI Oversight-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Zhiqiu and Cen, Siyuan and Mitra, Chancharik and Li, Isaac and Huang, Yuhan and Ling, Yu Tong Tiffany and Wang, Hewei and Pi, Irene and Zhu, Shihang and Han, Yili and Du, Yilun and Ramanan, Deva}, title = {Building a Precise Video Language with Human-AI Oversight}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11334-11345} }
Clair Obscur: an Illumination-Aware Method for Real-World Image Vectorization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Xingyue and Peng, Shuai and Xie, Xiangyu and Zhu, Jianhua and Zhou, Yuxuan and Gao, Liangcai}, title = {Clair Obscur: an Illumination-Aware Method for Real-World Image Vectorization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9161-9170} }
Virtual Full-stack Scanning of Brain MRI via Imputing Any Quantised Code-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Yicheng and Song, Tao and Wu, Zhonghua and Ye, Jin and Ge, Zongyuan and Bai, Wenjia and Chen, Zhaolin and Cai, Jianfei}, title = {Virtual Full-stack Scanning of Brain MRI via Imputing Any Quantised Code}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21026-21035} }
MoD-DPO: Towards Mitigating Cross-modal Hallucinations in Omni LLMs using Modality Decoupled Preference Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chaubey_2026_CVPR, author = {Chaubey, Ashutosh and Pang, Jiacheng and Soleymani, Mohammad}, title = {MoD-DPO: Towards Mitigating Cross-modal Hallucinations in Omni LLMs using Modality Decoupled Preference Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18284-18294} }
PerpetualWonder: Long-horizon Action-conditioned 4D Scene Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhan_2026_CVPR, author = {Zhan, Jiahao and Li, Zizhang and Yu, Hong-Xing and Wu, Jiajun}, title = {PerpetualWonder: Long-horizon Action-conditioned 4D Scene Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25370-25380} }
Beyond Strict Pairing: Arbitrarily Paired Training for High-Performance Infrared and Visible Image Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Yanglin and Xu, Tianyang and Cheng, Chunyang and Li, Hui and Wu, Xiaojun and Kittler, Josef}, title = {Beyond Strict Pairing: Arbitrarily Paired Training for High-Performance Infrared and Visible Image Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12334-12343} }
PhotoFramer: Multi-modal Image Composition Instruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{You_2026_CVPR, author = {You, Zhiyuan and Wang, Ke and Zhang, He and Cai, Xin and Gu, Jinjin and Xue, Tianfan and Dong, Chao and Zhang, Zhoutong}, title = {PhotoFramer: Multi-modal Image Composition Instruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10197-10207} }
SIF: Semantically In-Distribution Fingerprints for Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yifei and Lou, Qian and Zheng, Mengxin}, title = {SIF: Semantically In-Distribution Fingerprints for Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17399-17408} }
Design Your Ad: Personalized Advertising Image and Text Generation with Unified Autoregressive Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Yexing and Feng, Wei and Zhang, Shen and Wang, Haohan and Qin, Yuxin and Li, Yaoyu and Ma, Ao and Luo, Yuhao and Wang, Lu and Ren, Xudong and Wang, Haoran and Ling, Run and Zhang, Zheng and Lv, Jingjing and Shen, Junjie and Law, Ching and Wang, Longguang and Guo, Yulan}, title = {Design Your Ad: Personalized Advertising Image and Text Generation with Unified Autoregressive Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {472-483} }
Taxonomy-Aware Representation Alignment for Hierarchical Visual Recognition with Large Multimodal Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Hulingxiao and Tan, Zhi and Peng, Yuxin}, title = {Taxonomy-Aware Representation Alignment for Hierarchical Visual Recognition with Large Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31124-31134} }
WorldLens: Full-Spectrum Evaluations of Driving World Models in Real World-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Ao and Kong, Lingdong and Yan, Tianyi and Liu, Hongsi and Yang, Yu and Huang, Ziqi and Yin, Wei and Zuo, Jialong and Hu, Yixuan and Zhu, Dekai and Lu, Dongyue and Liu, Youquan and Jiang, Guangfeng and Li, Linfeng and Li, Xiangtai and Zhuo, Long and Ng, Lai Xing and Cottereau, Benoit R. and Gao, Changxin and Pan, Liang and Ooi, Wei Tsang and Liu, Ziwei}, title = {WorldLens: Full-Spectrum Evaluations of Driving World Models in Real World}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36385-36399} }
DiffGraph: An Automated Agent-driven Model Merging Framework for In-the-Wild Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zhuoling and Rahmani, Hossein and Zhang, Jiarui and Xue, Yu and Mirmehdi, Majid and Kuen, Jason and Gu, Jiuxiang and Liu, Jun}, title = {DiffGraph: An Automated Agent-driven Model Merging Framework for In-the-Wild Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36713-36723} }
ReFTA: Breaking the Weight Reconstruction Bottleneck in Tensorized Parameter-Efficient Fine-Tuning-
[pdf]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Jingjing and Tang, Anda and Mao, Qiangqiang and Lin, Zhouchen and Cao, Yankai}, title = {ReFTA: Breaking the Weight Reconstruction Bottleneck in Tensorized Parameter-Efficient Fine-Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26369-26378} }
From Exploration to Exploitation: A Two-Stage Entropy RLVR Approach for Noise-Tolerant MLLM Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Donglai and Yang, Hongzheng and Zhao, Yuzhi and Zhang, Pingping and Chen, Jinpeng and Ma, Wenao and Hou, Zhijian and Wu, Mengyang and Li, Xiaolei and Hu, Senkang and Guan, Ziyi and Li, Jason Chun Lok and Po, Lai-Man}, title = {From Exploration to Exploitation: A Two-Stage Entropy RLVR Approach for Noise-Tolerant MLLM Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17776-17786} }
Tracking-Guided 4D Generation: Foundation-Tracker Motion Priors for 3D Model Animation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Su and Zhao, Cheng and Mittal, Himangi and Mittal, Gaurav and Kukkala, Rohith and Chen, Yingjie Victor and Chen, Mei}, title = {Tracking-Guided 4D Generation: Foundation-Tracker Motion Priors for 3D Model Animation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40992-41001} }
Revisiting Monocular SLAM with Spatio-Temporal Scene Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Piedade_2026_CVPR, author = {Piedade, Valter and Manam, Lalit and Yamazaki, Masashi and Miraldo, Pedro}, title = {Revisiting Monocular SLAM with Spatio-Temporal Scene Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28925-28936} }
RobustVisRAG: Causality-Aware Vision-Based Retrieval-Augmented Generation under Visual Degradations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, I-Hsiang and Liu, Yu-Wei and Wu, Tse-Yu and Chiang, Yu-Chien and Yang, Jen-Chieh and Chen, Wei-Ting}, title = {RobustVisRAG: Causality-Aware Vision-Based Retrieval-Augmented Generation under Visual Degradations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40802-40811} }
Dual-Level Hypergraph Generation for Addressing Feature Scarcity in Whole-Slide Image Classification-
[pdf]
[bibtex]@InProceedings{Yao_2026_CVPR, author = {Yao, Shuilian and Jia, Qi and Liu, Yu and Zhang, Pengshuo and Sun, Lili and Wang, Weimin and Zhu, Yanmei and Zhang, Bo and Fan, Xin}, title = {Dual-Level Hypergraph Generation for Addressing Feature Scarcity in Whole-Slide Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28328-28337} }
Edit2Perceive: Image Editing Diffusion Models Are Strong Dense Perceivers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Yiqing and Song, Yiren and Shou, Mike Zheng}, title = {Edit2Perceive: Image Editing Diffusion Models Are Strong Dense Perceivers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43568-43577} }
Will Multimodal Models Be Dazzled by Multi-Image Visual Puzzles?-
[pdf]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Zhi and Fan, YaoQi and Chen, Zhe and Cao, Yue and Liu, Yangzhou and Lu, Tong}, title = {Will Multimodal Models Be Dazzled by Multi-Image Visual Puzzles?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11943-11953} }
Unpaired Image Deraining Using Reward-Guided Self-Reinforcement Strategy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yinghao and Jin, Yeying and Chen, Xiang and Wei, Yanyan and Yan, Ziyang and Fu, Yaowen}, title = {Unpaired Image Deraining Using Reward-Guided Self-Reinforcement Strategy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1342-1354} }
Models as Lego Builders: Assembling Malice from Benign Blocks via Semantic Blueprints-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Chenxi and Liu, Xianggan and Shen, Dake and Du, Yaosong and Yao, Zhibo and Jiang, Hao and Jiang, Linyi and Cao, Chengwei and Zhang, Jingzhe and Peng, RanYi and Bai, Peiling and Huang, Xiande}, title = {Models as Lego Builders: Assembling Malice from Benign Blocks via Semantic Blueprints}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1533-1542} }
GroundVTS: Visual Token Sampling in Multimodal Large Language Models for Video Temporal Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Rong and Xiao, Kaiyan and Zhu, Minghao and Wang, Liuyi and Dai, Kai and Yang, Zhao}, title = {GroundVTS: Visual Token Sampling in Multimodal Large Language Models for Video Temporal Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10408-10418} }
LAM: Language Articulated Object Modelers-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Yipeng and Ge, Yunhao and Cai, Peilin and Seita, Daniel and Itti, Laurent}, title = {LAM: Language Articulated Object Modelers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16010-16020} }
The Power of Prior: Training-Free Open-Vocabulary Semantic Segmentation with LLaVA-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Bingfeng and Yu, Siyue and Li, Hui and Lin, Jiahua and Wang, Wenwu and Xiao, Jimin}, title = {The Power of Prior: Training-Free Open-Vocabulary Semantic Segmentation with LLaVA}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6336-6345} }
SWIFT: Sliding Window Reconstruction for Few-Shot Training-Free Generated Video Attribution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Chao and Yang, Zijin and Wang, Yaofei and Qi, Yuang and Zhang, Weiming and Yu, Nenghai and Chen, Kejiang}, title = {SWIFT: Sliding Window Reconstruction for Few-Shot Training-Free Generated Video Attribution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31725-31734} }
Vanast: Virtual Try-On with Human Image Animation via Synthetic Triplet Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cha_2026_CVPR, author = {Cha, Hyunsoo and Woo, Wonjung and Kim, Byungjun and Joo, Hanbyul}, title = {Vanast: Virtual Try-On with Human Image Animation via Synthetic Triplet Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3988-3997} }
TAPE: Task-Adaptive Prototype Evolution in Audio-Language Models for Fully Few-shot Class-incremental Audio Classification-
[pdf]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Yunlong and Liang, Wenxin and Wang, Guanglu and Guan, Senqi and Zong, Linlin and Zhang, Dongyu and Liu, Xinyue}, title = {TAPE: Task-Adaptive Prototype Evolution in Audio-Language Models for Fully Few-shot Class-incremental Audio Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19570-19579} }
Mapping Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sen_2026_CVPR, author = {Sen, Lord and Mukherjee, Shyamapada}, title = {Mapping Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36215-36223} }
Image Guides Images: Consistent Video Amodal Completion with Rectified In-Context Exemplar Guidance-
[pdf]
[supp]
[bibtex]@InProceedings{Kong_2026_CVPR, author = {Kong, Xiaoyu and Ren, Ketong and She, Dongyu and Dong, Weiming and Wang, Miao}, title = {Image Guides Images: Consistent Video Amodal Completion with Rectified In-Context Exemplar Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8257-8266} }
HG-Lane: High-Fidelity Generation of Lane Scenes under Adverse Weather and Lighting Conditions without Re-annotation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Daichao and Chen, Qiupu and He, Feng and Ning, Xin and Li, Qiankun}, title = {HG-Lane: High-Fidelity Generation of Lane Scenes under Adverse Weather and Lighting Conditions without Re-annotation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8439-8448} }
IAG: Input-aware Backdoor Attack on VLM-based Visual Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Junxian and Xu, Beining and Chen, Simin and Li, Jiatong and Lei, Jingdi and Zhao, Haodong and Zhang, Di}, title = {IAG: Input-aware Backdoor Attack on VLM-based Visual Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27872-27883} }
ParticleGS: Learning Neural Gaussian Particle Dynamics from Videos for Prior-free Physical Motion Extrapolation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Quan_2026_CVPR, author = {Quan, Jinsheng and Miao, Qiaowei and Xu, Yichao and Lin, Zizhuo and Li, Ying and Yang, Wei and Li, Zhihui and Luo, Yawei}, title = {ParticleGS: Learning Neural Gaussian Particle Dynamics from Videos for Prior-free Physical Motion Extrapolation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8331-8341} }
CoRoGS: Contextual Gaussian Splatting for Robust Large-Deviation View Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Xin and Lu, Peng and Chen, Yisong and Pan, Chengwei and Li, Sheng}, title = {CoRoGS: Contextual Gaussian Splatting for Robust Large-Deviation View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8288-8297} }
MotionMaster: Generalizable Text-Driven Motion Generation and Editing-
[pdf]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Nan and Li, Yunhao and Pang, Lexi and He, Zimo and Huang, Siyuan and Zhu, Yixin}, title = {MotionMaster: Generalizable Text-Driven Motion Generation and Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30629-30639} }
ReAG: Reasoning-Augmented Generation for Knowledge-based Visual Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Compagnoni_2026_CVPR, author = {Compagnoni, Alberto and Morini, Marco and Sarto, Sara and Cocchi, Federico and Caffagni, Davide and Cornia, Marcella and Baraldi, Lorenzo and Cucchiara, Rita}, title = {ReAG: Reasoning-Augmented Generation for Knowledge-based Visual Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11901-11911} }
LiDeRe: A Lightweight Readout for Fast and Data-Efficient Dense Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Luddecke_2026_CVPR, author = {L\"uddecke, Timo and Meier, Jan Frederik and van Delden, Jan and Ecker, Alexander}, title = {LiDeRe: A Lightweight Readout for Fast and Data-Efficient Dense Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2959-2971} }
LATA: Laplacian-Assisted Transductive Adaptation for Conformal Uncertainty in Medical VLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bozorgtabar_2026_CVPR, author = {Bozorgtabar, Behzad and Mahapatra, Dwarikanath and Roy, Sudipta and Naseer, Muzammal and Razzak, Imran and Ge, Zongyuan}, title = {LATA: Laplacian-Assisted Transductive Adaptation for Conformal Uncertainty in Medical VLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36311-36320} }
DFD-HR: Generalizable Deepfake Detection via Hierarchical Routing Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Jiamu and Yan, Zhiyuan and Zhang, Ke-Yue and Yao, Taiping and Ding, Shouhong}, title = {DFD-HR: Generalizable Deepfake Detection via Hierarchical Routing Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13984-13995} }
Soul: Breathe Life into Digital Human for High-fidelity Long-term Multimodal Animation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jiangning and Zhu, Junwei and Gan, Zhenye and Luo, Donghao and Lin, Chuming and Xu, FeiFan and Peng, Xu and Hu, Jianlong and Liu, Yuansen and Hong, Yijia and Cao, Weijian and Feng, Han and Chen, Xu and Fu, Chencan and He, Keke and Hu, Xiaobin and Wang, Chengjie}, title = {Soul: Breathe Life into Digital Human for High-fidelity Long-term Multimodal Animation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3953-3964} }
Decouple to Generalize: Context-First Self-Evolving Learning for Data-Scarce Vision-Language Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Tingyu and Sun, Zheng and Wei, Jingxuan and He, Conghui and Wu, Lijun and Tan, Cheng}, title = {Decouple to Generalize: Context-First Self-Evolving Learning for Data-Scarce Vision-Language Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29357-29366} }
AceTone: Bridging Words and Colors for Conditional Image Grading-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Tianren and Liao, Mingxiang and Zhang, Xijin and Ye, Qixiang}, title = {AceTone: Bridging Words and Colors for Conditional Image Grading}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25851-25860} }
MORE-STEM: Long-Short MemOry REcall and Spatio-TEmporal Consistency Model for Query-Driven 3D/4D Point Cloud Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Chade and Feng, Haida and Zhang, Pengju and Wu, Yihong}, title = {MORE-STEM: Long-Short MemOry REcall and Spatio-TEmporal Consistency Model for Query-Driven 3D/4D Point Cloud Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31462-31471} }
Homaloidal parametrization for detecting critical two-view configurations-
[pdf]
[supp]
[bibtex]@InProceedings{Madhavan_2026_CVPR, author = {Madhavan, Rakshith and Forlivesi, Matteo and Bertolini, Marina and Turrini, Cristina and Arrigoni, Federica and Magri, Luca}, title = {Homaloidal parametrization for detecting critical two-view configurations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26432-26440} }
BiEvLight: Bi-level Learning of Task-Aware Event Refinement for Low-Light Image Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2026_CVPR, author = {Yao, Zishu and Su, Xiang-Xiang and Zhou, Shengning and Chen, Guang-Yong and Fan, Guodong and Chen, Xing}, title = {BiEvLight: Bi-level Learning of Task-Aware Event Refinement for Low-Light Image Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41541-41550} }
Data Leakage Detection and De-duplication in Large Scale Geospatial Image Datasets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Adimoolam_2026_CVPR, author = {Adimoolam, Yeshwanth Kumar and Poullis, Charalambos and Averkiou, Melinos}, title = {Data Leakage Detection and De-duplication in Large Scale Geospatial Image Datasets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {72-81} }
ConsistCompose: Unified Multimodal Layout Control for Image Composition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Xuanke and Li, Boxuan and Han, Xiaoyang and Cai, Zhongang and Yang, Lei and Wang, Quan and Lin, Dahua}, title = {ConsistCompose: Unified Multimodal Layout Control for Image Composition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {495-505} }
Generate, Analyze, and Refine: Training-Free Sound Source Localization via MLLM Meta-Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Subin and Kim, Jung Uk}, title = {Generate, Analyze, and Refine: Training-Free Sound Source Localization via MLLM Meta-Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15752-15761} }
Learnable Motion-Focused Tokenization for Effective and Efficient Video Unsupervised Domain Adaptation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Tzu Ling and Stavness, Ian and Rochan, Mrigank}, title = {Learnable Motion-Focused Tokenization for Effective and Efficient Video Unsupervised Domain Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31262-31271} }
RxnCaption: Reformulating Reaction Diagram Parsing as Visual Prompt Guided Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Jiahe and Wang, Chuang and Jiang, Bowen and Wang, Yinfan and Zheng, Hao and Wei, Xingjian and Liu, Chengjin and Nie, Rui and Gao, Junyuan and Sun, Jiaxing and Wang, Yubin and Wu, Lijun and Huang, Zhenhua and Wu, Jiang and Yu, Qian and He, Conghui}, title = {RxnCaption: Reformulating Reaction Diagram Parsing as Visual Prompt Guided Captioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30864-30873} }
Multi-Patch Global-to-Local Transformer Architecture For Efficient Flow Matching and Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dao_2026_CVPR, author = {Dao, Quan and Metaxas, Dimitris}, title = {Multi-Patch Global-to-Local Transformer Architecture For Efficient Flow Matching and Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33000-33011} }
iSplat: Iterative Learning for Fine-Grained Gaussian Splatting-
[pdf]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Haifeng and Long, Wei and Gu, Shuhang and Duan, Lixin and Li, Wen}, title = {iSplat: Iterative Learning for Fine-Grained Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11746-11755} }
Gyro-based Deep Video Deblurring-
[pdf]
[supp]
[bibtex]@InProceedings{Rim_2026_CVPR, author = {Rim, Jaesung and Kim, Woohyeok and Lee, Haeyun and Yang, Heemin and Wang, Ke and Cho, Sunghyun}, title = {Gyro-based Deep Video Deblurring}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8364-8374} }
Detect Anything via Next Point Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Qing and Huo, Junan and Chen, Xingyu and Xiong, Yuda and Zeng, Zhaoyang and Chen, Yihao and Ren, Tianhe and Yu, Junzhi and Zhang, Lei}, title = {Detect Anything via Next Point Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25472-25483} }
VidEoMT: Your ViT is Secretly Also a Video Segmentation Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Norouzi_2026_CVPR, author = {Norouzi, Narges and Zulfikar, Idil Esen and Cavagnero, Niccol\`o and Kerssies, Tommie and Leibe, Bastian and Dubbelman, Gijs and de Geus, Daan}, title = {VidEoMT: Your ViT is Secretly Also a Video Segmentation Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35177-35186} }
PSDesigner: Automated Graphic Design with a Human-Like Creative Workflow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shuai_2026_CVPR, author = {Shuai, Xincheng and Tang, Song and Huang, Yutong and Ding, Henghui and Tao, Dacheng}, title = {PSDesigner: Automated Graphic Design with a Human-Like Creative Workflow}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10165-10175} }
VAST: Video Ability-Stratified Taxonomy for Data-Efficient Video Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zhongan and Wen, Xiaoyu and Du, Lingxiao and Li, Kun and Wu, Zhiliang and Xu, Xingcheng and Zhang, Qiaosheng and Lu, Chaochao and Fan, Hehe}, title = {VAST: Video Ability-Stratified Taxonomy for Data-Efficient Video Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18576-18586} }
HQC-NBV: A Hybrid Quantum-Classical View Planning Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Xiaotong and Chen, Chang Wen}, title = {HQC-NBV: A Hybrid Quantum-Classical View Planning Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35091-35100} }
TWEO: Transformers Without Extreme Outliers Enables FP8 Training And Quantization For Dummies-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Guang and Shao, Jie and Tang, Ningyuan and Liu, Xinyao and Wu, Jianxin}, title = {TWEO: Transformers Without Extreme Outliers Enables FP8 Training And Quantization For Dummies}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6095-6105} }
Multi-view Consistent 3D Gaussian Head Avatars 'without' Multi-view Generation-
[pdf]
[bibtex]@InProceedings{Chharia_2026_CVPR, author = {Chharia, Aviral and De la Torre, Fernando}, title = {Multi-view Consistent 3D Gaussian Head Avatars 'without' Multi-view Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40163-40174} }
HandVQA: Diagnosing and Improving Fine-Grained Spatial Reasoning about Hands in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sayem_2026_CVPR, author = {Sayem, MD Khalequzzaman Chowdhury and Chowdhury, Mubarrat Tajoar and Tiruneh, Yihalem Yimolal and Khan, Muneeb A. and Ali, Muhammad Salman and Bhattarai, Binod and Baek, Seungryul}, title = {HandVQA: Diagnosing and Improving Fine-Grained Spatial Reasoning about Hands in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2515-2525} }
FALCON: False-Negative Aware Learning of Contrastive Negatives in Vision-Language Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Myunsoo and Shim, Seongwoong and Lee, Byung-Jun}, title = {FALCON: False-Negative Aware Learning of Contrastive Negatives in Vision-Language Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {701-711} }
D2T2 - Multimodal Automated Planning for Brachytherapy-
[pdf]
[supp]
[bibtex]@InProceedings{Moore_2026_CVPR, author = {Moore, Lance C. and Mitra, Aranyo and Truong, Ryan and Kallis, Karoline and Kisling, Kelly and Meyers, Sandra M. and Vasconcelos, Nuno}, title = {D2T2 - Multimodal Automated Planning for Brachytherapy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42670-42680} }
Toward Early Quality Assessment of Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Huanlei and Wei, Hongxin and Jing, Bingyi}, title = {Toward Early Quality Assessment of Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38410-38419} }
Semantic Foam: Unifying Spatial and Semantic Scene Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sharafeldin_2026_CVPR, author = {Sharafeldin, Amr and Mikaeili, Aryan and Walker, Thomas and Govindarajan, Shrisudhan and Rebain, Daniel and Yi, Kwang Moo and Tagliasacchi, Andrea}, title = {Semantic Foam: Unifying Spatial and Semantic Scene Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29814-29823} }
MMFace-DiT: A Dual-Stream Diffusion Transformer for High-Fidelity Multimodal Face Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Krishnamurthy_2026_CVPR, author = {Krishnamurthy, Bharath and Rattani, Ajita}, title = {MMFace-DiT: A Dual-Stream Diffusion Transformer for High-Fidelity Multimodal Face Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4580-4589} }
Mantis: A Versatile Vision-Language-Action Model with Disentangled Visual Foresight-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yi and Li, Xueqi and Chen, Yiyang and Song, Jin and Wang, Yihan and Xiao, Zipeng and Su, Jiadi and Qiaoben, You and Liu, Pengfei and Deng, Zhijie}, title = {Mantis: A Versatile Vision-Language-Action Model with Disentangled Visual Foresight}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42505-42515} }
3D-Aware Implicit Motion Control for View-Adaptive Human Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Zhixue and He, Xu and Tang, Songlin and Zhang, Haoxian and Li, Qingfeng and Liu, Xiaoqiang and Wan, Pengfei and Gai, Kun}, title = {3D-Aware Implicit Motion Control for View-Adaptive Human Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2243-2252} }
Graph2Eval: Automatic Multimodal Task Generation for Agents via Knowledge Graphs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yurun and Hu, Xueyu and Liu, Yuhan and Wang, Ziqi and Liao, Zeyi and Chen, Lin and Wei, Feng and Qian, Yuxi and Zheng, Bo and Yin, Keting and Zhang, Shengyu}, title = {Graph2Eval: Automatic Multimodal Task Generation for Agents via Knowledge Graphs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {735-744} }
DetectSCI: Toward Object-Guided ROI Reconstruction for High-Resolution Video Snapshot Compressive Imaging-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Xingjian and Wang, Lishun and Wang, Ping and Yuan, Xin}, title = {DetectSCI: Toward Object-Guided ROI Reconstruction for High-Resolution Video Snapshot Compressive Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41720-41729} }
Harmonious Parameter Adaptation in Continual Visual Instruction Tuning for Safety-Aligned MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Ziqi and Che, Chang and Wang, Qi and Ma, Hui and Shi, Zenglin and Snoek, Cees G. M. and Wang, Meng}, title = {Harmonious Parameter Adaptation in Continual Visual Instruction Tuning for Safety-Aligned MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17282-17291} }
Explaining CLIP Zero-shot Predictions Through Concepts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ozdemir_2026_CVPR, author = {Ozdemir, Onat and Christensen, Anders and Alaniz, Stephan and Akata, Zeynep and Akbas, Emre}, title = {Explaining CLIP Zero-shot Predictions Through Concepts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31336-31345} }
From Pairs to Sequences: Track-Aware Policy Gradients for Keypoint Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yepeng and Li, Hao and Yang, Liwen and Li, Fangzhen and Ge, Xudi and Gu, Yuliang and Gao, Kuang and Wang, Bing and Chen, Guang and Ye, Hangjun and Xu, Yongchao}, title = {From Pairs to Sequences: Track-Aware Policy Gradients for Keypoint Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21453-21463} }
SToRe3D: Sparse Token Relevance in ViTs for Efficient Multi-View 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Papais_2026_CVPR, author = {Papais, Sandro and Feng, Lezhou and Cossette, Charles and Ge, Lingting}, title = {SToRe3D: Sparse Token Relevance in ViTs for Efficient Multi-View 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40751-40761} }
Consensus vs. Controversy: Mapping the Decision Space Where Architectures Diverge-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Minhyeok}, title = {Consensus vs. Controversy: Mapping the Decision Space Where Architectures Diverge}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34566-34574} }
VisionLeaf: Entropy-Guided Leaf-First Reasoning for Efficient and Accurate Think-with-Image-
[pdf]
[supp]
[bibtex]@InProceedings{Gui_2026_CVPR, author = {Gui, Haokun and Yang, Senqiao and Zhu, Mingkang and Chu, Meng and Wu, Sitong and Lu, Changsheng and Wang, Zihao and Tian, Zhuotao and Jia, Jiaya}, title = {VisionLeaf: Entropy-Guided Leaf-First Reasoning for Efficient and Accurate Think-with-Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5187-5198} }
SIR: Structured Image Representations for Explainable Robot Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Mattes_2026_CVPR, author = {Mattes, Paul and Schwab, Jan and Bosch, Jens and Li, Maximilian Xiling and Blank, Nils and Tang, Minh-Trung and Haberland, Moritz and Lioutikov, Rudolf}, title = {SIR: Structured Image Representations for Explainable Robot Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42484-42493} }
Unified Generation and Self-Verification for Vision-Language Models via Advantage Decoupled Preference Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2026_CVPR, author = {Qiu, Xinyu and Jia, Heng and Zeng, Zhengwen and Shen, Shuheng and Meng, Changhua and Yang, Yi and Zhu, Linchao}, title = {Unified Generation and Self-Verification for Vision-Language Models via Advantage Decoupled Preference Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36916-36925} }
Towards Calibrating Prompt Tuning of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sharifdeen_2026_CVPR, author = {Sharifdeen, Ashshak and Shamshad, Fahad and Munir, Muhammad Akhtar and Basu, Abhishek and Ismithdeen, Mohamed and Jeyamohan, Jeyapriyan and Silva, Chathurika and Nandakumar, Karthik and Khan, Muhammad Haris}, title = {Towards Calibrating Prompt Tuning of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39131-39140} }
Do Less, Achieve More: Do We Need Every-Step Optimization for RL Fine-tuning of Diffusion Models?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Renye and Cheng, Jikang and Sun, Shikun and Sun, Yi and Wu, You and Peng, Wei and Wang, Zongwei and Liang, Ling and Xing, Junliang and Cai, Yimao}, title = {Do Less, Achieve More: Do We Need Every-Step Optimization for RL Fine-tuning of Diffusion Models?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16561-16571} }
EgoX: Egocentric Video Generation from a Single Exocentric Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2026_CVPR, author = {Kang, Taewoong and Kim, Kinam and Kim, Dohyeon and Park, Minho and Hyung, Junha and Choo, Jaegul}, title = {EgoX: Egocentric Video Generation from a Single Exocentric Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11116-11126} }
Rethinking Glyph Spatial Information in Font Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Su_2026_CVPR, author = {Su, Peng and Yang, Xi}, title = {Rethinking Glyph Spatial Information in Font Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29190-29199} }
Synthetic Curriculum Reinforces Compositional Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Shijian and Fu, Runhao and Zhao, Siyi and Zhan, Qingqin and Wang, Xingjian and Jin, Jiarui and Lu, Yuan and Wu, Hanqian and Chen, Cunjian}, title = {Synthetic Curriculum Reinforces Compositional Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21953-21963} }
Revisiting Multimodal KV Cache Compression: A Frequency-Domain-Guided Outlier-KV-Aware Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yaoxin and Ye, Peng and Tan, Xudong and Tu, Chongjun and Zhao, Maosen and Hao, Jia and Chen, Tao}, title = {Revisiting Multimodal KV Cache Compression: A Frequency-Domain-Guided Outlier-KV-Aware Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39550-39560} }
Opti-NeuS: Neural Reconstruction for Dual-Layered Transparent and Opaque Objects-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yi and Zhang, Gaoyang and Tan, Jun and Liu, Xinguo}, title = {Opti-NeuS: Neural Reconstruction for Dual-Layered Transparent and Opaque Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22594-22603} }
Dynamic Important Example Mining for Reinforcement Finetuning-
[pdf]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Haoru and Wu, Sitong and Chen, Yanfeng and Zhao, Shizhen and Sun, Yang-Tian and Liu, Tianjia and Chang, Chirui and Zhang, Shaofeng and Sun, Samm and Wu, Xiuzhe and Xie, Ruobing and Qi, Xiaojuan}, title = {Dynamic Important Example Mining for Reinforcement Finetuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41456-41466} }
SPOT: Spatiotemporal Prompt Optimization for Motion-Stabilized MLLM-Guided Video Segmentation-
[pdf]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Jiayi and Qin, Zheyun and Xi, Xiaoming and Nie, Xiushan and Yin, Yilong}, title = {SPOT: Spatiotemporal Prompt Optimization for Motion-Stabilized MLLM-Guided Video Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32236-32245} }
The Missing GAP: From Solving Square Jigsaw Puzzles to Handling Real World Archaeological Fragments-
[pdf]
[supp]
[bibtex]@InProceedings{Shahar_2026_CVPR, author = {Shahar, Ofir Itzhak and Elkin, Gur and Ben-Shahar, Ohad}, title = {The Missing GAP: From Solving Square Jigsaw Puzzles to Handling Real World Archaeological Fragments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3186-3196} }
COT-FM: Cluster-wise Optimal Transport Flow Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chiang_2026_CVPR, author = {Chiang, Chiensheng and Tu, Kuan-Hsun and Liao, Jia-Wei and Chou, Cheng-Fu and Ke, Tsung-Wei}, title = {COT-FM: Cluster-wise Optimal Transport Flow Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11515-11524} }
KASALv2: Fully Automatic 3D Rotational Symmetry Classification and Axis Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Mengxin and Wang, Yulin and Luo, Chen and Li, Yongzhe and Zhou, Yijun}, title = {KASALv2: Fully Automatic 3D Rotational Symmetry Classification and Axis Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13866-13875} }
Perceptual-Evidence Anchored Reinforced Learning for Multimodal Reasoning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Chi and Qiu, Haibo and Zhang, Qiming and Xu, Yufei and Zeng, Zhixiong and Yang, Siqi and Shi, Peng and Ma, Lin and Zhang, Jing}, title = {Perceptual-Evidence Anchored Reinforced Learning for Multimodal Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41111-41120} }
Drive My Way: Preference Alignment of Vision-Language-Action Model for Personalized Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zehao and Jiang, Huaide and Dong, Shuaiwu and Wang, Yuping and Qiu, Hang and Li, Jiachen}, title = {Drive My Way: Preference Alignment of Vision-Language-Action Model for Personalized Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25204-25214} }
EventGait: Towards Robust Gait Recognition with Event Streams-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Senyan and Chen, Shuai and Shen, Chuanfu and Liu, Kean and Sun, Zhijing and Cao, Chengzhi and Fu, Xueyang}, title = {EventGait: Towards Robust Gait Recognition with Event Streams}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22323-22334} }
LF-BVN: Blind-View Network for Self-Supervised Light Field Denoising-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Longzhao and Zhang, Shuo and Gao, Chen and Tian, Qian and Lin, Youfang}, title = {LF-BVN: Blind-View Network for Self-Supervised Light Field Denoising}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1355-1364} }
Live Interactive Training for Video Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Xinyu and Yu, Haozheng and Sun, Yihong and Hariharan, Bharath and Sun, Jennifer J.}, title = {Live Interactive Training for Video Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39827-39837} }
Learning Long-term Motion Embeddings for Efficient Kinematics Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Stracke_2026_CVPR, author = {Stracke, Nick and Bauer, Kolja and Baumann, Stefan Andreas and Bautista, Miguel {\'A}ngel and Susskind, Josh and Ommer, Bj{\"o}rn}, title = {Learning Long-term Motion Embeddings for Efficient Kinematics Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42581-42591} }
Test-time Sparsity for Extreme Fast Action Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2026_CVPR, author = {Ji, Kangye and Meng, Yuan and Zhou, Jianbo and Li, Ye and Tang, Chen and Wang, Zhi}, title = {Test-time Sparsity for Extreme Fast Action Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9414-9423} }
MedFG-VQA: Low-Frequency Memory and Graph Attention for Lightweight Medical VQA-
[pdf]
[supp]
[bibtex]@InProceedings{Gu_2026_CVPR, author = {Gu, Haowen and Pei, Gensheng and Sun, Zeren and Ren, Mingwu and Shu, Xiangbo and Yao, Yazhou and Shen, Fumin}, title = {MedFG-VQA: Low-Frequency Memory and Graph Attention for Lightweight Medical VQA}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42755-42764} }
FastRef: Fast Prototype Refinement for Few-shot Industrial Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yufei and Tian, Long and Dai, Yuyang and Chen, Wenchao and Bao, Liang and Liu, Xiyang}, title = {FastRef: Fast Prototype Refinement for Few-shot Industrial Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43040-43049} }
SaPaVe: Towards Active Perception and Manipulation in Vision-Language Action Models for Robotics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Mengzhen and Zhou, Enshen and Chi, Cheng and Han, Yi and Rong, Shanyu and Chen, Liming and Wang, Pengwei and Wang, Zhongyuan and Zhang, Shanghang}, title = {SaPaVe: Towards Active Perception and Manipulation in Vision-Language Action Models for Robotics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37164-37174} }
Understanding Task Transfer in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sachdeva_2026_CVPR, author = {Sachdeva, Bhuvan and Uppal, Karan and Java, Abhinav and Balasubramanian, Vineeth N.}, title = {Understanding Task Transfer in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28754-28763} }
The Universal Normal Embedding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tasker_2026_CVPR, author = {Tasker, Chen and Betser, Roy and Gofer, Eyal and Levi, Meir Yossef and Gilboa, Guy}, title = {The Universal Normal Embedding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32015-32025} }
Look Before You Fuse: 2D-Guided Cross-Modal Alignment for Robust 3D Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xiang and Hu, Zhangchi and Xiao, Xu and Kong, Bin}, title = {Look Before You Fuse: 2D-Guided Cross-Modal Alignment for Robust 3D Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11589-11598} }
Rethinking SNN Online Training and Deployment: Gradient-Coherent Learning via Hybrid-Driven LIF Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hao_2026_CVPR, author = {Hao, Zecheng and Huang, Yifan and Xu, Zijie and Liu, Wenxuan and Tang, Yuanhong and Yu, Zhaofei and Huang, Tiejun}, title = {Rethinking SNN Online Training and Deployment: Gradient-Coherent Learning via Hybrid-Driven LIF Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20202-20211} }
The devil is in the details: Enhancing Video Virtual Try-On via Keyframe-Driven Details Injection-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Qingdong and Chen, Xueqin and Pan, Yanjie and Tang, Peng and Xu, Pengcheng and Gan, Zhenye and Wang, Chengjie and Hu, Xiaobin and Zhang, Jiangning and Wang, Yabiao}, title = {The devil is in the details: Enhancing Video Virtual Try-On via Keyframe-Driven Details Injection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9182-9191} }
PFGNet: A Fully Convolutional Frequency-Guided Peripheral Gating Network for Efficient Spatiotemporal Predictive Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Xinyong and Sun, Changbin and Wang, Yong and Yang, Hongyu and Wu, Yuankai}, title = {PFGNet: A Fully Convolutional Frequency-Guided Peripheral Gating Network for Efficient Spatiotemporal Predictive Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38848-38858} }
MMTIT-Bench: A Multilingual and Multi-Scenario Benchmark with Cognition-Perception-Reasoning Guided Text-Image Machine Translation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Gengluo and Zhang, Chengquan and Liang, Yupu and Shen, Huawen and Zhang, Yaping and Lyu, Pengyuan and Wang, Weinong and Wan, Xingyu and Zeng, Gangyan and Hu, Han and Ma, Can and Zhou, Yu}, title = {MMTIT-Bench: A Multilingual and Multi-Scenario Benchmark with Cognition-Perception-Reasoning Guided Text-Image Machine Translation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16593-16602} }
Interact2Ar: Full-Body Human-Human Interaction Generation via Autoregressive Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ruiz-Ponce_2026_CVPR, author = {Ruiz-Ponce, Pablo and Escalera, Sergio and Garc{\'\i}a-Rodr{\'\i}guez, Jos{\'e} and Deng, Jiankang and Potamias, Rolandos Alexandros}, title = {Interact2Ar: Full-Body Human-Human Interaction Generation via Autoregressive Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23559-23569} }
ChronoGS: Disentangling Invariants and Changes in Multi-Period Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zhongtao and Dai, Jiaqi and Zhu, Qingtian and Li, Yilong and Su, Mai and Zhu, Fei and Gai, Meng and Wang, Shaorong and Pan, Chengwei and Chen, Yisong and Wang, Guoping}, title = {ChronoGS: Disentangling Invariants and Changes in Multi-Period Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8298-8307} }
Progress-Think: Semantic Progress Reasoning for Vision-Language Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Shuo and Wang, Yucheng and Lian, Guoxin and Wang, Yongcai and Chen, Maiyue and Wang, Kaihui and Zhang, Bo and Su, Zhizhong and Zhou, Yutian and Li, Wanting and Li, Deying and Fan, Zhaoxin}, title = {Progress-Think: Semantic Progress Reasoning for Vision-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4076-4086} }
PRIMU: Uncertainty Estimation for Novel Views in Gaussian Splatting from Primitive-Based Representations of Error and Coverage-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gottwald_2026_CVPR, author = {Gottwald, Thomas and Heinert, Edgar and Stehr, Peter and Galappaththige, Chamuditha Jayanga and Rottmann, Matthias}, title = {PRIMU: Uncertainty Estimation for Novel Views in Gaussian Splatting from Primitive-Based Representations of Error and Coverage}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11871-11880} }
A Supervised Multi-task Framework for Joint cryo-ET Restoration Enabled by Generative Physical Simulation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xinsheng and Yang, Zhidong and Wan, Xiaohua and Han, Renmin and Tang, Shuai and Dong, Hao and Zhang, Fa and Hu, Bin}, title = {A Supervised Multi-task Framework for Joint cryo-ET Restoration Enabled by Generative Physical Simulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21057-21066} }
AssemblyBench: Physics-Aware Assembly of Complex Industrial Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Danrui and Zhang, Jiahao and Egger, Bernhard and Chatterjee, Moitreya and Lohit, Suhas and Marks, Tim K. and Cherian, Anoop}, title = {AssemblyBench: Physics-Aware Assembly of Complex Industrial Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17326-17335} }
RoSAMDepth: Robust Self-supervised Depth Estimation Leveraging Segment Anything Model-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Xuanang and Ning, Zhiwei and Zhang, Gengming and Cao, Jiaxi and Yang, Runze and Zheng, Zhonglong and Yang, Jie and Xiao, Rong and Liu, Wei}, title = {RoSAMDepth: Robust Self-supervised Depth Estimation Leveraging Segment Anything Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34138-34147} }
IVAAN: Instance-level Vision-Language Alignment via Attribute-Guided Text Prompts Generation for Nuclei Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Jeong_2026_CVPR, author = {Jeong, Jaehoon and Hu, Yi and Kim, Soopil and Jang, Jongseong and Lee, Soonyoung and Park, Sang Hyun}, title = {IVAAN: Instance-level Vision-Language Alignment via Attribute-Guided Text Prompts Generation for Nuclei Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29305-29314} }
FedAlign: Differentially Private Distribution Alignment for Non-IID Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Peng and Zhang, Jiapeng and Song, Yingjie and Xiao, Xiong and Tang, Zhuo}, title = {FedAlign: Differentially Private Distribution Alignment for Non-IID Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31769-31778} }
SPAR: Single-Pass Any-Resolution ViT for Open-vocabulary Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Kombol_2026_CVPR, author = {Kombol, Naomi and Martinovi{\'c}, Ivan and {\v{S}}egvi{\'c}, Sini{\v{s}}a and Tolias, Giorgos}, title = {SPAR: Single-Pass Any-Resolution ViT for Open-vocabulary Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27579-27589} }
NeoVerse: Enhancing 4D World Model with in-the-wild Monocular Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yuxue and Fan, Lue and Shi, Ziqi and Peng, Junran and Wang, Feng and Zhang, Zhaoxiang}, title = {NeoVerse: Enhancing 4D World Model with in-the-wild Monocular Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40340-40351} }
PyramidalWan: On Making Pretrained Video Model Pyramidal for Efficient Inference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Korzhenkov_2026_CVPR, author = {Korzhenkov, Denis and Karjauv, Adil and Karnewar, Animesh and Ghafoorian, Mohsen and Habibian, Amirhossein}, title = {PyramidalWan: On Making Pretrained Video Model Pyramidal for Efficient Inference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16153-16162} }
Revisiting Model Stitching In the Foundation Model Era-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mai_2026_CVPR, author = {Mai, Zheda and Zhang, Ke and Wang, Fu-En and Wang, Zixiao Ken and Chen, Albert Y. C. and Xia, Lu and Sun, Min and Chao, Wei-Lun and Kuo, Cheng-Hao}, title = {Revisiting Model Stitching In the Foundation Model Era}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41342-41351} }
Simple but Effective Triplet-Based Compression Strategies for Compact Visual Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Sattler_2026_CVPR, author = {Sattler, Torsten and Kukelova, Zuzana}, title = {Simple but Effective Triplet-Based Compression Strategies for Compact Visual Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29047-29059} }
Nestwork: Conditional 3D Furnished House Layout Generation through Latent Heterogeneous Graph Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Miao_2026_CVPR, author = {Miao, Shuhan and Cao, Biru and Zhuang, Junling}, title = {Nestwork: Conditional 3D Furnished House Layout Generation through Latent Heterogeneous Graph Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27093-27103} }
Generalizable Co-Salient Object Detection via Mixed Content-Style Modulation-
[pdf]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Guanting and Hu, Shenglong and Zhang, Kaihua and Liu, Guangcan and Xia, Min}, title = {Generalizable Co-Salient Object Detection via Mixed Content-Style Modulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32173-32183} }
SHAPE: Structure-aware Hierarchical Unsupervised Domain Adaptation with Plausibility Evaluation for Medical Image Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Linkuan and Xia, Yinghao and Shen, Yufei and Li, Xiangyu and Du, Wenjie and Cong, Cong and Wei, Leyi and Su, Ran and Jin, Qiangguo}, title = {SHAPE: Structure-aware Hierarchical Unsupervised Domain Adaptation with Plausibility Evaluation for Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30000-30010} }
Region-Aware Instance Consistency Learning for Micro-Expression Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Yaomin and Chen, C. L. Philip and Xu, Shiting and Liu, Haiqi and Zhang, Tong}, title = {Region-Aware Instance Consistency Learning for Micro-Expression Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34428-34438} }
Seeing the Scene Matters: Revealing Forgetting in Video Understanding Models with a Scene-Aware Long-Video Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Seng Nam and Chen, Hao and Ho, Chenglam and Mao, Xinyu and Wang, Jinping and Zhang, Yu and Li, Chao}, title = {Seeing the Scene Matters: Revealing Forgetting in Video Understanding Models with a Scene-Aware Long-Video Benchmark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4515-4525} }
Lafite: A Generative Latent Field for 3D Native Texturing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Chia-Hao and Guo, Yuan-Chen and Zou, Zi-Xin and Yuan, Ze and Luo, Guan and Qi, Xiaojuan and Liang, Ding and Cao, Yan-Pei and Zhang, Song-Hai}, title = {Lafite: A Generative Latent Field for 3D Native Texturing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19960-19971} }
Obstruction Reasoning for Robotic Grasping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiao_2026_CVPR, author = {Jiao, Runyu and Bortolon, Matteo and Giuliari, Francesco and Fasoli, Alice and Povoli, Sergio and Mei, Guofeng and Wang, Yiming and Poiesi, Fabio}, title = {Obstruction Reasoning for Robotic Grasping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20755-20764} }
Beyond Multiple Choice: Verifiable OpenQA for Robust Vision-Language RFT-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yesheng and Li, Hao and Xu, Haiyu and Pei, Baoqi and Wang, Jiahao and Zhao, Mingxuan and Zheng, Jing-Shu and He, Zheqi and Yao, JG and Yang, Xi and Qin, Bowen and Zhang, Jiajun}, title = {Beyond Multiple Choice: Verifiable OpenQA for Robust Vision-Language RFT}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18859-18869} }
PartDiffuser: Part-wise 3D Mesh Generation via Discrete Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yichen and Li, Hong and Zhu, Haodong and Yang, Linin and Lei, Guojun and Xu, Sheng and Zhang, Baochang}, title = {PartDiffuser: Part-wise 3D Mesh Generation via Discrete Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19940-19949} }
Sampling-Aware Quantization for Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Qian and Song, Jie and Wan, Yuanyu and Wang, Huiqiong and Song, Mingli}, title = {Sampling-Aware Quantization for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35831-35840} }
AdvFM: Lookahead Flow-Matching Velocity-Field Attacks for Imperceptible and Transferable Adversarial Examples-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Runze and Wang, Zeyue and Sun, Fanghui and Liu, Rui and Yan, Yihan and Wang, Shen and Zhang, Zhaoyang}, title = {AdvFM: Lookahead Flow-Matching Velocity-Field Attacks for Imperceptible and Transferable Adversarial Examples}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42290-42299} }
MLLMSplat: A 2D MLLM-Powered Framework for 3D Gaussian Splatting Understanding, Generation, and Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Xiu_2026_CVPR, author = {Xiu, Jingqiao and Wang, Can and Xu, Dong}, title = {MLLMSplat: A 2D MLLM-Powered Framework for 3D Gaussian Splatting Understanding, Generation, and Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33301-33311} }
Plenoptic Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2026_CVPR, author = {Fu, Xiao and Tang, Shitao and Shi, Min and Liu, Xian and Gu, Jinwei and Liu, Ming-Yu and Lin, Dahua and Lin, Chen-Hsuan}, title = {Plenoptic Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16142-16152} }
CraftMesh: High-Fidelity Generative Mesh Manipulation via Poisson Seamless Fusion-
[pdf]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, James Jincheng and Wu, Yuxiao and Cai, Youcheng and Liu, Ligang}, title = {CraftMesh: High-Fidelity Generative Mesh Manipulation via Poisson Seamless Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5934-5944} }
Label What Matters: Modality-Balanced and Difficulty-Aware Multimodal Active Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Yuqiao and Wang, Xu and Liang, Tengfei and Hao, Yiqing and Jin, Yi and Yu, Hui}, title = {Label What Matters: Modality-Balanced and Difficulty-Aware Multimodal Active Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29391-29399} }
D-Convexity: A Unified Differentiable Convex Shape Prior via Quasi-Concavity for Data-driven Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Shengzhe and Yan, Hao}, title = {D-Convexity: A Unified Differentiable Convex Shape Prior via Quasi-Concavity for Data-driven Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34755-34764} }
ACPV-Net: All-Class Polygonal Vectorization for Seamless Vector Map Generation from Aerial Imagery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiao_2026_CVPR, author = {Jiao, Weiqin and Cheng, Hao and Vosselman, George and Persello, Claudio}, title = {ACPV-Net: All-Class Polygonal Vectorization for Seamless Vector Map Generation from Aerial Imagery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13244-13253} }
Fighting Hallucinations with Counterfactuals: Diffusion-Guided Perturbations for LVLM Hallucination Suppression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dastmalchi_2026_CVPR, author = {Dastmalchi, Hamidreza and An, Aijun and Cheraghian, Ali and Barzamini, Hamed}, title = {Fighting Hallucinations with Counterfactuals: Diffusion-Guided Perturbations for LVLM Hallucination Suppression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4178-4187} }
PromptStereo: Zero-Shot Stereo Matching via Structure and Motion Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xianqi and Yang, Hao and Wang, Hangtian and Cheng, Junda and Xu, Gangwei and Lin, Min and Yang, Xin}, title = {PromptStereo: Zero-Shot Stereo Matching via Structure and Motion Prompts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12565-12575} }
Uncertainty-Aware Exploratory Direct Preference Optimization for Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Huatian and Mao, Zhendong and Zhang, Lei and Zhang, Yongdong}, title = {Uncertainty-Aware Exploratory Direct Preference Optimization for Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37831-37841} }
Calibri: Enhancing Diffusion Transformers via Parameter-Efficient Calibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tokhchukov_2026_CVPR, author = {Tokhchukov, Danil and Mirzoeva, Aysel and Kuznetsov, Andrey and Sobolev, Konstantin}, title = {Calibri: Enhancing Diffusion Transformers via Parameter-Efficient Calibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4635-4644} }
AT-VLA: Adaptive Tactile Injection for Enhanced Feedback Reaction in Vision-Language-Action Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xiaoqi and Cai, Muhe and Xu, Jiadong and Zhu, Juan and Fan, Hongwei and Shen, Yan and Ren, Guangrui and Dong, Hao}, title = {AT-VLA: Adaptive Tactile Injection for Enhanced Feedback Reaction in Vision-Language-Action Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28764-28774} }
Seeing through Light and Darkness: Sensor-Physics Grounded Deblurring HDR NeRF from Single-Exposure Images and Events-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qi_2026_CVPR, author = {Qi, Yunshan and Zhu, Lin and Bao, Nan and Zhao, Yifan and Li, Jia}, title = {Seeing through Light and Darkness: Sensor-Physics Grounded Deblurring HDR NeRF from Single-Exposure Images and Events}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22723-22732} }
SAGE: Training Smart Any-Horizon Agents for Long Video Reasoning with Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Jain_2026_CVPR, author = {Jain, Jitesh and Li, Jialuo and Ma, Zixian and Zhang, Jieyu and Kim, Chris Dongjoo and Lee, Sangho and Tripathi, Rohun and Gupta, Tanmay and Clark, Christopher and Shi, Humphrey}, title = {SAGE: Training Smart Any-Horizon Agents for Long Video Reasoning with Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41478-41488} }
AutoRegressive Generation with B-rep Holistic Token Sequence Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jiahao and Bai, Yunpeng and Dai, Yongkang and Guo, Hao and Gan, Hongping and Shi, Yilei}, title = {AutoRegressive Generation with B-rep Holistic Token Sequence Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24363-24372} }
Is your VLM Sky-Ready? A Comprehensive Spatial Intelligence Benchmark for UAV Navigation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Lingfeng and Zhang, Yuchen and Li, Hongsheng and Fu, Haoxiang and Tang, Yingbo and Ye, Hangjun and Chen, Long and Liang, Xiaojun and Hao, Xiaoshuai and Ding, Wenbo}, title = {Is your VLM Sky-Ready? A Comprehensive Spatial Intelligence Benchmark for UAV Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25904-25913} }
Similarity-Consistent Likelihood Diffusion enables Hidden Person Detection from Wall Reflections-
[pdf]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Zhiwen and Zhou, Hao and Qi, Huiyu and Huang, Zhao and Zhang, Guangyuan and Jiang, Shaowei and Tang, Wenwen and Yang, Bin and Liu, Jin and Zhang, Xiaoshuai and Huang, Xingru}, title = {Similarity-Consistent Likelihood Diffusion enables Hidden Person Detection from Wall Reflections}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13908-13917} }
Optical Flow Matching: Reframing Optical Flow as Continuous Transport Dynamics-
[pdf]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Ao and Li, Xin and Yang, Fan and Li, Yuezun and Yuan, Zhaoquan and Zhao, Shan and Su, Bing and Wu, Xiao}, title = {Optical Flow Matching: Reframing Optical Flow as Continuous Transport Dynamics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28669-28678} }
Towards Multimodal Domain Generalization with Few Labels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Hongzhao and Dong, Hao and Wan, Hualei and Li, Shupan and Xu, Mingliang and Khan, Muhammad Haris}, title = {Towards Multimodal Domain Generalization with Few Labels}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15070-15079} }
Smoothing the Score Function to Enhance Generalization in Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Xinyu and Zhang, Jiawei and Wright, Stephen J.}, title = {Smoothing the Score Function to Enhance Generalization in Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43208-43217} }
SegMo: Co-Designing Content-Aware Sparsity and Locally-Cohesive Segment Parallelism for Efficient VLM Inference-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Haojuan and Tang, Ruohan and Cheng, Dongzhou and Zhang, Zongpu and Li, Jian and Wang, Jiaqi}, title = {SegMo: Co-Designing Content-Aware Sparsity and Locally-Cohesive Segment Parallelism for Efficient VLM Inference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33598-33608} }
Reclaiming Lost Text Layers for Source-Free Cross-Domain Few-Shot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zhenyu and Chen, Guangyao and Zou, Yixiong and Li, Yuhua and Li, Ruixuan}, title = {Reclaiming Lost Text Layers for Source-Free Cross-Domain Few-Shot Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15080-15090} }
TempoMaster: Efficient Long Video Generation via Next-Frame-Rate Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Yukuo and Liu, Cong and Wang, Junke and Liu, Junqi and Huang, Haibin and Wu, Zuxuan and Zhang, Chi and Li, Xuelong}, title = {TempoMaster: Efficient Long Video Generation via Next-Frame-Rate Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30414-30424} }
DTG-Restore: Training-Free Diffusion Refinement for Generative Video Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Yesiltepe_2026_CVPR, author = {Yesiltepe, Hidir and PNVR, Koutilya and Pathak, Gaurav and Bodla, Navaneeth and Singh, Bharat and Yanardag, Pinar and Xie, Jinrong}, title = {DTG-Restore: Training-Free Diffusion Refinement for Generative Video Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23335-23344} }
Learning from Oblivion: Predicting Knowledge-Overflowed Weights via Retrodiction of Forgetting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2026_CVPR, author = {Jang, Jinhyeok and Kim, Jaehong and Kim, Jung Uk}, title = {Learning from Oblivion: Predicting Knowledge-Overflowed Weights via Retrodiction of Forgetting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39890-39900} }
EvoID: Reinforced Evolution for Identity-Preserving Video Generation-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yiheng and Qiu, Zhaofan and Liu, Zunxu and Pan, Yingwei and Yao, Ting and Mei, Tao}, title = {EvoID: Reinforced Evolution for Identity-Preserving Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41870-41880} }
CogniVerse: Revolutionizing Multi-Modal Retrieval-Augmented Generation with Cognitive Reflection and Geometric Reasoning-
[pdf]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Xiang and Fang, Wanlong and Wang, Changshuo}, title = {CogniVerse: Revolutionizing Multi-Modal Retrieval-Augmented Generation with Cognitive Reflection and Geometric Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7923-7935} }
Batman: Benign Knowledge Alignment Through Malicious Null Space in Federated Backdoor Attack-
[pdf]
[supp]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Wenwen and Huang, Wenke and Fang, Yiyang and Qu, Wenjie and Zhang, Jiaheng and Ye, Mang}, title = {Batman: Benign Knowledge Alignment Through Malicious Null Space in Federated Backdoor Attack}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13316-13325} }
Bezier Degradation Modeling for LiDAR-based Human Motion Capture-
[pdf]
[supp]
[bibtex]@InProceedings{An_2026_CVPR, author = {An, Xiaoqi and Zhao, Lin and Li, Jun and Gong, Chen and Yang, Jian}, title = {Bezier Degradation Modeling for LiDAR-based Human Motion Capture}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14027-14037} }
Enhancing Spatial Understanding in Image Generation via Reward Modeling-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Zhenyu and Feng, Chaoran and Deng, Yufan and Wu, Jie and Li, Xiaojie and Wang, Rui and Chen, Yunpeng and Zhou, Daquan}, title = {Enhancing Spatial Understanding in Image Generation via Reward Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27249-27259} }
Towards Efficient Medical Reasoning with Minimal Fine-Tuning Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhuang_2026_CVPR, author = {Zhuang, Xinlin and Tang, Feilong and Yang, Haolin and Liu, Xiwei and Hu, Ming and Li, Huifa and Xue, Haochen and He, Junjun and Ge, Zongyuan and Li, Yichen and Qian, Ying and Razzak, Imran}, title = {Towards Efficient Medical Reasoning with Minimal Fine-Tuning Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20223-20232} }
The SA-FARI Dataset: Segment Anything in Footage of Animals for Recognition and Identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wasmuht_2026_CVPR, author = {Wasmuht, Dante and Brookes, Otto and Schall, Maximilian and Palencia, Pablo and Beirne, Christopher and Burghardt, Tilo and Mirmehdi, Majid and K{\"u}hl, Hjalmar and Arandjelovic, Mimi and Pottie, Sam and Bermant, Peter and Asheim, Brandon and Toh, Yi Jin and Elzinga, Adam and Holmberg, Jason Allan and Whitworth, Andrew and Flatt, Eleanor and Gustafson, Laura and Ryali, Chaitanya and Hu, Yuan-Ting and Guo, Baishan and Westbury, Andrew and Saenko, Kate and Suris, Didac}, title = {The SA-FARI Dataset: Segment Anything in Footage of Animals for Recognition and Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21679-21689} }
Flow4DGS-SLAM: Optical Flow-Guided 4D Gaussian Splatting SLAM-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yunsong and Lee, Gim Hee}, title = {Flow4DGS-SLAM: Optical Flow-Guided 4D Gaussian Splatting SLAM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33364-33373} }
Adaptive Anisotropic Gaussian Splatting for Multi-contrast MRI Arbitrary-Scale Super-Resolution with Anatomy Guidance-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Qiuhai and Chen, Kang and Lu, Zhengjie and Wang, Tingting and Fang, Faming and Zhang, Guixu}, title = {Adaptive Anisotropic Gaussian Splatting for Multi-contrast MRI Arbitrary-Scale Super-Resolution with Anatomy Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2188-2197} }
Perceptual 3D Simulation With Physical World Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Wanhee and Kotar, Klemen and Venkatesh, Rahul Mysore and Watrous, Jared and Yamins, Daniel L. K.}, title = {Perceptual 3D Simulation With Physical World Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27219-27228} }
Intervention-Aware Multiscale Representation Learning from Imaging Phenomics and Perturbation Transcriptomics-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Jiayuan and Liu, Ruoqi and Gu, Zishan and Zhang, Ping}, title = {Intervention-Aware Multiscale Representation Learning from Imaging Phenomics and Perturbation Transcriptomics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41826-41835} }
PMRNet: Physics-informed Multi-scale Refinement Network for Medical Image Segmentation-
[pdf]
[bibtex]@InProceedings{Kang_2026_CVPR, author = {Kang, Boce}, title = {PMRNet: Physics-informed Multi-scale Refinement Network for Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15659-15668} }
HalluGen: Synthesizing Realistic and Controllable Hallucinations for Evaluating Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Seunghoi and Tregidgo, Henry F. J. and Jin, Chen and Figini, Matteo and Alexander, Daniel C.}, title = {HalluGen: Synthesizing Realistic and Controllable Hallucinations for Evaluating Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32550-32560} }
NeuroSeg Meets DINOv3: Transferring 2D Self-Supervised Visual Priors to 3D Neuron Segmentation via DINOv3 Initialization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Yik San and Zhao, Runkai and Cai, Weidong}, title = {NeuroSeg Meets DINOv3: Transferring 2D Self-Supervised Visual Priors to 3D Neuron Segmentation via DINOv3 Initialization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30053-30064} }
TESSERA: Temporal Embeddings of Surface Spectra for Earth Representation and Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Zhengpeng and Atzberger, Clement and Jaffer, Sadiq and Knezevic, Jovana and Sormunen, Silja and Young, Robin and Lisaius, Madeline C. and Immitzer, Markus and Jackson, Toby and Ball, James and Coomes, David A. and Madhavapeddy, Anil and Blake, Andrew and Keshav, Srinivasan}, title = {TESSERA: Temporal Embeddings of Surface Spectra for Earth Representation and Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34818-34831} }
FlexAvatar: Flexible Large Reconstruction Model for Animatable Gaussian Head Avatars with Detailed Deformation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Cheng and Su, Zhuo and Wang, Liao and Guo, Chen and Li, Zhaohu and Long, Chengjiang and Lv, Zheng and Sun, Jingxiang and Zhang, Chenyangguang and Liu, Yebin}, title = {FlexAvatar: Flexible Large Reconstruction Model for Animatable Gaussian Head Avatars with Detailed Deformation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18229-18240} }
DualSplat: Robust 3D Gaussian Splatting via Pseudo-Mask Bootstrapping from Reconstruction Failures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xu and Wang, Zhiru and Xie, Shiyun and Pan, Chengwei and Chen, Yisong}, title = {DualSplat: Robust 3D Gaussian Splatting via Pseudo-Mask Bootstrapping from Reconstruction Failures}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4912-4921} }
Bi-directional Autoregressive Diffusion for Large Complex Motion Interpolation-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Yongrui and Zhao, Shijie and Yao, Mingde and Li, Junlin and Zhang, Li and Liu, Xiaohong and Dou, Qi and Gu, Jinwei and Xue, Tianfan}, title = {Bi-directional Autoregressive Diffusion for Large Complex Motion Interpolation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35779-35788} }
INSID3: Training-Free In-Context Segmentation with DINOv3-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cuttano_2026_CVPR, author = {Cuttano, Claudia and Trivigno, Gabriele and Reich, Christoph and Cremers, Daniel and Masone, Carlo and Roth, Stefan}, title = {INSID3: Training-Free In-Context Segmentation with DINOv3}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21638-21648} }
Towards Realistic and Consistent Orbital Video Generation via 3D Foundation Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Rong and Zha, Ruyi and Cheng, Ziang and Yang, Jiayu and Purkait, Pulak and Li, Hongdong}, title = {Towards Realistic and Consistent Orbital Video Generation via 3D Foundation Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18407-18417} }
Relightful Video Portrait Harmonization-
[pdf]
[supp]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, Jun Myeong and Yoon, Jae Shin and Qi, Luchao and Sengupta, Roni and Lee, Joon-Young}, title = {Relightful Video Portrait Harmonization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23356-23366} }
ArtPro: Self-Supervised Articulated Object Reconstruction with Adaptive Integration of Mobility Proposals-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xuelu and Wang, Zhaonan and Wang, Xiaogang and Wu, Lei and Li, Manyi and Tu, Changhe}, title = {ArtPro: Self-Supervised Articulated Object Reconstruction with Adaptive Integration of Mobility Proposals}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13897-13907} }
MOS: Mitigating Optical-SAR Modality Gap for Cross-Modal Ship Re-Identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yujian and Liu, Hankun and Niu, Guanglin}, title = {MOS: Mitigating Optical-SAR Modality Gap for Cross-Modal Ship Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30335-30345} }
Latent Chain-of-Thought World Modeling for End-to-End Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Shuhan and Chitta, Kashyap and Chen, Yuxiao and Tian, Ran and You, Yurong and Wang, Yan and Luo, Wenjie and Cao, Yulong and Kr{\"a}henb{\"u}hl, Philipp and Pavone, Marco and Ivanovic, Boris}, title = {Latent Chain-of-Thought World Modeling for End-to-End Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39724-39733} }
Enabling Supervised Learning of Generative Signatures for Generalized AI-Generated Images Detection-
[pdf]
[bibtex]@InProceedings{Fei_2026_CVPR, author = {Fei, Jianwei and Dai, Yunshu and Zhou, Xiaoyu and Xia, Zhihua and Piva, Alessandro}, title = {Enabling Supervised Learning of Generative Signatures for Generalized AI-Generated Images Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14084-14094} }
BDNet: Bio-Inspired Dual-Backbone Small Object Detection Network-
[pdf]
[supp]
[bibtex]@InProceedings{Guan_2026_CVPR, author = {Guan, Wenchao and Lin, Chuan and Huang, Sihan and Wang, Xiongzhen and Pang, Xintao}, title = {BDNet: Bio-Inspired Dual-Backbone Small Object Detection Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32724-32734} }
RoboTAG: End-to-end Robot Pose Estimation via Topological Alignment Graph-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yifan and Zhan, Fangneng and Li, Wanhua and Sun, Haowen and Fragkiadaki, Katerina and Pfister, Hanspeter}, title = {RoboTAG: End-to-end Robot Pose Estimation via Topological Alignment Graph}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35146-35155} }
Incentivizing Versatile Video Reasoning in MLLMs via Data-Efficient Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xiaodong and Wu, Zhirong and Huang, Langling and Zheng, Yuxi and Peng, Peixi}, title = {Incentivizing Versatile Video Reasoning in MLLMs via Data-Efficient Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5444-5454} }
SegQuant: A Semantics-Aware and Generalizable Quantization Framework for Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jiaji and Sun, Ruichao and Zhao, Hailiang and Wu, Jiaju and Chen, Peng and Li, Hao and Liu, Yuying and Chow, Kingsum and Xiong, Gang and Deng, Shuiguang}, title = {SegQuant: A Semantics-Aware and Generalizable Quantization Framework for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43717-43726} }
GEM: Generating LiDAR World Model via Deformable Mamba-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Yang and Liu, Zhaojiang and Meng, Qiang and Liu, Youquan and Weng, Renliang and Qian, Jianjun and Yang, Jian and Xie, Jin}, title = {GEM: Generating LiDAR World Model via Deformable Mamba}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24227-24236} }
Efficient All-Pairs Correlation Volume Sampling for Optical Flow Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Briedis_2026_CVPR, author = {Briedis, Karlis Martins and Gross, Markus and Schroers, Christopher}, title = {Efficient All-Pairs Correlation Volume Sampling for Optical Flow Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5700-5709}, internal-note = {Non-name tokens removed from the author list, in source order: Studios; 0000-0003-4012-6292, ETH Zurich; Studios; 0009-0003-9324-779X, ETH Zurich; 0000-0003-1473-1878, Studios} }
F^2HDR: Two-Stage HDR Video Reconstruction via Flow Adapter and Physical Motion Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Yue_2026_CVPR, author = {Yue, Huanjing and Li, Dawei and Tu, Shaoxiong and Yang, Jingyu}, title = {{F$^2$HDR}: Two-Stage HDR Video Reconstruction via Flow Adapter and Physical Motion Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33985-33994} }
Multimodal Continual Instruction Tuning with Dynamic Gradient Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Songze and Gao, Mingyu and Su, Tonghua and Zhang, Xu-Yao and Wang, Zhongjie}, title = {Multimodal Continual Instruction Tuning with Dynamic Gradient Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10820-10829} }
AnyID: Ultra-Fidelity Universal Identity-Preserving Video Generation from Any Visual References-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Jiahao and Sheng, Hualian and Cai, Sijia and Yang, Yuxiao and Zhang, Weizhan and Yan, Caixia and Deng, Bing and Ye, Jieping}, title = {AnyID: Ultra-Fidelity Universal Identity-Preserving Video Generation from Any Visual References}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12808-12817} }
ResAD: Normalized Residual Trajectory Modeling for End-to-End Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Zhiyu and Chen, Shaoyu and Yin, Haoran and Zhang, Xinbang and Zou, Jialv and Wang, Xinggang and Zhang, Qian and Zhang, Lefei}, title = {ResAD: Normalized Residual Trajectory Modeling for End-to-End Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3729-3739} }
Featurising Pixels from Dynamic 3D Scenes with Linear In-Context Learners-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Araslanov_2026_CVPR, author = {Araslanov, Nikita and Sundermeyer, Martin and Matsuki, Hidenobu and Tan, David Joseph and Tombari, Federico}, title = {Featurising Pixels from Dynamic 3D Scenes with Linear In-Context Learners}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21443-21452} }
FoSS: Modeling Long-Range Dependencies and Multimodal Uncertainty in Trajectory Prediction via Fourier-State Space Integration-
[pdf]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Yizhou and Jiang, Genze and Cheng, Yihua and Wang, Kezhi}, title = {FoSS: Modeling Long-Range Dependencies and Multimodal Uncertainty in Trajectory Prediction via Fourier-State Space Integration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3750-3760} }
Hi-Lo Prune: Look at What You'll Lose before Pruning with Hierarchical Token Selection-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Zixun and Dong, Yubo and Fan, Hehe and Yang, Yi}, title = {Hi-Lo Prune: Look at What You'll Lose before Pruning with Hierarchical Token Selection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31941-31951} }
ParallelVLM: Lossless Video-LLM Acceleration with Visual Alignment Aware Parallel Speculative Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kong_2026_CVPR, author = {Kong, Quan and Shen, Yuhao and Ji, Yicheng and Li, Huan and Wang, Cong}, title = {ParallelVLM: Lossless Video-LLM Acceleration with Visual Alignment Aware Parallel Speculative Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11392-11402} }
CRIT: Graph-Based Automatic Data Synthesis to Enhance Cross-Modal Multi-Hop Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sung_2026_CVPR, author = {Sung, Junyoung and Lyu, Seungwoo and Kim, Minjun and An, Sumin and Nagrani, Arsha and Seo, Paul Hongsuck}, title = {CRIT: Graph-Based Automatic Data Synthesis to Enhance Cross-Modal Multi-Hop Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19143-19154} }
Rethinking Knowledge Transfer in Image Quality Assessment: A Perceptual Preference Structure Alignment Perspective-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Aobo and Wu, Jinjian and Liu, Yongxu and Ma, Jupo and Dong, Weisheng}, title = {Rethinking Knowledge Transfer in Image Quality Assessment: A Perceptual Preference Structure Alignment Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1310-1319} }
EMO-R3: Reflective Reinforcement Learning for Emotional Reasoning in Multimodal Large Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Yiyang and Huang, Wenke and Fu, Pei and Yang, Yihao and Su, Kehua and Luo, Zhenbo and Luan, Jian and Ye, Mang}, title = {EMO-R3: Reflective Reinforcement Learning for Emotional Reasoning in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {745-755} }
RNN as Linear Transformer: A Closer Investigation into Representational Potentials of Visual Mamba Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Timing and Wang, Feng and Wei, Guoyizhe}, title = {RNN as Linear Transformer: A Closer Investigation into Representational Potentials of Visual Mamba Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27398-27408} }
Illumination-Consistent Human-Scene Reconstruction from Monocular Video-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Rongbin and Li, Wensheng and Zeng, Lingzhe and Wang, Dong and Gao, Chengying}, title = {Illumination-Consistent Human-Scene Reconstruction from Monocular Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14050-14061} }
VideoWorld 2: Learning Transferable Knowledge from Real-world Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2026_CVPR, author = {Ren, Zhongwei and Wei, Yunchao and Yu, Xiao and Luo, Guixun and Zhao, Yao and Kang, Bingyi and Feng, Jiashi and Jin, Xiaojie}, title = {VideoWorld 2: Learning Transferable Knowledge from Real-world Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40569-40580} }
MixFlow Training: Alleviating Exposure Bias with Slowed Interpolation Mixture-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Hui and Lyu, Jiayue and Wang, Fu-Yun and Cheng, Kaihui and Zhu, Siyu and Wang, Jingdong}, title = {MixFlow Training: Alleviating Exposure Bias with Slowed Interpolation Mixture}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9095-9105} }
RDFace: A Benchmark Dataset for Rare Disease Facial Image Analysis under Extreme Data Scarcity and Phenotype-Aware Synthetic Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Ganlin and Long, Yuxi and Ali, Hafsa and Lou, Erin and Butt, Fahad and Liu, Qian and Wang, Yang and Hu, Pingzhao}, title = {RDFace: A Benchmark Dataset for Rare Disease Facial Image Analysis under Extreme Data Scarcity and Phenotype-Aware Synthetic Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42976-42986} }
TherA: Thermal-Aware Visual-Language Prompting for Controllable RGB-to-Thermal Infrared Translation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Dong-Guw and Rhee, Tai Hyoung and Jang, Hyunsoo and Shin, Young-Sik and Shin, Ukcheol and Kim, Ayoung}, title = {TherA: Thermal-Aware Visual-Language Prompting for Controllable RGB-to-Thermal Infrared Translation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36803-36813} }
TTP: Test-Time Padding for Adversarial Detection and Robust Adaptation on Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zhiwei and Pang, Yitian and Wang, Weining and Sun, Zhenan and Li, Qi}, title = {TTP: Test-Time Padding for Adversarial Detection and Robust Adaptation on Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1513-1522} }
DP-FedAdamW: An Efficient Optimizer for Differentially Private Federated Large Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Jin and Xi, Ning and Miao, Yinbin and Liu, Junkang}, title = {DP-FedAdamW: An Efficient Optimizer for Differentially Private Federated Large Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3358-3368} }
AMB3R: Accurate Feed-forward Metric-scale 3D Reconstruction with Backend-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Hengyi and Agapito, Lourdes}, title = {AMB3R: Accurate Feed-forward Metric-scale 3D Reconstruction with Backend}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14612-14625} }
MLLM-HWSI: A Multimodal Large Language Model for Hierarchical Whole Slide Image Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Alawode_2026_CVPR, author = {Alawode, Basit and Mahmood, Arif and Al Radi, Muaz Khalifa and Albastaki, Shahad and Khan, Asim and Bilal, Muhammad and Abdalla, Moshira Ali and Bennamoun, Mohammed and Javed, Sajid}, title = {MLLM-HWSI: A Multimodal Large Language Model for Hierarchical Whole Slide Image Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13732-13743} }
PatchScene: Patch-based Voxel Diffusion Model for Large-Scale Scene Completion-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Qingdong and Zhu, Jiajun and Zhu, Shilin and He, Xinjing and Lu, Chao and Wang, Huanran and Zhang, Jiyao}, title = {PatchScene: Patch-based Voxel Diffusion Model for Large-Scale Scene Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16499-16508} }
Through the Frequency Lens: Cross-Domain Generalisable Gaze Estimation with Adaptive Modulation-
[pdf]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Yang and Bao, Yiwei and Lu, Feng}, title = {Through the Frequency Lens: Cross-Domain Generalisable Gaze Estimation with Adaptive Modulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42858-42868} }
PosterIQ: A Design Perspective Benchmark for Poster Understanding and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Yuheng and Zhang, Wen and Duan, Haodong and Zou, Xingxing}, title = {PosterIQ: A Design Perspective Benchmark for Poster Understanding and Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29295-29304} }
Rethinking 2D-3D Registration: A Novel Network for High-Value Zone Selection and Representation Consistency Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Zhixin and Liao, Bohao and Deng, Jiacheng and Yin, Xiaotian and Li, Xinjun and Chen, Yujia and Yin, Baoqun and Zhang, Tianzhu}, title = {Rethinking 2D-3D Registration: A Novel Network for High-Value Zone Selection and Representation Consistency Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39052-39063} }
Fuel Gauge: Estimating Chain-of-Thought Length Ahead of Time in Large Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yuedong and Wei, Xiwen and Munir, Mustafa and Marculescu, Radu}, title = {Fuel Gauge: Estimating Chain-of-Thought Length Ahead of Time in Large Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33436-33445} }
FB-CLIP: Fine-Grained Zero-Shot Anomaly Detection with Foreground-Background Disentanglement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Ming and Huo, Yongsheng and Dou, Mingyu and Yin, Jianfu and Zhao, Peng and Wang, Yao and Hu, Cong and Hu, Bingliang and Wang, Quan}, title = {FB-CLIP: Fine-Grained Zero-Shot Anomaly Detection with Foreground-Background Disentanglement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35659-35669} }
MM-ReCoder: Advancing Chart-to-Code Generation with Reinforcement Learning and Self-Correction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Zitian and Zhang, Xu and Yuan, Jianbo and Zou, Yang and Gunjal, Varad and Jiang, Songyao and Modolo, Davide}, title = {MM-ReCoder: Advancing Chart-to-Code Generation with Reinforcement Learning and Self-Correction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22164-22173} }
HATS: Hardness-Aware Trajectory Synthesis for GUI Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2026_CVPR, author = {Shao, Rui and Gao, Ruize and Xie, Bin and Li, Yixing and Zhou, Kaiwen and Wang, Shuai and Guan, Weili and Chen, Gongwei}, title = {HATS: Hardness-Aware Trajectory Synthesis for GUI Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27471-27481} }
Frequency-Aware Flow Matching for High-Quality Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2026_CVPR, author = {Ren, Sucheng and Yu, Qihang and He, Ju and Shen, Xiaohui and Chen, Liang-Chieh}, title = {Frequency-Aware Flow Matching for High-Quality Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9074-9083} }
DINO Eats CLIP: Adapting Beyond Knowns for Open-set 3D Object Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Xinwei and Zheng, Yansong and Han, Qianru and Wang, Zhichuan and Cai, Yuxuan and Zhou, Yang and Xia, Jingbo and Wang, Yulong and Xiang, Jinhai and Bai, Xiang}, title = {DINO Eats CLIP: Adapting Beyond Knowns for Open-set 3D Object Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34704-34713} }
SwiftTailor: Efficient 3D Garment Generation with Geometry Image Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pham_2026_CVPR, author = {Pham, Phuc and Tran, Uy Dieu and Hua, Binh-Son and Nguyen, Phong}, title = {SwiftTailor: Efficient 3D Garment Generation with Geometry Image Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27169-27178} }
MedLIME: A Distribution-Aligned and Evidence-Supported Framework for Medical Saliency Explanations-
[pdf]
[supp]
[bibtex]@InProceedings{Magazine_2026_CVPR, author = {Magazine, Raghav and Li, Xingjian and Xu, Min}, title = {MedLIME: A Distribution-Aligned and Evidence-Supported Framework for Medical Saliency Explanations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38926-38935} }
SAGE: Style-Adaptive Generalization for Privacy-Constrained Semantic Segmentation Across Domains-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Qingmei and Zhang, Yang and Zhang, Peifeng and Fu, Haohuan and Zheng, Juepeng}, title = {SAGE: Style-Adaptive Generalization for Privacy-Constrained Semantic Segmentation Across Domains}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13134-13144} }
Rethinking Dataset Distillation: Hard Truths about Soft Labels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dey_2026_CVPR, author = {Dey, Priyam and Sahdev, Aditya and Bhati, Sunny and Mopuri, Konda Reddy and Radhakrishnan, Venkatesh Babu}, title = {Rethinking Dataset Distillation: Hard Truths about Soft Labels}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {178-187} }
LIFT and PLACE: A Simple, Stable, and Effective Knowledge Distillation Framework for Lightweight Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Hyunsoo and Yeo, Sangyeop and Yoo, Jaejun}, title = {LIFT and PLACE: A Simple, Stable, and Effective Knowledge Distillation Framework for Lightweight Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5564-5573} }
EditCtrl: Disentangled Local and Global Control for Real-Time Generative Video Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Litman_2026_CVPR, author = {Litman, Yehonathan and Liu, Shikun and Seyb, Dario and Milef, Nicholas and Zhou, Yang and Marshall, Carl and Tulsiani, Shubham and Leak, Caleb}, title = {EditCtrl: Disentangled Local and Global Control for Real-Time Generative Video Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8965-8975} }
BiFM: Bidirectional Flow Matching for Few-Step Image Editing and Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2026_CVPR, author = {Dai, Yasong and Hayder, Zeeshan and Ahmedt-Aristizabal, David and Li, Hongdong}, title = {BiFM: Bidirectional Flow Matching for Few-Step Image Editing and Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23325-23334} }
Diffusion with a Linguistic Compass: Steering the Generation of Clinically Plausible Future sMRI Representations for Early MCI Conversion Prediction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Zhihao and Li, Chaozhuo and Zhang, Litian and Zhang, Xi}, title = {Diffusion with a Linguistic Compass: Steering the Generation of Clinically Plausible Future sMRI Representations for Early MCI Conversion Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42691-42700} }
Vector Prism: Animating Vector Graphics by Stratifying Semantic Structure-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yun_2026_CVPR, author = {Yun, Jooyeol and Choo, Jaegul}, title = {Vector Prism: Animating Vector Graphics by Stratifying Semantic Structure}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17316-17325} }
CIGPose: Causal Intervention Graph Neural Network for Whole-Body Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Bohao and Cao, Zhicheng and Li, Huixian and Guo, Yangming}, title = {CIGPose: Causal Intervention Graph Neural Network for Whole-Body Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23484-23494} }
Lite Any Stereo: Efficient Zero-Shot Stereo Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jing_2026_CVPR, author = {Jing, Junpeng and Luo, Weixun and Mao, Ye and Mikolajczyk, Krystian}, title = {Lite Any Stereo: Efficient Zero-Shot Stereo Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21725-21735} }
Adaptive Bayesian Early-Exit Networks for Efficient Non-Transferable Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Luan_2026_CVPR, author = {Luan, Siyu and Li, Yan and Chen, Zhong and Wang, Zhenyi}, title = {Adaptive Bayesian Early-Exit Networks for Efficient Non-Transferable Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24514-24523} }
MoLingo: Motion-Language Alignment for Text-to-Human Motion Generation-
[pdf]
[supp]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Yannan and Tiwari, Garvita and Zhang, Xiaohan and Bora, Pankaj and Birdal, Tolga and Lenssen, Jan Eric and Pons-Moll, Gerard}, title = {MoLingo: Motion-Language Alignment for Text-to-Human Motion Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38387-38398} }
Beyond Objects: Contextual Synthetic Data Generation for Fine-Grained Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, William and Wu, Xindi and Deng, Zhiwei and Tureci, Esin and Russakovsky, Olga}, title = {Beyond Objects: Contextual Synthetic Data Generation for Fine-Grained Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22498-22508} }
Masking Teacher and Reinforcing Student for Distilling Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Byung-Kwan and Wang, Yu-Chiang Frank and Hachiuma, Ryo}, title = {Masking Teacher and Reinforcing Student for Distilling Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10126-10141} }
DextER: Language-driven Dexterous Grasp Generation with Embodied Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Junha and Park, Eunha and Cho, Minsu}, title = {DextER: Language-driven Dexterous Grasp Generation with Embodied Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1067-1077} }
Lighting in Motion: Spatiotemporal HDR Lighting Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bolduc_2026_CVPR, author = {Bolduc, Christophe and Philip, Julien and Ma, Li and He, Mingming and Debevec, Paul and Lalonde, Jean-Fran\c{c}ois}, title = {Lighting in Motion: Spatiotemporal HDR Lighting Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19696-19705} }
Contact-Aware Neural Dynamics-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jing_2026_CVPR, author = {Jing, Changwei and Bandi, Jai Krishna and Ye, Jianglong and Duan, Yan and Abbeel, Pieter and Wang, Xiaolong and Yi, Sha}, title = {Contact-Aware Neural Dynamics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13442-13452} }
VL-Eraser: Vacuum Distillation for Machine Unlearning in Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yili and Dai, Lu and Huang, Tairan and Xu, Yijie and Xiong, Hui}, title = {VL-Eraser: Vacuum Distillation for Machine Unlearning in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31504-31513} }
CASR: A Robust Cyclic Framework for Arbitrary Large-Scale Super-Resolution with Distribution Alignment and Self-Similarity Awareness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Wenhao and Zhao, Zhaoran and Lu, Peng and Li, Sheng and Qiao, Qian and Li, RuiDe}, title = {CASR: A Robust Cyclic Framework for Arbitrary Large-Scale Super-Resolution with Distribution Alignment and Self-Similarity Awareness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2136-2145} }
Bootstrap Your Own AV-Proxies: Adaptive Contrastive and Prototype Learning for Audio-Visual Segmentation-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Junbo and Su, Hang and Li, Zhaofan and Dong, Hang and Sun, Chao}, title = {Bootstrap Your Own AV-Proxies: Adaptive Contrastive and Prototype Learning for Audio-Visual Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23061-23071} }
CoordSpeaker: Exploiting Gesture Captioning for Coordinated Caption-Empowered Co-Speech Gesture Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Fengyi and Yang, Sicheng and Yang, Wenming}, title = {CoordSpeaker: Exploiting Gesture Captioning for Coordinated Caption-Empowered Co-Speech Gesture Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30761-30771} }
Self-Corrected Image Generation with Explainable Latent Rewards-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Yinyi and Gokhale, Hrishikesh and Savvides, Marios and Wang, Jindong and He, Shengfeng}, title = {Self-Corrected Image Generation with Explainable Latent Rewards}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20088-20097} }
FlashMotion: Few-Step Controllable Video Generation with Trajectory Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Quanhao and Xing, Zhen and Wang, Rui and Cao, Haidong and Dai, Qi and Dong, Daoguo and Wu, Zuxuan}, title = {FlashMotion: Few-Step Controllable Video Generation with Trajectory Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8986-8996} }
Quantum-Gated Task-interaction Knowledge Distillation for Pre-trained Model-based Class-Incremental Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Linjie and Xiao, Huiyu and Cao, Jiarui and Wu, Zhenyu and Ji, Yang}, title = {Quantum-Gated Task-interaction Knowledge Distillation for Pre-trained Model-based Class-Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3920-3929} }
Neu-PiG: Neural Preconditioned Grids for Fast Dynamic Surface Reconstruction on Long Sequences-
[pdf]
[supp]
[bibtex]@InProceedings{Kaltheuner_2026_CVPR, author = {Kaltheuner, Julian and Dr\"oge, Hannah and Plack, Markus and Stotko, Patrick and Klein, Reinhard}, title = {Neu-PiG: Neural Preconditioned Grids for Fast Dynamic Surface Reconstruction on Long Sequences}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36539-36549} }
VecAttention: Vector-wise Sparse Attention for Accelerating Long Context Inference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Anmin and Yang, Ruixuan and Jiang, Huiqiang and Lin, Bin and Sun, Minmin and Li, Yong and Zhang, Chen and Xie, Tao}, title = {VecAttention: Vector-wise Sparse Attention for Accelerating Long Context Inference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41299-41310} }
GIFSplat: Generative Prior-Guided Iterative Feed-Forward 3D Gaussian Splatting from Sparse Views-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Tianyu and Xiang, Wei and Han, Kang and Lu, Yu and Wu, Di and Liu, Gaowen and Kompella, Ramana Rao}, title = {GIFSplat: Generative Prior-Guided Iterative Feed-Forward 3D Gaussian Splatting from Sparse Views}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26031-26040} }
See It, Say It, Sorted: An Iterative Training-Free Framework for Visually-Grounded Multimodal Reasoning in LVLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yongchang and Ma, Oliver and Liu, Tianyi and Zhou, Guangquan and Chen, Yang}, title = {See It, Say It, Sorted: An Iterative Training-Free Framework for Visually-Grounded Multimodal Reasoning in LVLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11933-11942} }
VRCLIP: Multimodal Canonical Correlation Alignment for CLIP-Driven Vision-Radio Person Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Rui and Wang, Yaqi and Li, Yadong and Geng, Ruixu and Wang, Jianyang and Ying, Qijun and Zhang, Dongheng and Hu, Yang and Chen, Yan}, title = {VRCLIP: Multimodal Canonical Correlation Alignment for CLIP-Driven Vision-Radio Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25536-25546} }
Statistical Characteristic-Guided Denoising for Rapid High-Resolution Transmission Electron Microscopy Imaging-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Hesong and Wu, Ziqi and Shao, Ruiwen and Fu, Ying}, title = {Statistical Characteristic-Guided Denoising for Rapid High-Resolution Transmission Electron Microscopy Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34050-34060} }
HERBench: A Benchmark for Multi-Evidence Integration in Video Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ben_Ami_2026_CVPR, author = {Ben Ami, Dan and Serussi, Gabriele and Cohen, Kobi and Baskin, Chaim}, title = {HERBench: A Benchmark for Multi-Evidence Integration in Video Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4505-4514} }
LAMP: Language-Assisted Motion Planning for Controllable Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kizil_2026_CVPR, author = {Kizil, Muhammed Burak and Sanli, Enes and Mitra, Niloy J. and Erdem, Erkut and Erdem, Aykut and Ceylan, Duygu}, title = {LAMP: Language-Assisted Motion Planning for Controllable Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12829-12838} }
MMBench-GUI: A Unified Hierarchical Evaluation Framework for Multi-Platform GUI Agents-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xuehui and Wu, Zhenyu and Xie, JingJing and Ding, Zichen and Yang, Bowen and Li, Zehao and Liu, Zhaoyang and Li, Qingyun and Dong, Xuan and Chen, Zhe and Wang, Weiyun and Zhao, Xiangyu and Chen, Jixuan and Duan, Haodong and Xie, Tianbao and Yang, Chenyu and Su, Shiqian and Yu, Yue and Zhang, Yanting and Yue, Xiangyu and Su, Weijie and Zhu, Xizhou and Shen, Wei and Dai, Jifeng and Wang, Wenhai}, title = {MMBench-GUI: A Unified Hierarchical Evaluation Framework for Multi-Platform GUI Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6239-6248} }
Hermite Radial Basis Function for Surface Reconstruction via Differentiable Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Blanc_2026_CVPR, author = {Blanc, Hugo and Deschaud, Jean-Emmanuel and Paljic, Alexis}, title = {Hermite Radial Basis Function for Surface Reconstruction via Differentiable Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15376-15386} }
GDRO: Group-level Reward Post-training Suitable for Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yiyang and Chen, Xi and Xu, Xiaogang and Liu, Yu and Zhao, Hengshuang}, title = {GDRO: Group-level Reward Post-training Suitable for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43505-43513} }
Learning 3D Representations for Spatial Intelligence from Unposed Multi-View Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Bo and Lai, Qiuxia and Sun, Zeren and Shu, Xiangbo and Yao, Yazhou and Wang, Wenguan}, title = {Learning 3D Representations for Spatial Intelligence from Unposed Multi-View Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22550-22560} }
mmWaveFlow: Unified Enhancement and Generation of mmWave Human Point Clouds-
[pdf]
[supp]
[bibtex]@InProceedings{Su_2026_CVPR, author = {Su, Chang and Jin, Beihong and Shi, Qiwen and Wang, Zhi}, title = {mmWaveFlow: Unified Enhancement and Generation of mmWave Human Point Clouds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31366-31376} }
Occlusion-Aware SORT: Observing Occlusion for Robust Multi-Object Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Chunjiang and Ma, Jianbo and Shen, Li and Chen, Yanru and Chen, Liangyin}, title = {Occlusion-Aware SORT: Observing Occlusion for Robust Multi-Object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42560-42570} }
V-DPM: 4D Video Reconstruction with Dynamic Point Maps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sucar_2026_CVPR, author = {Sucar, Edgar and Insafutdinov, Eldar and Lai, Zihang and Vedaldi, Andrea}, title = {V-DPM: 4D Video Reconstruction with Dynamic Point Maps}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14502-14511} }
DETACH : Decomposed Spatio-Temporal Alignment for Exocentric Video and Ambient Sensors with Staged Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Yoon_2026_CVPR, author = {Yoon, Junho and Jeong, Jaemo and Kim, Hyunju and Lee, Dongman}, title = {DETACH : Decomposed Spatio-Temporal Alignment for Exocentric Video and Ambient Sensors with Staged Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12860-12870} }
TIACam: Text-Anchored Invariant Feature Learning with Auto-Augmentation for Camera-Robust Zero-Watermarking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tanvir_2026_CVPR, author = {Tanvir, Abdullah and Dasgupta, Agnibh and Zhong, Xin}, title = {TIACam: Text-Anchored Invariant Feature Learning with Auto-Augmentation for Camera-Robust Zero-Watermarking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43030-43039} }
AnthroTAP: Learning Point Tracking with Real-World Motion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, In\`es Hyeonsu and Cho, Seokju and Koo, Jahyeok and Park, Junghyun and Huang, Jiahui and Lee, Honglak and Lee, Joon-Young and Kim, Seungryong}, title = {AnthroTAP: Learning Point Tracking with Real-World Motion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42516-42526} }
ViStoryBench: Comprehensive Benchmark Suite for Story Visualization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhuang_2026_CVPR, author = {Zhuang, Cailin and Huang, Ailin and Hu, Yaoqi and Wu, Jingwei and Cheng, Wei and Liao, Jiaqi and Wang, Hongyuan and Liao, Xinyao and Cai, Weiwei and Xu, Hengyuan and Zhang, Xuanyang and Zeng, Xianfang and Huang, Zhewei and Yu, Gang and Zhang, Chi}, title = {ViStoryBench: Comprehensive Benchmark Suite for Story Visualization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9455-9467} }
MMCP-GEN: A Modality-Extensible Diffusion Language Model for Conditional Protein Sequence Generation-
[pdf]
[supp]
[bibtex]@InProceedings{An_2026_CVPR, author = {An, Zeyu and Lin, Wanyu and Tan, Feng and Wang, Shujun}, title = {MMCP-GEN: A Modality-Extensible Diffusion Language Model for Conditional Protein Sequence Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15762-15772} }
Hierarchical Process Reward Models are Symbolic Vision Learners-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shan and Chen, Aotian and Zou, Kai and Gu, Jindong and Xue, Yuan and van den Hengel, Anton}, title = {Hierarchical Process Reward Models are Symbolic Vision Learners}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22185-22194} }
PPM-CLIP: Probabilistic Prompt Modeling for Generalizable AI-Generated Image Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xinyuan and Lai, Yingxin and Luo, Zhiming and Liu, Zhihui}, title = {PPM-CLIP: Probabilistic Prompt Modeling for Generalizable AI-Generated Image Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21316-21325} }
Arcadia: Toward a Full-Lifecycle Framework for Embodied Lifelong Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Minghe and Li, Juncheng and Lin, Yuze and Liu, Xuqi and Ji, Jiaming and Pan, Xiaoran and Xu, Zihan and Li, Xian and Li, Mingjie and Ji, Wei and Wei, Rong and Tang, Rui and Wang, Qizhou and Shen, Kai and Xiao, Jun and Wu, Qi and Tang, Siliang and Zhuang, Yueting}, title = {Arcadia: Toward a Full-Lifecycle Framework for Embodied Lifelong Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1031-1040} }
CodePercept: Code-Grounded Visual STEM Perception for MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guan_2026_CVPR, author = {Guan, Tongkun and Yang, Zhibo and Wan, Jianqiang and Yang, Mingkun and Guo, Zhentao and Hu, Zijian and Luo, Ruilin and Chen, Ruizhe and Jiang, Songtao and Wang, Peng and Shen, Wei and Lin, Junyang and Yang, Xiaokang}, title = {CodePercept: Code-Grounded Visual STEM Perception for MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33542-33552} }
Bridging the Perception Gap in Image Super-Resolution Evaluation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2026_CVPR, author = {Su, Shaolin and Rocafort, Josep M. and Xue, Danna and Serrano-Lozano, David and Sun, Lei and Vazquez-Corral, Javier}, title = {Bridging the Perception Gap in Image Super-Resolution Evaluation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30532-30542} }
Collaborative Multi-Mode Pruning for Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Zimeng and Wang, Yunhong and Wang, Donghao and Chen, Jiaxin}, title = {Collaborative Multi-Mode Pruning for Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39561-39571} }
Unified Spherical Frontend: Learning Rotation-Equivariant Representations of Spherical Images from Any Camera-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Mukai and Dabhi, Mosam and Xie, Liuyue and Scherer, Sebastian and Jeni, L\'aszl\'o A.}, title = {Unified Spherical Frontend: Learning Rotation-Equivariant Representations of Spherical Images from Any Camera}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6106-6115} }
RADAR: VQ-VAE Decoder of VAR is a Good Student for Restoring Against Degradation by Acceleration-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Ziyang and Zhang, Yue and Wang, Mingdao and Zhang, Yasen and Song, Teer and Tian, Yu and Li, Xueming}, title = {RADAR: VQ-VAE Decoder of VAR is a Good Student for Restoring Against Degradation by Acceleration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5273-5282} }
Beyond Weak Supervision: MLLMs-Guided Graded Knowledge Distillation for Unsupervised Camouflaged Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Huafeng and Zhu, Chenguang and Lyu, Yueming and Shan, Caifeng}, title = {Beyond Weak Supervision: MLLMs-Guided Graded Knowledge Distillation for Unsupervised Camouflaged Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27547-27557} }
Selective, Regularized, and Calibrated: Harnessing Vision Foundation Models for Cross-Domain Few-Shot Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Junyuan and Xiang, Xunzhi and Li, Wenbin and Fan, Qi and Gao, Yang}, title = {Selective, Regularized, and Calibrated: Harnessing Vision Foundation Models for Cross-Domain Few-Shot Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12385-12395} }
LDP-Slicing: Local Differential Privacy for Images via Randomized Bit-Plane Slicing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Yuanming and Li, Chengqi and He, Wenbo}, title = {LDP-Slicing: Local Differential Privacy for Images via Randomized Bit-Plane Slicing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {92-101} }
Annotation-Efficient Coreset Selection for Context-dependent Segmentation-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jin and Cao, Zhe and Yang, Biwen and Zhang, Ruiheng}, title = {Annotation-Efficient Coreset Selection for Context-dependent Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20410-20420} }
DuoGen: Towards Autonomous Interleaved Multimodal Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Min and Zeng, Xiaohui and Huang, Jiannan and Cui, Yin and Ferroni, Francesco and Li, Jialuo and Li, Zhaoshuo and Balaji, Yogesh and Wang, Haoxiang and Lin, Tsung-Yi and Fu, Xiao and Zhao, Yue and Chen, Chieh-Yun and Liu, Ming-Yu and Shi, Humphrey}, title = {DuoGen: Towards Autonomous Interleaved Multimodal Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21901-21911} }
MIBURI: Towards Expressive Interactive Gesture Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mughal_2026_CVPR, author = {Mughal, M. Hamza and Dabral, Rishabh and Demberg, Vera and Theobalt, Christian}, title = {MIBURI: Towards Expressive Interactive Gesture Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40031-40041} }
Feed-forward Gaussian Registration for Head Avatar Creation and Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Prinzler_2026_CVPR, author = {Prinzler, Malte and Gotardo, Paulo and Tang, Siyu and Bolkart, Timo}, title = {Feed-forward Gaussian Registration for Head Avatar Creation and Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25270-25280} }
Evidential Deep Partial Label Learning to Quantify Disambiguation Uncertainty-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Jinfu and Li, Jiangnan and Zhong, Xiaohui and Ren, Kangrui and Jiang, Zhencun and Gan, Min and Gu, Tianhao and Huang, Linqing}, title = {Evidential Deep Partial Label Learning to Quantify Disambiguation Uncertainty}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24770-24779} }
REL-SF4PASS: Panoramic Semantic Segmentation with REL Depth Representation and Spherical Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xuewei and Bao, Xinghan and Chen, Zhimin and Li, Xi}, title = {REL-SF4PASS: Panoramic Semantic Segmentation with REL Depth Representation and Spherical Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27676-27685} }
EEGiT: Teaching Vision Transformers to Understand the EEG signal-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Jiahao and Xu, Chenghao and Wang, Wei and Yang, Erkun and Deng, Cheng}, title = {EEGiT: Teaching Vision Transformers to Understand the EEG signal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40438-40447} }
MERLIN: Building Low-SNR Robust Multimodal LLMs for Electromagnetic Signals-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2026_CVPR, author = {Shen, Junyu and She, Zhendong and Zhang, Chenghanyu and Sun, Yuchuang and Luo, Luqing and Tan, Dingwei and Guo, Zonghao and Guo, Bo and Han, Zehua and Xie, Wupeng and Mu, Yaxin and Zhang, Peng and Li, Peipei and Wang, Fengxiang and Sun, Yangang and Sun, Maosong}, title = {MERLIN: Building Low-SNR Robust Multimodal LLMs for Electromagnetic Signals}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8653-8663} }
Think Before You Drive: World Model-Inspired Multimodal Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2026_CVPR, author = {Liao, Haicheng and Shen, Huanming and Wang, Bonan and Li, Yongkang and Tang, Yihong and Wang, Chengyue and Zhuang, Dingyi and Chen, Kehua and Yang, Hai and Xu, Chengzhong and Li, Zhenning}, title = {Think Before You Drive: World Model-Inspired Multimodal Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3677-3687} }
Learning Surgical Robotic Manipulation with 3D Spatial Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sheng_2026_CVPR, author = {Sheng, Yu and Wang, Lidian and Chu, Xiaomeng and Deng, Jiajun and Cheng, Min and Zhang, Yanyong and Hua, Bei and Li, Houqiang and Ji, Jianmin}, title = {Learning Surgical Robotic Manipulation with 3D Spatial Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42441-42451} }
Beyond the Global Scores: Fine-Grained Token Grounding as a Robust Detector of LVLM Hallucinations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Tuan Dung and Ho, Minh Khoi and Chen, Qi and Xie, Yutong and Nguyen, Cam-Tu and Nguyen, Minh Khoi and Nguyen, Dang Huy Pham and van den Hengel, Anton and Verjans, Johan and Le Nguyen, Phi and Phan, Vu Minh Hieu}, title = {Beyond the Global Scores: Fine-Grained Token Grounding as a Robust Detector of LVLM Hallucinations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40235-40244} }
BabyVLM-V2: Toward Developmentally Grounded Pretraining and Benchmarking of Vision Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Shengao and Wang, Wenqi and Wang, Zecheng and Whitton, Max and Wakeham, Michael and Chandra, Arjun and Huang, Joey and Zhu, Pengyue and Chen, Helen and Li, David and Li, Jeffrey and Li, Shawn and Zagula, Andrew and Zhao, Amy and Zhu, Andrew and Nakamura, Sayaka and Yamamoto, Yuki and Yokono, Jerry Jun and Mueller, Aaron and Plummer, Bryan A. and Saenko, Kate and Saligrama, Venkatesh and Gong, Boqing}, title = {BabyVLM-V2: Toward Developmentally Grounded Pretraining and Benchmarking of Vision Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23696-23708} }
Align Once to Explain: Feature Alignment for Scalable B-cosification of Foundational Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Maser_2026_CVPR, author = {Maser, Raphael and Gairola, Siddhartha and Rao, Sukrut and Schiele, Bernt}, title = {Align Once to Explain: Feature Alignment for Scalable B-cosification of Foundational Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9869-9879} }
PhysX-Anything: Simulation-Ready Physical 3D Assets from Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Ziang and Hong, Fangzhou and Chen, Zhaoxi and Pan, Liang and Liu, Ziwei}, title = {PhysX-Anything: Simulation-Ready Physical 3D Assets from Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5839-5848} }
DeDelayed: Deleting Remote Inference Delay via On-Device Correction-
[pdf]
[supp]
[bibtex]@InProceedings{Jacobellis_2026_CVPR, author = {Jacobellis, Dan and Ulhaq, Mateen and Racap\'e, Fabien and Choi, Hyomin and Yadwadkar, Neeraja J.}, title = {DeDelayed: Deleting Remote Inference Delay via On-Device Correction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19339-19348} }
HoloCine: Holistic Generation of Cinematic Multi-Shot Long Video Narratives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Meng_2026_CVPR, author = {Meng, Yihao and Ouyang, Hao and Yu, Yue and Wang, Qiuyu and Wang, Wen and Cheng, Ka Leong and Wang, Hanlin and Ma, Shuailei and Li, Yixuan and Chen, Cheng and Zeng, Yanhong and Zhu, Xing and Shen, Yujun and Qu, Huamin}, title = {HoloCine: Holistic Generation of Cinematic Multi-Shot Long Video Narratives}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {461-471} }
S$^2$-MLLM: Boosting Spatial Reasoning Capability of MLLMs for 3D Visual Grounding with Structural Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Beining and Zhu, Siting and Jin, Zhao and Li, Junxian and Wang, Hesheng}, title = {{S$^2$}-MLLM: Boosting Spatial Reasoning Capability of MLLMs for 3D Visual Grounding with Structural Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2557-2569} }
Region-Adaptive Sampling for Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Ziming and Yang, Yifan and Zhang, Chengruidong and Zhang, Yiqi and Qiu, Lili and You, Yang and Yang, Yuqing}, title = {Region-Adaptive Sampling for Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2346-2356} }
Scene Reconstruction as Mapping Priors for 3D Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Fu_2026_CVPR, author = {Fu, Yang and Zou, Yuliang and Xiang, Hao and Huang, Xin and Bai, Yijing and Song, Chen and Shi, Weijing and Thattai, Govind and Anguelov, Dragomir and Tan, Mingxing and Li, Yingwei}, title = {Scene Reconstruction as Mapping Priors for 3D Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18734-18744} }
Spatial-SSRL: Enhancing Spatial Understanding via Self-Supervised Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yuhong and Zhang, Beichen and Zang, Yuhang and Cao, Yuhang and Xing, Long and Dong, Xiaoyi and Duan, Haodong and Lin, Dahua and Wang, Jiaqi}, title = {Spatial-SSRL: Enhancing Spatial Understanding via Self-Supervised Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9570-9581} }
SoliReward: Mitigating Susceptibility to Reward Hacking and Annotation Noise in Video Generation Reward Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lian_2026_CVPR, author = {Lian, Jiesong and Zhong, Ruizhe and Zhou, Zixiang and Mi, Xiaoyue and Hu, Long and Zhou, Yuan and Lu, Qinglin and Hao, Yixue and Yan, Junchi}, title = {SoliReward: Mitigating Susceptibility to Reward Hacking and Annotation Noise in Video Generation Reward Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12798-12807} }
Color When It Counts: Grayscale-Guided Online Triggering for Always-On Streaming Video Sensing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Weitong and Zhang, Hang and Huang, Yukai and Sun, Shitong and Deng, Jiankang and Xu, Songcen and Song, Jifei and Zhang, Zhensong}, title = {Color When It Counts: Grayscale-Guided Online Triggering for Always-On Streaming Video Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9784-9793} }
UniSpector: Towards Universal Open-set Defect Recognition via Spectral-Contrastive Visual Prompting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Geonuk and Kim, Minhoi and Lee, Kangil and Kim, Minsu and Jeon, Hyeonseong and Han, Jeonghoon and Lim, Hyoungjoon and Yim, Junho}, title = {UniSpector: Towards Universal Open-set Defect Recognition via Spectral-Contrastive Visual Prompting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6261-6270} }
Globscope: Toward a Global View of the Loss Landscape-
[pdf]
[supp]
[bibtex]@InProceedings{Mustaq_2026_CVPR, author = {Mustaq, Mashiat and Tricoche, Xavier M.}, title = {Globscope: Toward a Global View of the Loss Landscape}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5263-5272} }
PaNDaS: Learnable Shape Interpolation Modeling with Localized Control-
[pdf]
[supp]
[bibtex]@InProceedings{Besnier_2026_CVPR, author = {Besnier, Thomas and Pierson, Emery and Arguillere, Sylvain and Ovsjanikov, Maks and Daoudi, Mohamed}, title = {PaNDaS: Learnable Shape Interpolation Modeling with Localized Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13102-13112} }
Reward Forcing: Efficient Streaming Video Generation with Rewarded Distribution Matching Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Yunhong and Zeng, Yanhong and Li, Haobo and Ouyang, Hao and Wang, Qiuyu and Cheng, Ka Leong and Zhu, Jiapeng and Cao, Hengyuan and Zhang, Zhipeng and Zhu, Xing and Shen, Yujun and Zhang, Min}, title = {Reward Forcing: Efficient Streaming Video Generation with Rewarded Distribution Matching Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34385-34397} }
AlignPose: Generalizable 6D Pose Estimation via Multi-view Feature-metric Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Mikestikova_2026_CVPR, author = {Mike\v{s}t{\'\i}kov\'a, Anna \v{S}\'arov\'a and Fourmy, M\'ed\'eric and Cifka, Martin and Sivic, Josef and Petrik, Vladimir}, title = {AlignPose: Generalizable 6D Pose Estimation via Multi-view Feature-metric Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14626-14636} }
Den-TP: A Density-Balanced Data Curation and Evaluation Framework for Trajectory Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Ruining and Xu, Yi and Fu, Yun and Su, Lili}, title = {Den-TP: A Density-Balanced Data Curation and Evaluation Framework for Trajectory Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10632-10641} }
Towards Streaming Referring Video Segmentation via Large Language Model-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Wenkang and Yang, Kaicheng and An, Xiang and Li, Qiang and Feng, Ziyong and Yang, Wankou and Deng, Jiankang}, title = {Towards Streaming Referring Video Segmentation via Large Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24598-24607} }
Graph-to-Frame RAG: Visual-Space Knowledge Fusion for Training-Free and Auditable Video Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Songyuan and Yu, Weijiang and Liu, Ziyu and Tang, Guijian and Yang, Wenjing and Tan, Huibin and Xiao, Nong}, title = {Graph-to-Frame RAG: Visual-Space Knowledge Fusion for Training-Free and Auditable Video Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33805-33815} }
VCP-Attack: Visual-Contrastive Projection for Transferable Black-Box Targeted Attacks on Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Jiawei and Du, Minjie and Qin, Zihan and Wang, Zhuoran and Xie, Lizhe and Hu, Yining}, title = {VCP-Attack: Visual-Contrastive Projection for Transferable Black-Box Targeted Attacks on Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30110-30119} }
iSHIFT: Lightweight Slow-Fast GUI Agent with Adaptive Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mehrotra_2026_CVPR, author = {Mehrotra, Sarthak and Rebbapragada, Sairam VC and Bonthu, Mani and Balasubramanian, Vineeth N.}, title = {iSHIFT: Lightweight Slow-Fast GUI Agent with Adaptive Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6229-6238} }
Guiding Diffusion Models with Fine-Grained Conditions and Semantics-Preserving Sampling for One-Shot Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Xiaojun and Liao, Tianchi and Liu, Zhiyuan and Chen, Chuan and Zheng, Zibin}, title = {Guiding Diffusion Models with Fine-Grained Conditions and Semantics-Preserving Sampling for One-Shot Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31779-31789} }
MemFlow: A Lightweight Forward Memorizing Framework for Quick Domain Adaptive Feature Mapping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lv_2026_CVPR, author = {Lv, Jianming and Wang, Chengjun and Liang, Depin and Ma, Qianli and Chen, Wei and Cheng, Xueqi}, title = {MemFlow: A Lightweight Forward Memorizing Framework for Quick Domain Adaptive Feature Mapping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36968-36977} }
NTK-Guided Implicit Neural Teaching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Chen and Zuo, Wei and Cheng, Bingyang and Wang, Yikun and Kou, Wei-Bin and Wu, Yik-Chung and Wong, Ngai}, title = {NTK-Guided Implicit Neural Teaching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17248-17258} }
See What We Cannot See: A Geo-guided Reasoning Benchmark for Object Counting under Adverse Earth Observation Conditions-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Jiayi and Tan, Zhihong and Wei, Hongchen and Yang, Daiqin and Chen, Zhenzhong}, title = {See What We Cannot See: A Geo-guided Reasoning Benchmark for Object Counting under Adverse Earth Observation Conditions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42191-42201} }
AdaRadar: Rate Adaptive Spectral Compression for Radar-based Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Jinho and Chun, Se Young and Seok, Mingoo}, title = {AdaRadar: Rate Adaptive Spectral Compression for Radar-based Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19349-19359} }
Lighting-grounded Video Generation with Renderer-based Agent Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Ziqi and Yang, Taoyu and Chang, Zheng and Li, Si and Jiang, Han and Weng, Shuchen and Shi, Boxin}, title = {Lighting-grounded Video Generation with Renderer-based Agent Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20055-20065} }
RefAV: Towards Planning-Centric Scenario Mining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Davidson_2026_CVPR, author = {Davidson, Cainan and Ramanan, Deva and Peri, Neehar}, title = {RefAV: Towards Planning-Centric Scenario Mining}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21537-21548} }
GeoCoT: Towards Reliable Remote Sensing Reasoning with Manifold Perspective-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Daixun and Li, Zirui and He, Sibo and Tian, Jiayun and Cao, Mingxiang and Xie, Weiying and Wang, Yunke and Zhang, Xin and Zhang, Yusi and Li, Yunsong and Xu, Chang and Fang, Leyuan}, title = {GeoCoT: Towards Reliable Remote Sensing Reasoning with Manifold Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20575-20585} }
WildPose: A Unified Framework for Robust Pose Estimation in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Jianhao and Zhu, Liyuan and Zhu, Zihan and Armeni, Iro}, title = {WildPose: A Unified Framework for Robust Pose Estimation in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28903-28913} }
IMS3: Breaking Distributional Aggregation in Diffusion-Based Dataset Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Chenru and Chen, Yunyi and Yang, Zijun and Zhou, Joey Tianyi and Zhang, Chi}, title = {IMS3: Breaking Distributional Aggregation in Diffusion-Based Dataset Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26667-26677} }
PhenoYieldNet: Learning Crop-Aware Phenological Responses for Multi-Crop Yield Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Yu and Zhu, Xiaogang and Zeng, Shan and Xiang, Wei and Bishop, Thomas Francis and Wang, Zhiyong and Hu, Kun}, title = {PhenoYieldNet: Learning Crop-Aware Phenological Responses for Multi-Crop Yield Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15870-15879} }
PG-VTON: Single-Pass Training-Free Virtual Try-On via Patch-Guided Reference Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Guohao and Peng, Yuxin}, title = {PG-VTON: Single-Pass Training-Free Virtual Try-On via Patch-Guided Reference Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7859-7868} }
ParTY: Part-Guidance for Expressive Text-to-Motion Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Heo_2026_CVPR, author = {Heo, KunHo and Kim, SuYeon and Gwon, Yonghyun and Kim, Youngbin and Cho, MyeongAh}, title = {ParTY: Part-Guidance for Expressive Text-to-Motion Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23549-23558} }
BOP-ASK: Object-Interaction Reasoning for Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bhat_2026_CVPR, author = {Bhat, Vineet and Kim, Sungsu and Blukis, Valts and Heinrich, Greg and Krishnamurthy, Prashanth and Karri, Ramesh and Birchfield, Stan and Khorrami, Farshad and Tremblay, Jonathan}, title = {BOP-ASK: Object-Interaction Reasoning for Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16746-16757} }
Label-Free Cross-Task LoRA Merging with Null-Space Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Wonyoung and Jeong, Wooseong and Yoon, Kuk-Jin}, title = {Label-Free Cross-Task LoRA Merging with Null-Space Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {847-859} }
Decoupled Residual Denoising Diffusion Models for Unified and Data Efficient Image-to-Image Translation-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Ziyue and Hou, Jiahe and Xia, Hongyu and Xie, Xinrui and Wang, Feifei and Zhou, Yuyin and Wang, Wei and Liu, Jiawei and Qu, Liangqiong}, title = {Decoupled Residual Denoising Diffusion Models for Unified and Data Efficient Image-to-Image Translation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35967-35977} }
2ndMatch: Finetuning Pruned Diffusion Models via Second-Order Jacobian Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Caleb and Shlizerman, Eli}, title = {2ndMatch: Finetuning Pruned Diffusion Models via Second-Order Jacobian Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43385-43395} }
CoMo: Learning Continuous Latent Motion from Internet Videos for Scalable Robot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Jiange and Shi, Yansong and Zhu, Haoyi and Liu, Mingyu and Ma, Kaijing and Wang, Yating and Wu, Gangshan and He, Tong and Wang, Limin}, title = {CoMo: Learning Continuous Latent Motion from Internet Videos for Scalable Robot Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42352-42363} }
An Optimal Transport-driven Approach for Cultivating Latent Space in Online Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tran_2026_CVPR, author = {Tran, Quyen and Nguyen, Hai and Dao, Quan and Phan, Hoang and Van, Linh and Than, Khoat and Phung, Dinh and Metaxas, Dimitris and Le, Trung}, title = {An Optimal Transport-driven Approach for Cultivating Latent Space in Online Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10851-10862} }
Latent Diffusion Inversion Requires Understanding the Latent Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rao_2026_CVPR, author = {Rao, Mingxing and Qu, Bowen and Moyer, Daniel}, title = {Latent Diffusion Inversion Requires Understanding the Latent Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34971-34980} }
VisionDirector: Vision-Language Guided Closed-Loop Refinement for Generative Image Synthesis-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chu_2026_CVPR, author = {Chu, Meng and Yang, Senqiao and Che, Haoxuan and Zhang, Suiyun and Zhang, Xichen and Yu, Shaozuo and Gui, Haokun and Rao, Zhefan and Tu, Dandan and Liu, Rui and Jia, Jiaya}, title = {VisionDirector: Vision-Language Guided Closed-Loop Refinement for Generative Image Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9203-9212} }
TIGER: A Unified Framework for Time, Images and Geo-location Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shatwell_2026_CVPR, author = {Shatwell, David G. and Swetha, Sirnam and Shah, Mubarak}, title = {TIGER: A Unified Framework for Time, Images and Geo-location Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23955-23965} }
Large-scale Codec Avatars: The Unreasonable Effectiveness of Large-scale Avatar Pretraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Junxuan and Khirodkar, Rawal and Zakharov, Egor and Lee, Jihyun and Su, Zhaoen and Dong, Yuan and Martinez, Julieta and Li, Kai and Tan, Qingyang and Shiratori, Takaaki and Hu, Matthew and Guo, Peihong and Huang, Xuhua and Jiang, Zhongshi and Yang, Lingchen and Zarei, Ariyan and Pesavento, Marco and Xu, Yichen and He, Chengan and Wen, He and Nam, Giljoo and Deng, Teng and Borsos, Wyatt and Thakrar, Anjali and Bazin, Jean-Charles and Abdrashitov, Rinat and Stoll, Carsten and Hidalgo, Gin\'es and Booth, James and Wang, Lucy and Ma, Xiaowen and Rong, Yu and Thalanki, Sairanjith and Cao, Chen and H\"ane, Christian and Kar, Abhishek and Bouaziz, Sofien and Saragih, Jason and Sheikh, Yaser and Saito, Shunsuke}, title = {Large-scale Codec Avatars: The Unreasonable Effectiveness of Large-scale Avatar Pretraining}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18204-18215} }
Grounded Latents for Entity-Centric 4D Scene Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Jinhyung and Sanghvi, Navyata and Weng, Erica and Hunt, Shawn and Tanaka, Shinya and Fujiyoshi, Hironobu and Kitani, Kris}, title = {Grounded Latents for Entity-Centric 4D Scene Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21420-21430} }
PosterOmni: Generalized Artistic Poster Creation via Task Distillation and Unified Reward Feedback-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Sixiang and Lai, Jianyu and Gao, Jialin and Shi, Hengyu and Liu, Zhongying and Ye, Tian and Luo, Junfeng and Wei, Xiaoming and Zhu, Lei}, title = {PosterOmni: Generalized Artistic Poster Creation via Task Distillation and Unified Reward Feedback}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5978-5987} }
Seeing as Experts Do: A Knowledge-Augmented Agent for Open-Set Fine-Grained Visual Understanding-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Junhan and Zhou, Zilu and Tong, Yujun and Chang, Dongliang and Luo, Yitao and Ma, Zhanyu}, title = {Seeing as Experts Do: A Knowledge-Augmented Agent for Open-Set Fine-Grained Visual Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41446-41455} }
DSERT-RoLL: Robust Multi-Modal Perception for Diverse Driving Conditions with Stereo Event-RGB-Thermal Cameras, 4D Radar, and Dual-LiDAR-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cho_2026_CVPR, author = {Cho, Hoonhee and Kang, Jae-Young and Jeong, Yuhwan and Yang, Yunseo and Lee, Wonyoung and Kim, Youngho and Yoon, Kuk-Jin}, title = {DSERT-RoLL: Robust Multi-Modal Perception for Diverse Driving Conditions with Stereo Event-RGB-Thermal Cameras, 4D Radar, and Dual-LiDAR}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33022-33035} }
Multimodal Learning on Low-Quality Data with Conformal Predictive Self-Calibration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Xun and Gu, Yufan and Hu, Disen and Hou, Yuqing and Yao, Yazhou and Shen, Fumin and Shen, Heng Tao and Xu, Xing}, title = {Multimodal Learning on Low-Quality Data with Conformal Predictive Self-Calibration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23041-23050} }
BridgeEQA: Virtual Embodied Agents for Real Bridge Inspections-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Varghese_2026_CVPR, author = {Varghese, Subin and Gao, Joshua and Rahman, Asad Ur and Hoskere, Vedhus}, title = {BridgeEQA: Virtual Embodied Agents for Real Bridge Inspections}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8163-8173} }
A Unified Perspective on Adversarial Membership Manipulation in Vision Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Ruize and Zhou, Kaiwen and Chen, Yongqiang and Liu, Feng}, title = {A Unified Perspective on Adversarial Membership Manipulation in Vision Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1554-1564} }
Seeing Through Touch: Tactile-Driven Visual Localization of Material Regions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Seongyu and Lee, Seungwoo and Ryu, Hyeonggon and Chung, Joon Son and Senocak, Arda}, title = {Seeing Through Touch: Tactile-Driven Visual Localization of Material Regions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8717-8726} }
Seeing Motion Through Polarity for Event-based Action Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Meiqi and Zhang, Jiachao and Jiang, Xin and Yan, Rui and Yao, Yazhou and Li, Zechao and Shu, Xiangbo}, title = {Seeing Motion Through Polarity for Event-based Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37075-37085} }
DynamicsBoost: Dynamic Plausible Video Generation via Annotation-Free Continuation Preference Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jiaxing and Wang, Jiepeng and Gao, Junyao and Liu, Yang and Li, Eric and An, Bo and Guo, Hao-Xiang}, title = {DynamicsBoost: Dynamic Plausible Video Generation via Annotation-Free Continuation Preference Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20024-20033} }
Ultra-Fast Neural Video Compression-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jiahao and Xie, Wenxuan and Jia, Zhaoyang and Li, Bin and Guo, Zongyu and Zhang, Xiaoyi and Lu, Yan}, title = {Ultra-Fast Neural Video Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41311-41321} }
Data-Centric Meta-Learning for Robust Few-Shot Generalization-
[pdf]
[supp]
[bibtex]@InProceedings{Lim_2026_CVPR, author = {Lim, Jongmin and Cha, Soobin and Park, Jaehun and Oh, Inho and Park, Minho and Kim, Kwangsu}, title = {Data-Centric Meta-Learning for Robust Few-Shot Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5543-5552} }
MICON-Bench: Benchmarking and Enhancing Multi-Image Context Image Generation in Unified Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Mingrui and Liu, Hang and Ji, Jiayi and Sun, Xiaoshuai and Ji, Rongrong}, title = {MICON-Bench: Benchmarking and Enhancing Multi-Image Context Image Generation in Unified Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8227-8236} }
Hidden Dangers of Compositional Generation: Diagnosing Semantic Safety Failures in Text-to-Image Models-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Haoming and Ma, Ke and Zhang, Ligong and Jia, Xiaojun and Sun, Yingfei and Xu, Qianqian and Huang, Qingming}, title = {Hidden Dangers of Compositional Generation: Diagnosing Semantic Safety Failures in Text-to-Image Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15700-15709} }
iMontage: Unified, Versatile, Highly Dynamic Many-to-many Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2026_CVPR, author = {Fu, Zhoujie and Zeng, Xianfang and Lan, Jinghong and Liao, Xinyao and Chen, Cheng and Chen, Junyi and Wei, Jiacheng and Cheng, Wei and Liu, Shiyu and Chen, Yunuo and Yu, Gang and Lin, Guosheng}, title = {iMontage: Unified, Versatile, Highly Dynamic Many-to-many Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16237-16247} }
CRAFT: Aligning Diffusion Models with Fine-Tuning Is Easier Than You Think-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Zening and Xie, Zhengpeng and Bai, Lichen and Shao, Shitong and Yang, Shuo and Xie, Zeke}, title = {CRAFT: Aligning Diffusion Models with Fine-Tuning Is Easier Than You Think}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35841-35850} }
Scaling4D: Pushing the Frontier of Video Novel View Synthesis through Large-Scale Monocular Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Hongrui and Luo, Junjie and Fu, Zhihong and Zhu, Shengnan and Wen, Jiawei and Feng, Wanquan and Zhao, Songtao and He, Qian}, title = {Scaling4D: Pushing the Frontier of Video Novel View Synthesis through Large-Scale Monocular Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11174-11184} }
BEA-GS: BEyond RAdiance Supervision in 3DGS for Precise Object Extraction-
[pdf]
[supp]
[bibtex]@InProceedings{Mazzucchelli_2026_CVPR, author = {Mazzucchelli, Alessio and Naranjo-Almeida, Maria and Bustos-Sanchez, Jorge and Dimiccoli, Mariella and Moreno-Noguer, Francesc and Sanchez-Riera, Jordi and Penate-Sanchez, Adrian}, title = {BEA-GS: BEyond RAdiance Supervision in 3DGS for Precise Object Extraction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41054-41064} }
M4-SAM: Multi-Modal Mixture-of-Experts with Memory-Augmented SAM for RGB-D Video Salient Object Detection-
[pdf]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Jiyuan and Lin, Jia and Zhou, Xiaofei and Cong, Runmin and Liu, Deyang and Liu, Zhi}, title = {M4-SAM: Multi-Modal Mixture-of-Experts with Memory-Augmented SAM for RGB-D Video Salient Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24970-24979} }
Circuit Mechanisms for Spatial Relation Generation in Diffusion Transformers-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Binxu and Fan, Jingxuan and Pan, Xu}, title = {Circuit Mechanisms for Spatial Relation Generation in Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23612-23621} }
FusionAgent: A Multimodal Agent with Dynamic Model Selection for Human Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Jie and Guo, Xiao and Su, Yiyang and Jain, Anil and Liu, Xiaoming}, title = {FusionAgent: A Multimodal Agent with Dynamic Model Selection for Human Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32756-32766} }
Chain of World: World Model Thinking in Latent Motion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Fuxiang and Di, Donglin and Tang, Lulu and Zhang, Xuancheng and Fan, Lei and Li, Hao and Chen, Wei and Su, Tonghua and Ma, Baorui}, title = {Chain of World: World Model Thinking in Latent Motion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6675-6684} }
Learning Forgery-Aware Lip Representations Without Forgery Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Bofan and Zhu, Hongyu and He, Yi and Liang, Sichu and Wang, Shi-Lin}, title = {Learning Forgery-Aware Lip Representations Without Forgery Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42911-42921} }
MimiCAT: Mimic with Correspondence-Aware Cascade-Transformer for Category-Free 3D Pose Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chai_2026_CVPR, author = {Chai, Zenghao and Tang, Chen and Wong, Yongkang and Yang, Xulei and Kankanhalli, Mohan}, title = {MimiCAT: Mimic with Correspondence-Aware Cascade-Transformer for Category-Free 3D Pose Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13962-13973} }
2-Shots in the Dark: Low-Light Denoising with Minimal Data Acquisition-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Liying and Achddou, Raphael and S{\"u}sstrunk, Sabine}, title = {2-Shots in the Dark: Low-Light Denoising with Minimal Data Acquisition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15496-15505} }
How to Take a Memorable Picture? Empowering Users with Actionable Feedback-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Laiti_2026_CVPR, author = {Laiti, Francesco and Talon, Davide and Staiano, Jacopo and Ricci, Elisa}, title = {How to Take a Memorable Picture? Empowering Users with Actionable Feedback}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29738-29749} }
Coded-E2LF: Coded Aperture Light Field Imaging from Events-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tsuchida_2026_CVPR, author = {Tsuchida, Tomoya and Takahashi, Keita and Tsutake, Chihiro and Fujii, Toshiaki and Nagahara, Hajime}, title = {Coded-E2LF: Coded Aperture Light Field Imaging from Events}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19726-19736} }
General Process Reward Modeling for Robotic Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Huajie and Chen, Sixiang and Xu, Yijie and Wang, Zixiao and Chi, Cheng and Ji, Yuheng and Lyu, Yaoxu and Zhao, Zhongxia and Chen, Xiansheng and Co, Peterson and Xie, Shaoxuan and Yao, Guocai and Wang, Pengwei and Wang, Zhongyuan and Zhang, Shanghang}, title = {General Process Reward Modeling for Robotic Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22412-22422} }
Inferring Compositional 4D Scenes without Ever Seeing One-
[pdf]
[supp]
[bibtex]@InProceedings{Gokmen_2026_CVPR, author = {G{\"o}kmen, Ahmet Berke and Chhatkuli, Ajad and Van Gool, Luc and Paudel, Danda Pani}, title = {Inferring Compositional 4D Scenes without Ever Seeing One}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {317-329} }
ZINA: Multimodal Fine-grained Hallucination Detection and Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wada_2026_CVPR, author = {Wada, Yuiga and Matsuda, Kazuki and Sugiura, Komei and Neubig, Graham}, title = {ZINA: Multimodal Fine-grained Hallucination Detection and Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32528-32538} }
Mistake Attribution: Fine-Grained Mistake Understanding in Egocentric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yayuan and Jain, Aadit and Bellos, Filippos and Corso, Jason J.}, title = {Mistake Attribution: Fine-Grained Mistake Understanding in Egocentric Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23966-23976} }
Multi-Crit: Benchmarking Multimodal Judges on Pluralistic Criteria-Following-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiong_2026_CVPR, author = {Xiong, Tianyi and Ge, Yi and Li, Ming and Zhang, Zuolong and Kulkarni, Pranav and Wang, Kaishen and He, Qi and Zhu, Zeying and Liu, Chenxi and Chen, Ruibo and Zheng, Tong and Chen, Yanshuo and Wang, Xiyao and Zhang, Renrui and Chen, Wenhu and Huang, Heng}, title = {Multi-Crit: Benchmarking Multimodal Judges on Pluralistic Criteria-Following}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8641-8652} }
InfiniDepth: Arbitrary-Resolution and Fine-Grained Depth Estimation with Neural Implicit Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Hao and Lin, Haotong and Wang, Jiawei and Li, Jiaxin and Wang, Yida and Zhang, Xueyang and Wang, Yue and Zhou, Xiaowei and Hu, Ruizhen and Peng, Sida}, title = {InfiniDepth: Arbitrary-Resolution and Fine-Grained Depth Estimation with Neural Implicit Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26920-26930} }
OneThinker: All-in-one Reasoning Model for Image and Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Kaituo and Zhang, Manyuan and Li, Hongyu and Fan, Kaixuan and Chen, Shuang and Jiang, Yilei and Zheng, Dian and Sun, Peiwen and Zhang, Yiyuan and Sun, Haoze and Feng, Yan and Pei, Peng and Cai, Xunliang and Yue, Xiangyu}, title = {OneThinker: All-in-one Reasoning Model for Image and Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5432-5443} }
ReLaGS: Relational Language Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Yaxu and Arafa, Abdalla and Javanmardi, Alireza and Millerdurai, Christen and Hu, Jia Cheng and Wang, Shaoxiang and Pagani, Alain and Stricker, Didier}, title = {ReLaGS: Relational Language Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23826-23836} }
On Token's Dilemma: Dynamic MoE with Drift-Aware Token Assignment for Continual Learning of Large Vision Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Chongyang and Li, Mingsong and Lu, Haodong and Gong, Dong}, title = {On Token's Dilemma: Dynamic MoE with Drift-Aware Token Assignment for Continual Learning of Large Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3941-3952} }
UARE: A Unified Vision-Language Model for Image Quality Assessment, Restoration, and Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Weiqi and Zhang, Xuanyu and Chen, Bin and Xie, Jingfen and Wang, Yan and Zhang, Kexin and Li, Junlin and Zhang, Li and Zhang, Jian and Zhao, Shijie}, title = {UARE: A Unified Vision-Language Model for Image Quality Assessment, Restoration, and Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22689-22702} }
VES-RFT: Rewarding Visual Evidence Sensitivity to Mitigate Hallucinations in Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Hou_2026_CVPR, author = {Hou, Xuehe and Li, Wenshuo and Li, Yali and Shu, Han and Wang, Yuan and Chen, Xinghao and Wang, Shengjin}, title = {VES-RFT: Rewarding Visual Evidence Sensitivity to Mitigate Hallucinations in Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4168-4177} }
UVU: Improving Multimodal Understanding via Vision-Language Unified Autoregressive Paradigm-
[pdf]
[supp]
[bibtex]@InProceedings{Kan_2026_CVPR, author = {Kan, Zhehan and Jiang, Xinghua and Liu, Yanlin and Yang, Xiaochen and Wei, Zhixiang and Liu, Shifeng and Zhu, Yubo and Liao, Qingmin and Yang, Wenming and Li, Xin and Liu, Yinsong and Jiang, Deqiang and Sun, Xing}, title = {UVU: Improving Multimodal Understanding via Vision-Language Unified Autoregressive Paradigm}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26230-26239} }
Bidirectional Normalizing Flow: From Data to Noise and Back-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Yiyang and Sun, Qiao and Wang, Xianbang and Jiang, Zhicheng and Zhao, Hanhong and He, Kaiming}, title = {Bidirectional Normalizing Flow: From Data to Noise and Back}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2069-2078} }
WPT: World-to-Policy Transfer via Online World Model Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Guangfeng and Luo, Yueru and Liu, Jun and Huang, Yi and Zhu, Yiyao and Qu, Zhan and Chen, Dave Zhenyu and Liu, Bingbing and Yan, Xu}, title = {WPT: World-to-Policy Transfer via Online World Model Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17842-17852} }
MaskDiME: Adaptive Masked Diffusion for Precise and Efficient Visual Counterfactual Explanations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Changlu and Christensen, Anders Nymark and Dahl, Anders Bjorholm and Hannemose, Morten Rieger}, title = {MaskDiME: Adaptive Masked Diffusion for Precise and Efficient Visual Counterfactual Explanations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24118-24128} }
EmoDiffTalk: Emotion-aware Diffusion for Editable 3D Gaussian Talking Head-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Chang and Jing, Tianjiao and Ma, Chengcheng and Zhou, Xuanqi and Lian, Zhengxuan and Jin, Qin and Yuan, Hongliang and Huang, Shi-Sheng}, title = {EmoDiffTalk: Emotion-aware Diffusion for Editable 3D Gaussian Talking Head}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18063-18073} }
4C4D: 4 Camera 4D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Junsheng and Yang, Zhifan and Han, Liang and Zhang, Wenyuan and Shi, Kanle and Xu, Shenkun and Liu, Yu-Shen}, title = {4C4D: 4 Camera 4D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11829-11839} }
Exploring Visual Pretraining for Learning Language Intelligence-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Zhonghan and Zhang, Yiming and Zhang, Wenwei and Zhao, Haiteng and Wei, Xingguang and Gao, Zhangwei and Liu, Kuikun and Gu, Yuzhe and Wu, Size and Huang, Haian and Gao, Jianfei and Lv, Haijun and Song, Demin and Zhou, Yunhua and Guo, Qipeng and Wang, Gaoang and Chen, Kai}, title = {Exploring Visual Pretraining for Learning Language Intelligence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31493-31503} }
CROWn: A Unified Framework for Anti-Aliased Downsampling and Phase-Calibrated Fusion in 3D Medical Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Xingru and Ye, Shuanghua and Huang, Zhao and Tang, Wenwen and Zhou, Huiyu and Zheng, Zhiwen and Liu, Jin and Zhang, Xiaoshuai}, title = {CROWn: A Unified Framework for Anti-Aliased Downsampling and Phase-Calibrated Fusion in 3D Medical Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8514-8524} }
3D-Object Perception Transformer (3PT)-
[pdf]
[supp]
[bibtex]@InProceedings{Kalra_2026_CVPR, author = {Kalra, Agastya and Salzmann, Tim and Stoppi, Guy and Marin, Dmitrii and Agarwal, Rishav and Taamazyan, Vage and Bokeloh, Martin and Hinterstoisser, Stefan and Boykov, Anton and Dall'Olio, Alberto and Dangol, Pravin and Venkataraman, Kartik and Chen, Huaijin}, title = {3D-Object Perception Transformer (3PT)}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25777-25787} }
InterPrior: Scaling Generative Control for Physics-Based Human-Object Interactions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Sirui and Schulter, Samuel and Ziyadi, Morteza and He, Xialin and Fei, Xiaohan and Wang, Yu-Xiong and Gui, Liang-Yan}, title = {InterPrior: Scaling Generative Control for Physics-Based Human-Object Interactions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23516-23527} }
FastGaMer: Efficient GainMap Learning for Practical Inverse Tone Mapping-
[pdf]
[supp]
[bibtex]@InProceedings{Guan_2026_CVPR, author = {Guan, Yuanshen and Xu, Ruikang and Chen, Chang and Liao, Yinuo and Song, Dehua and Song, Fenglong and Xiong, Zhiwei}, title = {FastGaMer: Efficient GainMap Learning for Practical Inverse Tone Mapping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22703-22712} }
Chain-of-Frames: Advancing Video Understanding in Multimodal LLMs via Frame-Aware Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ghazanfari_2026_CVPR, author = {Ghazanfari, Sara and Croce, Francesco and Flammarion, Nicolas and Krishnamurthy, Prashanth and Khorrami, Farshad and Garg, Siddharth}, title = {Chain-of-Frames: Advancing Video Understanding in Multimodal LLMs via Frame-Aware Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2746-2755} }
Linguistic Priors for Visual Decoupling: Towards Symmetric Vision-Brain Alignment-
[pdf]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Dongjun and Dai, Weichen and Qian, Jingsheng and Liu, Honggang and Yi, Hangjie and Kong, Wanzeng}, title = {Linguistic Priors for Visual Decoupling: Towards Symmetric Vision-Brain Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7869-7878} }
Looking Beyond the Window: Global-Local Aligned CLIP for Training-free Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, ByeongCheol and Seong, Hyun Seok and Hyun, Sangeek and Park, Gilhan and Moon, WonJun and Heo, Jae-Pil}, title = {Looking Beyond the Window: Global-Local Aligned CLIP for Training-free Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27686-27696} }
ReBaPL: Repulsive Bayesian Prompt Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bendou_2026_CVPR, author = {Bendou, Yassir and Ezzahir, Omar and Montesuma, Eduardo and Mahuas, Gabriel and Shevchenko, Victoria and Gartrell, Mike}, title = {ReBaPL: Repulsive Bayesian Prompt Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39962-39971} }
What Matters in Practical Learned Image Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tatwawadi_2026_CVPR, author = {Tatwawadi, Kedar and Rahimzadeh, Parisa and Sun, Zhanghao and Chen, Zhiqi and Yang, Ziyun and Nair, Sanjay and Hasteer, Divija and Rippel, Oren}, title = {What Matters in Practical Learned Image Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12095-12105} }
Model Merging in the Essential Subspace-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Longhua and Qi, Lei and Tian, Qi and Geng, Xin}, title = {Model Merging in the Essential Subspace}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31145-31154} }
Expert-Teacher-Student Collaborative Learning for Domain Adaptive Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Cui_2026_CVPR, author = {Cui, Yiming and Li, Liang and Yin, Haibing and Gao, Yuhan and Sheng, Xichun and Yan, Chenggang}, title = {Expert-Teacher-Student Collaborative Learning for Domain Adaptive Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25557-25567} }
DeepAlign: Mitigating Modality Conflict through Modality-Specific Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Shuo and Miao, Bingchen and Bu, Wendong and Li, Juncheng and Zhang, Hanwang and Wu, Fei}, title = {DeepAlign: Mitigating Modality Conflict through Modality-Specific Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7847-7858} }
SG-LoRA: Semantic-guided LoRA Parameters Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Miaoge and Chen, Yang and Rao, Zhijie and Jiang, Can and Wei, Kang and Guo, Jingcai}, title = {SG-LoRA: Semantic-guided LoRA Parameters Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22206-22216} }
G$^2$VLM: Geometry Grounded Vision Language Model with Unified 3D Reconstruction and Spatial Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Wenbo and Lin, Jingli and Long, Yilin and Ran, Yunlong and Jiang, Lihan and Wang, Yifan and Zhu, Chenming and Xu, Runsen and Wang, Tai and Pang, Jiangmiao}, title = {{G$^2$VLM}: Geometry Grounded Vision Language Model with Unified 3D Reconstruction and Spatial Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9535-9546} }
RecTok: Reconstruction Distillation along Rectified Flow-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Qingyu and Wu, Size and Bai, Jinbin and Yu, Kaidong and Wang, Yujing and Tong, Yunhai and Li, Xiangtai and Li, Xuelong}, title = {RecTok: Reconstruction Distillation along Rectified Flow}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40685-40695} }
FunFact: Building Probabilistic Functional 3D Scene Graphs via Factor-Graph Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Fu_2026_CVPR, author = {Fu, Zhengyu and Zurbr{\"u}gg, Ren{\'e} and Qu, Kaixian and Pollefeys, Marc and Hutter, Marco and Blum, Hermann and Bauer, Zuria}, title = {FunFact: Building Probabilistic Functional 3D Scene Graphs via Factor-Graph Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23848-23858} }
OpenDPR: Open-Vocabulary Change Detection via Vision-Centric Diffusion-Guided Prototype Retrieval for Remote Sensing Imagery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Qi and Wang, Jue and Liu, Yinhe and Zhong, Yanfei}, title = {OpenDPR: Open-Vocabulary Change Detection via Vision-Centric Diffusion-Guided Prototype Retrieval for Remote Sensing Imagery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20399-20409} }
Virtual Immunohistochemistry Staining with Dual-Aligned Multi-Task Feature Guidance-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Shigeng and Xu, Hongming and Jiang, Guiyang and Rossi, Tuomo and K{\"a}rkk{\"a}inen, Tommi and Cong, Fengyu}, title = {Virtual Immunohistochemistry Staining with Dual-Aligned Multi-Task Feature Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35311-35320} }
NeighborMAE: Exploiting Spatial Dependencies between Neighboring Earth Observation Images in Masked Autoencoders Pretraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Liang and Marsocci, Valerio and Zhao, Wufan and Nascetti, Andrea and Vergauwen, Maarten}, title = {NeighborMAE: Exploiting Spatial Dependencies between Neighboring Earth Observation Images in Masked Autoencoders Pretraining}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20597-20607} }
Fixed Anchors Are Not Enough: Dynamic Retrieval and Persistent Homology for Dataset Distillation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Muquan and Gou, Hang and Ma, Yingyi and Wang, Rongzheng and Qin, Ke and He, Tao}, title = {Fixed Anchors Are Not Enough: Dynamic Retrieval and Persistent Homology for Dataset Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33963-33972} }
A Cross-view Fusion Framework for Robust 6-DoF Grasp Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Kangjian and Jiang, Haobo and Qian, Jianjun and Xie, Jin}, title = {A Cross-view Fusion Framework for Robust 6-DoF Grasp Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28052-28061} }
ViT$^3$: Unlocking Test-Time Training in Vision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Dongchen and Li, Yining and Li, Tianyu and Cao, Zixuan and Wang, Ziming and Song, Jun and Cheng, Yu and Zheng, Bo and Huang, Gao}, title = {{ViT$^3$}: Unlocking Test-Time Training in Vision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {51-61} }
MAD: Motion Appearance Decoupling for efficient Driving World Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rahimi_2026_CVPR, author = {Rahimi, Ahmad and Gerard, Valentin and Zablocki, Eloi and Cord, Matthieu and Alahi, Alexandre}, title = {MAD: Motion Appearance Decoupling for efficient Driving World Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18364-18374} }
Learning to Generate via Understanding: Understanding-Driven Intrinsic Rewarding for Unified Multimodal Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Jiadong and Li, Liang and Peng, Yuxin and Tang, Yu-Ming and Wang, Shuohuan and Sun, Yu and Wu, Hua and Huang, Qingming and Wang, Haifeng}, title = {Learning to Generate via Understanding: Understanding-Driven Intrinsic Rewarding for Unified Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22174-22184} }
CASPA: Graph-Structured Concept Anchors for Modality-Agnostic Adaptation in Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Chatterjee_2026_CVPR, author = {Chatterjee, Abhiroop and Ghosh, Susmita and Ghosh, Ashish and Ientilucci, Emmett}, title = {CASPA: Graph-Structured Concept Anchors for Modality-Agnostic Adaptation in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31566-31576} }
4D-RGPT: Toward Region-level 4D Understanding via Perceptual Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Chiao-An and Hachiuma, Ryo and Liu, Sifei and Radhakrishnan, Subhashree and Yeh, Raymond A. and Wang, Yu-Chiang Frank and Chen, Min-Hung}, title = {4D-RGPT: Toward Region-level 4D Understanding via Perceptual Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31042-31053} }
DARC: Dual Adjustment Reasoning with Counterfactuals for Trustworthy Chest X-ray Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Liao_2026_CVPR, author = {Liao, Zhifang and Li, Junhao and Ding, HaoKang and Song, Yucheng}, title = {DARC: Dual Adjustment Reasoning with Counterfactuals for Trustworthy Chest X-ray Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28234-28243} }
Explaining Object Detectors via Collective Contribution of Pixels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yamauchi_2026_CVPR, author = {Yamauchi, Toshinori and Kera, Hiroshi and Kawamoto, Kazuhiko}, title = {Explaining Object Detectors via Collective Contribution of Pixels}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17046-17056} }
CG-Reasoner: Centroid-Guided Positional Reasoning Segmentation for Medical Imaging with a Robust Visual-Text Consistency Metric-
[pdf]
[supp]
[bibtex]@InProceedings{Polamreddy_2026_CVPR, author = {Polamreddy, Lakshmikar Reddy and Ma, Ming}, title = {CG-Reasoner: Centroid-Guided Positional Reasoning Segmentation for Medical Imaging with a Robust Visual-Text Consistency Metric}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1472-1481} }
HUMAPS-4D: A Multimodal Dataset for HUman Motion Analysis with Physiological and Semantic informations-
[pdf]
[supp]
[bibtex]@InProceedings{Dabrowski_2026_CVPR, author = {Dabrowski, Matthieu and Ben Jemaa, Ouala and Allaert, Benjamin}, title = {HUMAPS-4D: A Multimodal Dataset for HUman Motion Analysis with Physiological and Semantic informations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21188-21197} }
CHIRP dataset: towards long-term, individual-level, behavioral monitoring of bird populations in the wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chan_2026_CVPR, author = {Chan, Alex Hoi Hang and Singhal, Neha and Kocahan, Onur and Meltzer, Andrea and Lubrano, Saverio and Warrington, Miyako H. and Griesser, Michael and Kano, Fumihiro and Naik, Hemal}, title = {CHIRP dataset: towards long-term, individual-level, behavioral monitoring of bird populations in the wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18428-18439} }
Learning to Select Visual Tools from Experience-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Zeyi and Ji, Yuyang and Rajan, Anirudh Sundara and Cai, Zefan and Xiao, Wen and Wang, Haohan and Hu, Junjie and Lee, Yong Jae}, title = {Learning to Select Visual Tools from Experience}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4783-4793} }
MuM: Multi-View Masked Image Modeling for 3D Vision-
[pdf]
[supp]
[bibtex]@InProceedings{Nordstrom_2026_CVPR, author = {Nordstr{\"o}m, David and Edstedt, Johan and Kahl, Fredrik and B{\"o}kman, Georg}, title = {MuM: Multi-View Masked Image Modeling for 3D Vision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21736-21747} }
LightSplat: Fast and Memory-Efficient Open-Vocabulary 3D Scene Understanding in Five Seconds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bang_2026_CVPR, author = {Bang, Jaehun and Kim, Jinhyeok and Kim, Minji and Jeong, Seungheon and Joo, Kyungdon}, title = {LightSplat: Fast and Memory-Efficient Open-Vocabulary 3D Scene Understanding in Five Seconds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19812-19821} }
NEC-Diff: Noise-Robust Event-RAW Complementary Diffusion for Seeing Motion in Extreme Darkness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Haoyue and Xu, Jinghan and Feng, Luxin and Zhou, Hanyu and Zhao, Haozhi and Chang, Yi and Yan, Luxin}, title = {NEC-Diff: Noise-Robust Event-RAW Complementary Diffusion for Seeing Motion in Extreme Darkness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22281-22290} }
UAVLight: A Benchmark for Illumination-Robust 3D Reconstruction in Unmanned Aerial Vehicle (UAV) Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2026_CVPR, author = {Du, Kang and Liao, Xue and Xia, Junpeng and Guo, Chaozheng and Gu, Yi and Guan, Yirui and Wang, Duotun and Huang, Sheng and Wang, Zeyu}, title = {UAVLight: A Benchmark for Illumination-Robust 3D Reconstruction in Unmanned Aerial Vehicle (UAV) Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5670-5679} }
GeoSANE: Learning Geospatial Representations from Models, Not Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hanna_2026_CVPR, author = {Hanna, Jo{\"e}lle and Falk, Damian and Yu, Stella X. and Borth, Damian}, title = {GeoSANE: Learning Geospatial Representations from Models, Not Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27804-27814} }
VidTAG: Temporally Aligned Video to GPS Geolocalization with Denoising Sequence Prediction at a Global Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kulkarni_2026_CVPR, author = {Kulkarni, Parth Parag and Gupta, Rohit and Chhipa, Prakash Chandra and Shah, Mubarak}, title = {VidTAG: Temporally Aligned Video to GPS Geolocalization with Denoising Sequence Prediction at a Global Scale}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23977-23987} }
EasyOmnimatte: Taming Pretrained Inpainting Diffusion Models for End-to-End Video Layered Decomposition-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Yihan and Chen, Xuelin and Cun, Xiaodong}, title = {EasyOmnimatte: Taming Pretrained Inpainting Diffusion Models for End-to-End Video Layered Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43341-43351} }
DemoFunGrasp: Universal Dexterous Functional Grasping via Demonstration-Editing Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Chuan and Yuan, Haoqi and Huang, Ziye and Xu, Chaoyi and Ma, Kai and Lu, Zongqing}, title = {DemoFunGrasp: Universal Dexterous Functional Grasping via Demonstration-Editing Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {986-995} }
Interactive Episodic Memory with User Feedback-
[pdf]
[supp]
[bibtex]@InProceedings{Subedi_2026_CVPR, author = {Subedi, Nikesh and Bazzani, Loris and Al-Halah, Ziad}, title = {Interactive Episodic Memory with User Feedback}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38826-38835} }
DABO: Difficulty-Aware Bayesian Optimization with Diffusion-Learned Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Mengyang and Zhao, Pinlong}, title = {DABO: Difficulty-Aware Bayesian Optimization with Diffusion-Learned Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6126-6135} }
PolySLGen: Online Multimodal Speaking-Listening Reaction Generation in Polyadic Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Zhi-Yi and Markhorst, Thomas and Chew, Jouh Yeong and Zhang, Xucong}, title = {PolySLGen: Online Multimodal Speaking-Listening Reaction Generation in Polyadic Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29379-29390} }
Bypassing the Transport Plan: Dynamic Reweighting for Out-of-Distribution Detection with Optimal Transport-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Yang and Liu, Weiming and Dan, Jun and Xu, Tengyue and Wang, Fan and Yu, Hua and Dong, Junhao and Liu, Jiao and Dong, Shunjie and Qi, Lianyong}, title = {Bypassing the Transport Plan: Dynamic Reweighting for Out-of-Distribution Detection with Optimal Transport}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32026-32036} }
MARCO: Navigating the Unseen Space of Semantic Correspondence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cuttano_2026_CVPR, author = {Cuttano, Claudia and Trivigno, Gabriele and Masone, Carlo and Roth, Stefan}, title = {MARCO: Navigating the Unseen Space of Semantic Correspondence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21649-21658} }
Realiz3D: 3D Generation Made Photorealistic via Domain-Aware Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sobol_2026_CVPR, author = {Sobol, Ido and Sohn, Kihyuk and Blum, Yoav and Zakharov, Egor and Bluvstein, Max and Vedaldi, Andrea and Litany, Or}, title = {Realiz3D: 3D Generation Made Photorealistic via Domain-Aware Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27072-27081} }
Brewing Stronger Features: Dual-Teacher Distillation for Multispectral Earth Observation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wolf_2026_CVPR, author = {Wolf, Filip and Rolih, Bla{\v{z}} and Zajc, Luka {\v{C}}ehovin}, title = {Brewing Stronger Features: Dual-Teacher Distillation for Multispectral Earth Observation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27815-27826} }
Differentiable Vector Quantization for Rate-Distortion Optimization of Generative Image Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Shiyin and Long, Wei and Han, Minghao and Chen, Zhenghao and Zhu, Ce and Gu, Shuhang}, title = {Differentiable Vector Quantization for Rate-Distortion Optimization of Generative Image Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14440-14450} }
Advancing Image Classification with Discrete Diffusion Classification Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Belhasin_2026_CVPR, author = {Belhasin, Omer and Golan, Shelly and El-Yaniv, Ran and Elad, Michael}, title = {Advancing Image Classification with Discrete Diffusion Classification Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {124-134} }
CoopDiff: A Diffusion-Guided Approach for Cooperation under Corruptions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Gong and Zhang, Chaokun and Lv, Pengcheng}, title = {CoopDiff: A Diffusion-Guided Approach for Cooperation under Corruptions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11546-11555} }
Spatial-Spectral Residuals Informed Diffusion Neural Operator for Pan-sharpening-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Jiahan and Ran, Ran and Hou, Junming and Chen, Zihao and Cong, Xiaofeng and Li, Junling and Deng, Liang-Jian}, title = {Spatial-Spectral Residuals Informed Diffusion Neural Operator for Pan-sharpening}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23642-23651} }
AntiStyler: Defending Object Detection Models Against Adversarial Patch Attacks Using Style Removal-
[pdf]
[supp]
[bibtex]@InProceedings{Yankelev_2026_CVPR, author = {Yankelev, Idan and Grolman, Edita and Levi, Yarin Yerushalmi and Giloni, Amit and Hofman, Omer and Shimizu, Toshiya and Elovici, Yuval and Shabtai, Asaf}, title = {AntiStyler: Defending Object Detection Models Against Adversarial Patch Attacks Using Style Removal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27936-27945} }
RecoverMark: Robust Watermarking for Localization and Recovery of Manipulated Faces-
[pdf]
[arXiv]
[bibtex]@InProceedings{An_2026_CVPR, author = {An, Haonan and Ye, Xiaohui and Hua, Guang and Tao, Yihang and Cao, Hangcheng and Yu, Xiangyu and Fang, Yuguang}, title = {RecoverMark: Robust Watermarking for Localization and Recovery of Manipulated Faces}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8587-8597} }
EMGauss: Continuous Slice-to-3D Reconstruction via Dynamic Gaussian Modeling in Volume Electron Microscopy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Yumeng and Zhou, Zanwei and Zheng, Yekun and Liang, Chen and Wang, Yunbo and Yang, Xiaokang}, title = {EMGauss: Continuous Slice-to-3D Reconstruction via Dynamic Gaussian Modeling in Volume Electron Microscopy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15606-15615} }
A Stitch in Time: Learning Procedural Workflow via Self-Supervised Plackett-Luce Ranking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Che_2026_CVPR, author = {Che, Chengan and Wang, Chao and Chen, Xinyue and Tsoka, Sophia and Garcia-Peraza-Herrera, Luis C.}, title = {A Stitch in Time: Learning Procedural Workflow via Self-Supervised Plackett-Luce Ranking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17000-17010} }
ConeSep: Cone-based Robust Noise-Unlearning Compositional Network for Composed Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zixu and Hu, Yupeng and Chen, Zhiwei and Zhang, Mingyu and Fu, Zhiheng and Nie, Liqiang}, title = {ConeSep: Cone-based Robust Noise-Unlearning Compositional Network for Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16897-16909} }
TDATR: Improving End-to-End Table Recognition via Table Detail-Aware Learning and Cell-Level Visual Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qin_2026_CVPR, author = {Qin, Chunxia and Liu, Chenyu and Xia, Pengcheng and Du, Jun and Yin, Baocai and Yin, Bing and Liu, Cong}, title = {TDATR: Improving End-to-End Table Recognition via Table Detail-Aware Learning and Cell-Level Visual Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36838-36849} }
Foundry: Distilling 3D Foundation Models for the Edge-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Letellier_2026_CVPR, author = {Letellier, Guillaume and Srivastava, Siddharth and Jurie, Frederic and Sharma, Gaurav}, title = {Foundry: Distilling 3D Foundation Models for the Edge}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17194-17203} }
Seeing Through Blur: Tackling Defocus in Spike-Based Imaging-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Xiantao and Dong, Siwei and Zhu, Lin and Wang, Lizhi and Huang, Hua}, title = {Seeing Through Blur: Tackling Defocus in Spike-Based Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19676-19685} }
Generalized and Personalized Federated Learning with Black-Box Foundation Models via Orthogonal Transformations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kong_2026_CVPR, author = {Kong, Eun Gyung and Yeom, Jewon and Jeon, Yonghoon and Kim, Taesup}, title = {Generalized and Personalized Federated Learning with Black-Box Foundation Models via Orthogonal Transformations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24567-24576} }
TopoCL: Topological Contrastive Learning for Medical Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Meng_2026_CVPR, author = {Meng, Guangyu and Gu, Pengfei and Liang, Peixian and Lalor, John P. and Chambers, Erin Wolf and Chen, Danny Z.}, title = {TopoCL: Topological Contrastive Learning for Medical Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42681-42690} }
Scaling Zero-Shot Reference-to-Video Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Zijian and Liu, Shikun and Liu, Haozhe and Qiu, Haonan and An, Zhaochong and Ren, Weiming and Liu, Zhiheng and Huang, Xiaoke and Ng, Kam-Woh and Xie, Tian and Han, Xiao and Cong, Yuren and Li, Hang and Zhu, Chuyan and Patel, Aditya and Xiang, Tao and He, Sen}, title = {Scaling Zero-Shot Reference-to-Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9253-9262} }
MAPS: Preserving Vision-Language Representations via Module-Wise Proximity Scheduling for Better Vision-Language-Action Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Chengyue and Zhang, Mellon M. and Azarcon, Robert and Chou, Glen and Kira, Zsolt}, title = {MAPS: Preserving Vision-Language Representations via Module-Wise Proximity Scheduling for Better Vision-Language-Action Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32451-32462} }
$\oslash$ Source Models Leak What They Shouldn't $\nrightarrow$: Unlearning Zero-Shot Transfer in Domain Adaptation Through Adversarial Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Devalapally_2026_CVPR, author = {Devalapally, Arnav and Jain, Poornima and Srinivas, Kartik and Balasubramanian, Vineeth N.}, title = {{$\oslash$} Source Models Leak What They Shouldn't {$\nrightarrow$}: Unlearning Zero-Shot Transfer in Domain Adaptation Through Adversarial Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1543-1553} }
Spectral-Geometric Neural Fields for Pose-Free LiDAR View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Yinuo and Cheng, Jun and Wang, Yiran and Cheng, Cheng}, title = {Spectral-Geometric Neural Fields for Pose-Free LiDAR View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2993-3003} }
SVAgent: Storyline-guided Long Video Understanding via Cross-Modal Multi-Agent Collaboration-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zhongyu and Yang, Zuhao and Zhan, Shuo and Yue, Tan and Pang, Wei and Yuan, Yingfang}, title = {SVAgent: Storyline-guided Long Video Understanding via Cross-Modal Multi-Agent Collaboration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24062-24072} }
VDOT: Efficient Unified Video Creation via Optimal Transport Distillation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yutong and Zhang, Haiyu and Xue, Tianfan and Qiao, Yu and Wang, Yaohui and Xu, Chang and Chen, Xinyuan}, title = {VDOT: Efficient Unified Video Creation via Optimal Transport Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9273-9283} }
Dynamic Exposure Burst Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Woohyeok and Rim, Jaesung and Kim, Daeyeon and Cho, Sunghyun}, title = {Dynamic Exposure Burst Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15550-15560} }
TRivia: Self-supervised Fine-tuning of Vision-Language Models for Table Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Junyuan and Wang, Bin and Zhang, Qintong and Wu, Fan and Wen, Zichen and Lu, Jialin and Shan, Junjie and Zhao, Ziqi and Yang, Shuya and Wang, Ziling and Miao, Ziyang and Zhong, Huaping and Zang, Yuhang and Dong, Xiaoyi and Chow, Ka-Ho and He, Conghui}, title = {TRivia: Self-supervised Fine-tuning of Vision-Language Models for Table Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33196-33206} }
Hyperbolic Relational Prompts for Intersectional Fairness in Medical VLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2026_CVPR, author = {Qian, Jiayu and Yang, Zongxian and Chen, Guanxing and Hu, Pengwei and Tan, KC and Wang, Yan and Huang, Yu-An and Huang, Zhi-An}, title = {Hyperbolic Relational Prompts for Intersectional Fairness in Medical VLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13712-13721} }
AudioStory: Generating Long-Form Narrative Audio with Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Yuxin and Wang, Teng and Ge, Yuying and Ma, Shijie and Ge, Yixiao and Zou, Wei}, title = {AudioStory: Generating Long-Form Narrative Audio with Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37735-37744} }
Perception Characteristics Distance: Measuring Stability and Robustness of Perception System in Dynamic Conditions under a Certain Decision Rule-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Boyu and Shi, Liang and Lin, Zhengzhi and Xiang, Lanxin and Stowe, Loren and Guo, Feng}, title = {Perception Characteristics Distance: Measuring Stability and Robustness of Perception System in Dynamic Conditions under a Certain Decision Rule}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4829-4838} }
DialogueVPR: Towards Conversational Visual Place Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Yukun and Wang, Changwei and Pei, Xingtian and Xu, Shibiao and Xu, Wenhao and Chen, Shunpeng and Zhang, Yu and Zhang, Ke and Xu, Rongtao and Feng, Xuxiang and Wang, Pengyang}, title = {DialogueVPR: Towards Conversational Visual Place Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41100-41110} }
RC-NF: Robot-Conditioned Normalizing Flow for Real-Time Anomaly Detection in Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Shijie and Zhu, Bin and Yang, Jiarui and Zhao, Xiangyu and Chen, Jingjing and Jiang, Yu-Gang}, title = {RC-NF: Robot-Conditioned Normalizing Flow for Real-Time Anomaly Detection in Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43050-43060} }
ClipGStream: Clip-Stream Gaussian Splatting for Any Length and Any Motion Multi-View Dynamic Scene Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Jie and Wu, Jiahao and Wang, Chao and Yang, Jiayu and Zheng, Xiaoyun and Xiong, Kaiqiang and Wang, Zhanke and Yan, Jinbo and Gao, Feng and Wang, Ronggang}, title = {ClipGStream: Clip-Stream Gaussian Splatting for Any Length and Any Motion Multi-View Dynamic Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41022-41032} }
RelightAnyone: A Generalized Relightable 3D Gaussian Head Model-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Yingyan and Rao, Pramod and Weiss, Sebastian and Zoss, Gaspard and Gross, Markus and Theobalt, Christian and Habermann, Marc and Bradley, Derek}, title = {RelightAnyone: A Generalized Relightable 3D Gaussian Head Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25258-25269} }
UniCorrn: Unified Correspondence Transformer Across 2D and 3D-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Goswami_2026_CVPR, author = {Goswami, Prajnan and Ding, Tianye and Liu, Feng and Jiang, Huaizu}, title = {UniCorrn: Unified Correspondence Transformer Across 2D and 3D}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9943-9954} }
GIFT: Global Irreplaceability Frame Targeting for Efficient Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Junpeng and Zhou, Sashuai and Li, Guanghao and Gao, Xin and Cao, Yue and Zeng, Hengyu and Yan, Yuxiang and Wang, Zhibin and Song, Jun and Zheng, Bo and Zhang, Shanghang and Pu, Jian}, title = {GIFT: Global Irreplaceability Frame Targeting for Efficient Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25610-25620} }
FEAT: Fashion Editing and Try-On from Any Design-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kwon_2026_CVPR, author = {Kwon, Soye and Lee, Keonyoung and Jung, Dahuin and Lee, Jaekoo}, title = {FEAT: Fashion Editing and Try-On from Any Design}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22080-22089} }
TopoMesh: High-Fidelity Mesh Autoencoding via Topological Unification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Guan and Li, Xiu and Chen, Rui and Yi, Xuanyu and Lin, Jing and Chen, Chia Hao and Liu, Jiahang and Zhang, Song-Hai and Zhang, Jianfeng}, title = {TopoMesh: High-Fidelity Mesh Autoencoding via Topological Unification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27082-27092} }
TALO: Pushing 3D Vision Foundation Models Towards Globally Consistent Online Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Fengyi and Zhang, Tianjun and Khosoussi, Kasra and Zhang, Zheng and Huang, Zi and Luo, Yadan}, title = {TALO: Pushing 3D Vision Foundation Models Towards Globally Consistent Online Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21870-21879} }
Why Not Hyperparameter-Friendly Optimisation? A Monotonic Adaptive Norm Rescaling Approach For Long-Tailed Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shuo and Li, Chenqi and Zhu, Tingting}, title = {Why Not Hyperparameter-Friendly Optimisation? A Monotonic Adaptive Norm Rescaling Approach For Long-Tailed Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36947-36956} }
Multi-view Pyramid Transformer: Look Coarser to See Broader-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2026_CVPR, author = {Kang, Gyeongjin and Yang, Seungkwon and Nam, Seungtae and Lee, Younggeun and Kim, Jungwoo and Park, Eunbyung}, title = {Multi-view Pyramid Transformer: Look Coarser to See Broader}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37380-37390} }
EasyV2V: A High-quality Instruction-based Video Editing Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mai_2026_CVPR, author = {Mai, Jinjie and Wang, Chaoyang and Qian, Gordon Guocheng and Menapace, Willi and Tulyakov, Sergey and Ghanem, Bernard and Wonka, Peter and Mirzaei, Ashkan}, title = {EasyV2V: A High-quality Instruction-based Video Editing Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30435-30445} }
Open-Ended Instruction Realization with LLM-Enabled Multi-Planner Scheduling in Autonomous Vehicles-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Jiawei and Gong, Xun and Fang, Fen and Yang, Muli and Qu, Bohao and Hu, Yunfeng and Chen, Hong and Yang, Xulei and Guo, Qing}, title = {Open-Ended Instruction Realization with LLM-Enabled Multi-Planner Scheduling in Autonomous Vehicles}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32070-32081} }
VIMCAN: Visual-Inertial 3D Human Pose Estimation with Hybrid Mamba-Cross-Attention Network-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zepeng and Bai, Junxuan and Li, Hao and Dai, Ju and Pan, Junjun and Yin, Yongfeng and Li, Bin}, title = {VIMCAN: Visual-Inertial 3D Human Pose Estimation with Hybrid Mamba-Cross-Attention Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28458-28467} }
SGS-Intrinsic: Semantic-Invariant Gaussian Splatting for Sparse-View Indoor Inverse Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Niu_2026_CVPR, author = {Niu, Jiahao and Zheng, Rongjia and Xu, Wenju and Zheng, Wei-Shi and Zhang, Qing}, title = {SGS-Intrinsic: Semantic-Invariant Gaussian Splatting for Sparse-View Indoor Inverse Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26021-26030} }
COPYLENS: Towards Copyrighted Characters Infringement Detection via Copyright-Aware Prompt Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Jin_2026_CVPR, author = {Jin, Yaoyu and Yang, Xiaochun and Liu, Hong and Wang, Leixia and Li, Jian and Ding, Rui and Wang, Bin}, title = {COPYLENS: Towards Copyrighted Characters Infringement Detection via Copyright-Aware Prompt Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24492-24502} }
OralGPT-Plus: Learning to Use Visual Tools via Reinforcement Learning for Panoramic X-ray Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Yuxuan and Hao, Jing and Chen, Hong and Bao, Jiahao and Shao, Yihua and Liang, Yuci and Hung, Kuo Feng and Tang, Hao}, title = {OralGPT-Plus: Learning to Use Visual Tools via Reinforcement Learning for Panoramic X-ray Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35373-35383} }
LS-ViT: Least-Squares Hessian Based Block Reconstruction for Low-Bit Post-Training Quantization of Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Hwang_2026_CVPR, author = {Hwang, Hyunha and Nguyen, Xuan Truong and Lee, Hyuk-Jae}, title = {LS-ViT: Least-Squares Hessian Based Block Reconstruction for Low-Bit Post-Training Quantization of Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33588-33597} }
CURE: Curriculum-guided Multi-task Training for Reliable Anatomy Grounded Report Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Messina_2026_CVPR, author = {Messina, Pablo and Villa, Andr\'es and Alcazar, Juan Leon and Sanchez, Karen and Hinojosa, Carlos and Parra, Denis and Soto, Alvaro and Ghanem, Bernard}, title = {CURE: Curriculum-guided Multi-task Training for Reliable Anatomy Grounded Report Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36279-36289} }
Text-Phase Synergy Network with Dual Priors for Unsupervised Cross-Domain Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Jing and Xue, Hui and Zhu, Shipeng and Fang, Pengfei}, title = {Text-Phase Synergy Network with Dual Priors for Unsupervised Cross-Domain Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23891-23900} }
DrivePI: Spatial-aware 4D MLLM for Unified Autonomous Driving Understanding, Perception, Prediction and Planning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Zhe and Huang, Runhui and Yang, Rui and Yan, Siming and Wang, Zining and Hou, Lu and Lin, Di and Bai, Xiang and Zhao, Hengshuang}, title = {DrivePI: Spatial-aware 4D MLLM for Unified Autonomous Driving Understanding, Perception, Prediction and Planning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3688-3698} }
Learning complete and explainable visual representations from itemized text supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lyu_2026_CVPR, author = {Lyu, Yiwei and Zhao, Chenhui and Banerjee, Soumyanil and Liu, Shixuan and Rao, Akshay and Kondepudi, Akhil and Lee, Honglak and Hollon, Todd C.}, title = {Learning complete and explainable visual representations from itemized text supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21110-21120} }
Charge: A Comprehensive Novel View Synthesis Benchmark and Dataset to Bind Them All-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nazarczuk_2026_CVPR, author = {Nazarczuk, Michal and Tanay, Thomas and Moreau, Arthur and Zhang, Zhensong and P\'erez-Pellitero, Eduardo}, title = {Charge: A Comprehensive Novel View Synthesis Benchmark and Dataset to Bind Them All}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15323-15333} }
Your Latent Mask is Wrong: Pixel-Equivalent Latent Compositing for Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bradbury_2026_CVPR, author = {Bradbury, Rowan and Zhong, Dazhi}, title = {Your Latent Mask is Wrong: Pixel-Equivalent Latent Compositing for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18630-18639} }
NAF: Zero-Shot Feature Upsampling via Neighborhood Attention Filtering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chambon_2026_CVPR, author = {Chambon, Lo{\"\i}ck and Couairon, Paul and Zablocki, \'Eloi and Boulch, Alexandre and Thome, Nicolas and Cord, Matthieu}, title = {NAF: Zero-Shot Feature Upsampling via Neighborhood Attention Filtering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26604-26613} }
WOD-E2E: Waymo Open Dataset for End-to-End Driving in Challenging Long-tail Scenarios-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Runsheng and Lin, Hubert and Jeon, Wonseok and Feng, Hao and Zou, Yuliang and Sun, Liting and Gorman, John and Tolstaya, Kate and Tang, Sarah and White, Brandyn and Sapp, Ben and Tan, Mingxing and Hwang, Jyh-Jing and Anguelov, Dragomir}, title = {WOD-E2E: Waymo Open Dataset for End-to-End Driving in Challenging Long-tail Scenarios}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3709-3718} }
NoRD: A Data-Efficient Vision-Language-Action Model that Drives without Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rawal_2026_CVPR, author = {Rawal, Ishaan and Gupta, Shubh and Hu, Yihan and Zhan, Wei}, title = {NoRD: A Data-Efficient Vision-Language-Action Model that Drives without Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10965-10975} }
Personalized Longitudinal Medical Report Generation via Temporally-Aware Federated Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, He and Togo, Ren and Ogawa, Takahiro and Hirata, Kenji and Tang, Minghui and Yoshimura, Takaaki and Sugimori, Hiroyuki and Nishioka, Noriko and Shimizu, Yukie and Kudo, Kohsuke and Haseyama, Miki}, title = {Personalized Longitudinal Medical Report Generation via Temporally-Aware Federated Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42701-42710} }
AR2-4FV: Anchored Referring and Re-identification for Long-Term Grounding in Fixed-View Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Teng and Liu, Yihan and Chen, Jiongxu and Wang, Teng and Li, Jiaqi and Zhong, Bingzhuo}, title = {AR2-4FV: Anchored Referring and Re-identification for Long-Term Grounding in Fixed-View Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17568-17577} }
Sparse-View Localization via Online Neural 3D Regression-
[pdf]
[supp]
[bibtex]@InProceedings{Dillen_2026_CVPR, author = {Dill\'en, Ludvig and Oskarsson, Magnus and Larsson, Viktor}, title = {Sparse-View Localization via Online Neural 3D Regression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21794-21804} }
Free-Grained Hierarchical Visual Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Seulki and Wang, Zilin and Yu, Stella X.}, title = {Free-Grained Hierarchical Visual Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32767-32776} }
KV-Tracker: Real-Time Pose Tracking with Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Taher_2026_CVPR, author = {Taher, Marwan and Alzugaray, Ignacio and Mazur, Kirill and Kong, Xin and Davison, Andrew}, title = {KV-Tracker: Real-Time Pose Tracking with Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28990-28999} }
PhysGen: Physically Grounded 3D Shape Generation for Industrial Design-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{You_2026_CVPR, author = {You, Yingxuan and Zhao, Chen and Zhang, Hantao and Xu, Ming and Fua, Pascal}, title = {PhysGen: Physically Grounded 3D Shape Generation for Industrial Design}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27209-27218} }
Diffusion MRI Transformer with a Diffusion Space Rotary Positional Embedding (D-RoPE)-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kung_2026_CVPR, author = {Kung, Gustavo Chau Loo and Abbasi, Mohammad and Blank, Camila and Zhang, Juze and Wang, Alan Q. and Ostmeier, Sophie and Chaudhari, Akshay and Pohl, Kilian and Adeli, Ehsan}, title = {Diffusion MRI Transformer with a Diffusion Space Rotary Positional Embedding (D-RoPE)}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38430-38441} }
OraPO: Oracle-educated Reinforcement Learning for Data-efficient and Factual Radiology Report Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zhuoxiao and Yu, Hongyang and Xu, Ying and Luo, Yadan and Duong, Long and Li, Yuan-Fang}, title = {OraPO: Oracle-educated Reinforcement Learning for Data-efficient and Factual Radiology Report Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28275-28287} }
PA-Attack: Guiding Gray-Box Attacks on LVLM Vision Encoders with Prototypes and Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mei_2026_CVPR, author = {Mei, Hefei and Wang, Zirui and Xu, Chang and Guo, Jianyuan and Dong, Minjing}, title = {PA-Attack: Guiding Gray-Box Attacks on LVLM Vision Encoders with Prototypes and Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15679-15688} }
Variation-aware Vision Token Dropping for Faster Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Junjie and Liu, Xuyang and Wen, Zichen and Wang, Yiyu and Huang, Siteng and Chen, Honggang}, title = {Variation-aware Vision Token Dropping for Faster Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3489-3499} }
M4V: Multimodal Mamba for Efficient Text-to-Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Jiancheng and Zhang, Gengwei and Jie, Zequn and Jiao, Siyu and Qian, Yinlong and Chen, Ling and Wei, Yunchao and Ma, Lin}, title = {M4V: Multimodal Mamba for Efficient Text-to-Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36747-36757} }
TableMix: Enhancing Multimodal Table Reasoning in MLLMs from a Data-Centric Perspective-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Chaohu and Wang, Shida and Wang, Yubo and Xu, Linli}, title = {TableMix: Enhancing Multimodal Table Reasoning in MLLMs from a Data-Centric Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33553-33565} }
Widget2Code: From Visual Widgets to UI Code via Multimodal LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Houston H. and Zhang, Tao and Lin, Baoze and Xue, Yuanqi and Zhu, Yincheng and Liu, Huan and Gu, Li and Ye, Linfeng and Wang, Ziqiang and Zuo, Xinxin and Wang, Yang and Yu, Yuanhao and Chi, Zhixiang}, title = {Widget2Code: From Visual Widgets to UI Code via Multimodal LLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20293-20302} }
IntrinsicWeather: Controllable Weather Editing in Intrinsic Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Yixin and Zhu, Zuo-Liang and Yang, Jian and Ha\v{s}an, Milo\v{s} and Xie, Jin and Wang, Beibei}, title = {IntrinsicWeather: Controllable Weather Editing in Intrinsic Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30772-30781} }
Active Inference for Micro-Gesture Recognition: EFE-Guided Temporal Sampling and Adaptive Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Weijia and Yang, Jingyu and Zhang, Ruojia and Sun, Fengtao and Gao, Qian and Wang, Chenyang and Su, Tongtong and Guo, Jia and Li, Xiaobai and Shao, Minglai}, title = {Active Inference for Micro-Gesture Recognition: EFE-Guided Temporal Sampling and Adaptive Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13887-13896} }
Information-Theoretic Decomposition for Multimodal Interaction Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zequn and Wei, Yake and Ni, Haotian and Xu, Zhihao and Hu, Di}, title = {Information-Theoretic Decomposition for Multimodal Interaction Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30278-30287} }
Learnability-Guided Diffusion for Dataset Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Chan-Santiago_2026_CVPR, author = {Chan-Santiago, Jeffrey A. and Shah, Mubarak}, title = {Learnability-Guided Diffusion for Dataset Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41657-41666} }
Partial Weakly-Supervised Oriented Object Detection-
[pdf]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Mingxin and Zhang, Peiyuan and Liu, Yuan and Zhang, Wei and Zhou, Yue and Liao, Ning and Gong, Ziyang and Luo, Junwei and Wang, Zhirui and Yu, Yi and Yang, Xue}, title = {Partial Weakly-Supervised Oriented Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27644-27654} }
When Robots Should Say "I Don't Know": Benchmarking Abstention in Embodied Question Answering-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Tao and Zhou, Chuhao and Zhao, Guangyu and Cao, Haozhi and Pu, Yewen and Yang, Jianfei}, title = {When Robots Should Say ``I Don't Know'': Benchmarking Abstention in Embodied Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15266-15275} }
FMPose3D: monocular 3D pose estimation via flow matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Ti and Yu, Xiaohang and Mathis, Mackenzie Weygandt}, title = {FMPose3D: monocular 3D pose estimation via flow matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14669-14679} }
VABench: A Comprehensive Benchmark for Audio-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hua_2026_CVPR, author = {Hua, Daili and Wang, Xizhi and Zeng, Bohan and Huang, Xinyi and Liang, Hao and Niu, Junbo and Chen, Xinlong and Xu, Quanqing and Zhang, Wentao}, title = {VABench: A Comprehensive Benchmark for Audio-Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23345-23355} }
Taming Sampling Perturbations with Variance Expansion Loss for Latent Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Qifan and Zhou, Xingyu and Zhang, Jinhua and You, Weiyi and Gu, Shuhang}, title = {Taming Sampling Perturbations with Variance Expansion Loss for Latent Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43643-43652} }
Seeing Beyond: Extrapolative Domain Adaptive Panoramic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Yuanfan and Peng, Kunyu and Zheng, Xu and Yang, Kailun}, title = {Seeing Beyond: Extrapolative Domain Adaptive Panoramic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42115-42125} }
PointGS: Semantic-Consistent Unsupervised 3D Point Cloud Segmentation with 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Yixiao and Li, Qingyong and Wang, Wen and Yan, Zhicheng}, title = {PointGS: Semantic-Consistent Unsupervised 3D Point Cloud Segmentation with 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33343-33352} }
A Sanity Check for Multi-In-Domain Face Forgery Detection in the Real World-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Jikang and Yan, Renye and Yan, Zhiyuan and Gan, Yaozhong and Zhang, Xueyi and Wang, Zhongyuan and Peng, Wei and Liang, Ling}, title = {A Sanity Check for Multi-In-Domain Face Forgery Detection in the Real World}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21306-21315} }
DGGT: Feedforward 4D Reconstruction of Dynamic Driving Scenes using Unposed Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xiaoxue and Xiong, Ziyi and Chen, Yuantao and Li, Gen and Wang, Nan and Luo, Hongcheng and Chen, Long and Sun, Haiyang and Wang, Bing and Chen, Guang and Li, Hongyang and Zhang, Ya-Qin and Ye, Hangjun and Zhao, Hao}, title = {DGGT: Feedforward 4D Reconstruction of Dynamic Driving Scenes using Unposed Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1265-1276} }
ProPhy: Progressive Physical Alignment for Dynamic World Simulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zijun and Hu, Panwen and Wang, Jing and Zhang, Terry Jingchen and Cheng, Yuhao and Chen, Long and Yan, Yiqiang and Jiang, Zutao and Li, Hanhui and Liang, Xiaodan}, title = {ProPhy: Progressive Physical Alignment for Dynamic World Simulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14492-14501} }
FlashCap: Millisecond-Accurate Human Motion Capture via Flashing LEDs and Event-Based Vision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Zekai and Fan, Shuqi and Liu, Mengyin and Luo, Yuhua and Lin, Xincheng and Yan, Ming and Wu, Junhao and Lin, Xiuhong and Ma, Yuexin and Wen, Chenglu and Xu, Lan and Shen, Siqi and Wang, Cheng}, title = {FlashCap: Millisecond-Accurate Human Motion Capture via Flashing LEDs and Event-Based Vision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2221-2231} }
Venus: Benchmarking and Empowering Multimodal Large Language Models for Aesthetic Guidance and Cropping-
[pdf]
[arXiv]
[bibtex]@InProceedings{Du_2026_CVPR, author = {Du, Tianxiang and He, Hulingxiao and Peng, Yuxin}, title = {Venus: Benchmarking and Empowering Multimodal Large Language Models for Aesthetic Guidance and Cropping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37766-37776} }
IEBGL: An Interpretability-Enhanced Brain Graph Learning Framework with LLM-Instructed Topology and Literature-Augmented Semantics-
[pdf]
[supp]
[bibtex]@InProceedings{Duan_2026_CVPR, author = {Duan, Yihang and Huang, Shuo and Zhang, Li and Wang, Meiling and Zhang, Li}, title = {IEBGL: An Interpretability-Enhanced Brain Graph Learning Framework with LLM-Instructed Topology and Literature-Augmented Semantics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35331-35340} }
Semantic Scale Space: A Framework for Controllable Image Abstraction-
[pdf]
[supp]
[bibtex]@InProceedings{Mishiba_2026_CVPR, author = {Mishiba, Kazu}, title = {Semantic Scale Space: A Framework for Controllable Image Abstraction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17367-17376} }
E-RayZer: Self-supervised 3D Reconstruction as Spatial Visual Pre-training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Qitao and Tan, Hao and Wang, Qianqian and Bi, Sai and Zhang, Kai and Sunkavalli, Kalyan and Tulsiani, Shubham and Jiang, Hanwen}, title = {E-RayZer: Self-supervised 3D Reconstruction as Spatial Visual Pre-training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7525-7535} }
x^2-Fusion: Cross-Modality and Cross-Dimension Flow Estimation in Event Edge Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Ruishan and Ruan, Ciyu and Wang, Haoyang and Gong, Zihang and Xu, Jingao and Chen, Xinlei}, title = {x{\textasciicircum}2-Fusion: Cross-Modality and Cross-Dimension Flow Estimation in Event Edge Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15145-15155} }
RPGFusion: 4D Radar Prior-Guided Multi-Modal Fusion for 3D Detection-
[pdf]
[bibtex]@InProceedings{Qiu_2026_CVPR, author = {Qiu, Xin and Liu, Wenjie}, title = {RPGFusion: 4D Radar Prior-Guided Multi-Modal Fusion for 3D Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {284-294} }
Mind the Discriminability Trap in Source-Free Cross-domain Few-shot Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zhenyu and Zou, Yixiong and Li, Yuhua and Li, Ruixuan and Chen, Guangyao}, title = {Mind the Discriminability Trap in Source-Free Cross-domain Few-shot Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36978-36988} }
Block-based Learned Image Compression without Blocking Artifacts-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Jong Wook and Bahk, Suyong and Lee, TaeHwa and Cho, HyunDong and Kim, Donghyun and Lim, Sung-Chang and Choi, Jin Soo and Kim, Hui Yong}, title = {Block-based Learned Image Compression without Blocking Artifacts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19330-19338} }
FRAMER: Frequency-Aligned Self-Distillation with Adaptive Modulation Leveraging Diffusion Priors for Real-World Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, Seungho and Sung, Jeahun and Oh, Jihyong}, title = {FRAMER: Frequency-Aligned Self-Distillation with Adaptive Modulation Leveraging Diffusion Priors for Real-World Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23451-23461} }
SoccerMaster: A Vision Foundation Model for Soccer Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Haolin and Rao, Jiayuan and Wu, Haoning and Xie, Weidi}, title = {SoccerMaster: A Vision Foundation Model for Soccer Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21549-21560} }
Rounded or Streamlined Head? Bridging Concept Bottleneck Models and Attribute-Described Object Parts-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yang and Zhang, Jiajin and Hu, Yaojun and Hao, Bingguang and Cao, Xin and Xia, Yingda and Tu, Danyang and Gu, Shi and Zhang, Ling}, title = {Rounded or Streamlined Head? Bridging Concept Bottleneck Models and Attribute-Described Object Parts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9880-9890} }
Bridging the 2D-3D Gap: A Hierarchical Semantic-Geometric Map for Vision Language Navigation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Kailing and Qian, Tianwen and Yang, Lijin and Fu, Yuqian and Gong, Jingyu and Wang, Xiaoling and He, Liang}, title = {Bridging the 2D-3D Gap: A Hierarchical Semantic-Geometric Map for Vision Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15243-15252} }
Progressive Supernet Training for Efficient Visual Autoregressive Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xiaoyue and Shi, Yuling and Li, Kaiyuan and Wang, Huandong and Li, Yong and Gu, Xiaodong and Chen, Xinlei and Lin, Mingbao}, title = {Progressive Supernet Training for Efficient Visual Autoregressive Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37950-37959} }
Multi-Paradigm Collaborative Adversarial Attack Against Multi-Modal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yuanbo and Xu, Tianyang and Hu, Cong and Zhou, Tao and Wu, Xiao-Jun and Kittler, Josef}, title = {Multi-Paradigm Collaborative Adversarial Attack Against Multi-Modal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30065-30075} }
SparseWorld-TC: Trajectory-Conditioned Sparse Occupancy World Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2026_CVPR, author = {Du, Jiayuan and Zhao, Yiming and Guo, Zhenglong and Pan, Yong and Hou, Wenbo and Hao, Zhihui and Zhan, Kun and Chen, Qijun}, title = {SparseWorld-TC: Trajectory-Conditioned Sparse Occupancy World Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7425-7434} }
Universal 3D Shape Matching via Coarse-to-Fine Language Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Qinfeng and Mei, Guofeng and Yang, Bo and Zhang, Liying and Zhang, Jian and Yick, Kit-lun}, title = {Universal 3D Shape Matching via Coarse-to-Fine Language Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13157-13167} }
Incremental Object Detection via Future-Aware Decoupled Cross-Head Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Chenfeng and Cheng, De and Luo, Wenlong and Zeng, Mingyue and Zhang, Shizhou and Wang, Nannan and Gao, Xinbo}, title = {Incremental Object Detection via Future-Aware Decoupled Cross-Head Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39935-39944} }
VSRELL: A Simple Baseline for Video Super-Resolution and Enhancement in Low-Light Environment-
[pdf]
[supp]
[bibtex]@InProceedings{Hui_2026_CVPR, author = {Hui, Yanming and Shang, Fanhua and Liu, Hongying and Wang, Ben and Zhang, Zhenwei and Wan, Liang and Feng, Wei and Xue, Tong and Lv, Bingqin}, title = {VSRELL: A Simple Baseline for Video Super-Resolution and Enhancement in Low-Light Environment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16345-16354} }
Learning to Drive is a Free Gift: Large-Scale Label-Free Autonomy Pretraining from Unposed In-The-Wild Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Strong_2026_CVPR, author = {Strong, Matthew and Chang, Wei-Jer and Herau, Quentin and Yang, Jiezhi and Hu, Yihan and Peng, Chensheng and Zhan, Wei}, title = {Learning to Drive is a Free Gift: Large-Scale Label-Free Autonomy Pretraining from Unposed In-The-Wild Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32144-32153} }
MRD: Multi-resolution Retrieval-Detection Fusion for High-Resolution Image Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Fan and Dong, Xingping and Yu, Xin and Luo, Wenhan and Liu, Wei and Zhang, Kaihao}, title = {MRD: Multi-resolution Retrieval-Detection Fusion for High-Resolution Image Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2693-2703} }
RoMo: A Large-Scale, Richly Organized Dataset and Semantic Taxonomy for Human Motion Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jiahao and Liu, Joseph and Lee, Young-Yoon and Moon, Seonghyeon and Zordan, Victor and Tevet, Guy and Liu, C. Karen and Gould, Stephen and Jacob, Oren and Jiang, Haomiao and Kapadia, Mubbasir and Ben-Shabat, Yizhak}, title = {RoMo: A Large-Scale, Richly Organized Dataset and Semantic Taxonomy for Human Motion Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16408-16419} }
TransPrune: Token Transition Pruning for Efficient Large Vision-Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Ao and Duan, Yuxiang and Zhang, Jinghui and Ma, Congbo and Xie, Yutong and Carneiro, Gustavo and Yaqub, Mohammad and Wang, Hu}, title = {TransPrune: Token Transition Pruning for Efficient Large Vision-Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39529-39538} }
SEA-Flow3D: Simplified, Efficient, and Accurate Scene Flow via Spatial Vector Sampling and Multi-scale Refinement-
[pdf]
[supp]
[bibtex]@InProceedings{Ling_2026_CVPR, author = {Ling, Han and Sun, Quansen and Yao, Yinghua and Tsang, Ivor and Sun, Yinghui}, title = {SEA-Flow3D: Simplified, Efficient, and Accurate Scene Flow via Spatial Vector Sampling and Multi-scale Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36475-36484} }
SplatSuRe: Selective Super-Resolution for Multi-view Consistent 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Asthana_2026_CVPR, author = {Asthana, Pranav and Hanson, Alex and Tu, Allen and Goldstein, Tom and Zwicker, Matthias and Varshney, Amitabh}, title = {SplatSuRe: Selective Super-Resolution for Multi-view Consistent 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11840-11849} }
TAMER: A Tri-Modal Contrastive Alignment and Multi-Scale Embedding Refinement Framework for Zero-Shot ECG Diagnosis-
[pdf]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Xuewei and Meng, Yajie and Zeng, Pan and Tang, Xianfang and Cui, Feifei and Jin, Qiangguo and Yang, Jialiang and Xu, Junlin}, title = {TAMER: A Tri-Modal Contrastive Alignment and Multi-Scale Embedding Refinement Framework for Zero-Shot ECG Diagnosis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10502-10511} }
RunawayEvil: Jailbreaking the Image-to-Video Generative Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Songping and Qian, Rufan and Lyu, Yueming and Liu, Qinglong and Zou, Linzhuang and Qin, Jie and Liu, Songhua and Shan, Caifeng}, title = {RunawayEvil: Jailbreaking the Image-to-Video Generative Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9296-9305} }
Towards High-Quality Image Segmentation: Improving Topology Accuracy by Penalizing Neighbor Pixels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Valverde_2026_CVPR, author = {Valverde, Juan Miguel and Papadopoulos, Dim P. and Larsen, Rasmus and Dahl, Anders Bjorholm}, title = {Towards High-Quality Image Segmentation: Improving Topology Accuracy by Penalizing Neighbor Pixels}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13123-13133} }
Stronger Normalization-Free Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Mingzhi and Lu, Taiming and Zhu, Jiachen and Sun, Mingjie and Liu, Zhuang}, title = {Stronger Normalization-Free Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27418-27428} }
InstantRetouch: Efficient and High-Fidelity Instruction-Guided Image Retouching with Bilateral Space-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Jiarui and Wang, Yujin and Li, Ruikang and Zhang, Fan and Yao, Mingde and Xue, Tianfan}, title = {InstantRetouch: Efficient and High-Fidelity Instruction-Guided Image Retouching with Bilateral Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8216-8226} }
Residual Decoder Adapter: ID-Preserving Tokenizer Adaption for Autoregressive Text Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Dongxing and Wang, Alex Jinpeng and Tang, Jiahao and Lin, Kevin Qinghong and Li, Linjie and Yang, Zhengyuan and Wang, Lijuan and Li, Min and Tan, Jingru}, title = {Residual Decoder Adapter: ID-Preserving Tokenizer Adaption for Autoregressive Text Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22017-22027} }
When AVSR Meets Video Conferencing: Dataset, Degradation, and the Hidden Mechanism Behind Performance Collapse-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Yihuan and Xue, Jun and Jiajun, Liu and Li, Daixian and Zhang, Tong and Yi, Zhuolin and Ren, Yanzhen and Li, Kai}, title = {When AVSR Meets Video Conferencing: Dataset, Degradation, and the Hidden Mechanism Behind Performance Collapse}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4448-4457} }
A More Word-like Image Tokenization for MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Hyun and Jeong, Hyemin and Kim, Yejin and Choi, Hyungwook and Cho, Hyunsoo and Kim, Soo Kyung and Lee, Joonseok}, title = {A More Word-like Image Tokenization for MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17641-17650} }
Turning Pre-Trained Vision Transformers into End-to-End Histopathology Whole Slide Image Models for Survival Prediction-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jiawen and Hu, Jiali and Ling, Xitong and Yan, Renao and Chen, Yuxuan and Guan, Tian and He, Yonghong}, title = {Turning Pre-Trained Vision Transformers into End-to-End Histopathology Whole Slide Image Models for Survival Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21046-21056} }
Bias Is a Subspace, Not a Coordinate: A Geometric Rethinking of Post-hoc Debiasing in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Dachuan and Li, Weiyue and Shen, Zhenda and Qiu, Yushu and Xu, Bowen and Chen, Haoyu and Chen, Yongchao}, title = {Bias Is a Subspace, Not a Coordinate: A Geometric Rethinking of Post-hoc Debiasing in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10230-10240} }
SAMTok: Representing Any Mask with Two Words-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Yikang and Zhang, Tao and Gong, Dengxian and Wu, Yuanzheng and Tian, Ye and Wang, Haochen and Yuan, Haobo and Wang, Jiacong and Qi, Lu and Fei, Hao and Ji, Shunping and Wang, Anran and Wang, Zhuochen and Wang, Yujing and Chen, Cheng and Li, Xiangtai}, title = {SAMTok: Representing Any Mask with Two Words}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37852-37863} }
Iris: Integrating Language into Diffusion-based Monocular Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Ziyao and Ni, Jingcheng and Wang, Daniel and Rim, Patrick and Chung, Younjoon and Yang, Fengyu and Hong, Byung-Woo and Wong, Alex}, title = {Iris: Integrating Language into Diffusion-based Monocular Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34193-34205} }
MetroGS: Efficient and Stable Reconstruction of Geometrically Accurate High-Fidelity Large-Scale Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Kehua and Mao, Tianlu and Ma, Xinzhu and Jiang, Hao and Li, Zehao and Liu, Zihan and Gao, Shuqin and Zhao, Honglong and Dai, Feng and Zhang, Yucheng and Wang, Zhaoqi}, title = {MetroGS: Efficient and Stable Reconstruction of Geometrically Accurate High-Fidelity Large-Scale Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {395-405} }
CARLoS: Retrieval via Concise Assessment Representation of LoRAs at Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sarfaty_2026_CVPR, author = {Sarfaty, Shahar and Haviv, Adi and Hacohen, Uri and Elkin-Koren, Niva and Livni, Roi and Bermano, Amit H.}, title = {CARLoS: Retrieval via Concise Assessment Representation of LoRAs at Scale}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23922-23932} }
DecoVLN: Decoupling Observation, Reasoning, and Correction for Vision-and-Language Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xin_2026_CVPR, author = {Xin, Zihao and Li, Wentong and Jiang, Yixuan and Wang, Bin and Cong, Runmin and Qin, Jie and Huang, Shengjun}, title = {DecoVLN: Decoupling Observation, Reasoning, and Correction for Vision-and-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32410-32420} }
PET-DINO: Unifying Visual Cues into Grounding DINO with Prompt-Enriched Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fu_2026_CVPR, author = {Fu, Weifu and Li, Jinyang and Gao, Bin-Bin and Li, Jialin and Lin, Yuhuan and Deng, Hanqiu and Tao, Wenbing and Liu, Yong and Wang, Chengjie}, title = {PET-DINO: Unifying Visual Cues into Grounding DINO with Prompt-Enriched Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13039-13048} }
Dual-Agent Reinforcement Learning for Adaptive and Cost-Aware Visual-Inertial Odometry-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Feiyang and Zheng, Shenghe and Yin, Chunyan and Dou, Guangbin}, title = {Dual-Agent Reinforcement Learning for Adaptive and Cost-Aware Visual-Inertial Odometry}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24885-24894} }
UniRefiner: Teaching Pre-trained ViTs to Self-Dispose Dross via Contrastive Register-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2026_CVPR, author = {Qiu, Congpei and Hu, Zhaoyu and Ke, Wei and Tian, Zhuotao and Wu, Yanhao and Zhang, Tong}, title = {UniRefiner: Teaching Pre-trained ViTs to Self-Dispose Dross via Contrastive Register}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10061-10070} }
SafeLogo: Turning Your Logos into Jailbreak Shields via Micro-Regional Adversarial Training-
[pdf]
[supp]
[bibtex]@InProceedings{Duan_2026_CVPR, author = {Duan, Zhiyi and Zhang, Xiaoyue and Man, Tianxing}, title = {SafeLogo: Turning Your Logos into Jailbreak Shields via Micro-Regional Adversarial Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37611-37620} }
FastHybrid: Accelerating Hybrid Autoregressive Image Generation with Lookahead and Guided Decoding-
[pdf]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Zhengguo and Zhang, Fang and Hua, Yongxiang and Li, Bocheng and Zhang, Wentao and Xu, Linli}, title = {FastHybrid: Accelerating Hybrid Autoregressive Image Generation with Lookahead and Guided Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23204-23214} }
Towards Balanced Multi-Modal Learning in 3D Human Pose Estimation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Qi_2026_CVPR, author = {Qi, Mengshi and Peng, Jiaxuan and Zhang, Xianlin and Ma, Huadong}, title = {Towards Balanced Multi-Modal Learning in 3D Human Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21231-21241} }
Attention Surgery: An Efficient Recipe to Linearize Your Video Diffusion Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ghafoorian_2026_CVPR, author = {Ghafoorian, Mohsen and Korzhenkov, Denis and Habibian, Amirhossein}, title = {Attention Surgery: An Efficient Recipe to Linearize Your Video Diffusion Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32915-32925} }
Elastic Weight Consolidation Done Right for Continual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xuan and Chang, Xiaobin}, title = {Elastic Weight Consolidation Done Right for Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3930-3940} }
ConsID-Gen: View-Consistent and Identity-Preserving Image-to-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Mingyang and Mishra, Ashirbad and Dey, Soumik and Xing, Shuo and Ravipati, Naveen and Wu, Hansi and Li, Binbin and Tu, Zhengzhong}, title = {ConsID-Gen: View-Consistent and Identity-Preserving Image-to-Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1853-1863} }
VIRO: Robust and Efficient Neuro-Symbolic Reasoning with Verification for Referring Expression Comprehension-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Hyejin and Kwon, Junhyuk and Kwak, Suha and Ok, Jungseul}, title = {VIRO: Robust and Efficient Neuro-Symbolic Reasoning with Verification for Referring Expression Comprehension}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33426-33435} }
Best Segmentation Buddies for Image-Shape Correspondence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lang_2026_CVPR, author = {Lang, Itai and Lyu, Dongwei and Decatur, Dale and Hanocka, Rana}, title = {Best Segmentation Buddies for Image-Shape Correspondence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20499-20510} }
SketchDeco: Training-Free Latent Composition for Precise Sketch Colourisation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Utintu_2026_CVPR, author = {Utintu, Chaitat and Song, Yi-Zhe}, title = {SketchDeco: Training-Free Latent Composition for Precise Sketch Colourisation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {484-494} }
SynthRGB-T: Language-Vision Guided Image Translation for Diversity Synthesis-
[pdf]
[bibtex]@InProceedings{Ding_2026_CVPR, author = {Ding, Jiangang and Du, Yiquan and Li, Pengxiang and Pei, Lili and Zhao, Yuanlin and Li, Wei}, title = {SynthRGB-T: Language-Vision Guided Image Translation for Diversity Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17259-17269} }
Few-Shot Incremental 3D Object Detection in Dynamic Indoor Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Yun and Qian, Jianjun and Yang, Jian and Xie, Jin and Zhao, Na}, title = {Few-Shot Incremental 3D Object Detection in Dynamic Indoor Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18786-18795} }
Better, Stronger, Faster: Tackling the Trilemma in MLLM-based Segmentation with Simultaneous Textual Mask Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Jiazhen and Feng, Mingkuan and Chen, Long}, title = {Better, Stronger, Faster: Tackling the Trilemma in MLLM-based Segmentation with Simultaneous Textual Mask Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33121-33130} }
DDSF: Robust Few-Shot Learning via Disentangled Subspaces with Determinantal Point Process-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Xulun and Mei, Yifan and Zhou, Kun and Wu, Zelei and Zhao, Jieyu}, title = {DDSF: Robust Few-Shot Learning via Disentangled Subspaces with Determinantal Point Process}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19591-19601} }
VIRAL: Visual Sim-to-Real at Scale for Humanoid Loco-Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Tairan and Wang, Zi and Xue, Haoru and Ben, Qingwei and Luo, Zhengyi and Xiao, Wenli and Yuan, Ye and Da, Xingye and Casta\~neda, Fernando and Sastry, Shankar and Liu, Changliu and Shi, Guanya and Fan, Linxi and Zhu, Yuke}, title = {VIRAL: Visual Sim-to-Real at Scale for Humanoid Loco-Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13430-13441} }
Mirror Illusion Art-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Xiaopei and Li, Zeyuan and Zhu, Jun and Hu, Xiaolin}, title = {Mirror Illusion Art}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31577-31585} }
DiffusionHarmonizer: Bridging Neural Reconstruction and Photorealistic Simulation with Online Diffusion Enhancer-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yuxuan and T\'othov\'a, Katar{\'\i}na and Wang, Zian and Yin, Kangxue and Turki, Haithem and de Lutio, Riccardo and Chang, Yen-Yu and Litany, Or and Fidler, Sanja and Gojcic, Zan}, title = {DiffusionHarmonizer: Bridging Neural Reconstruction and Photorealistic Simulation with Online Diffusion Enhancer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43494-43504} }
Parallel Rigidity Matters for Bundle Adjustment-
[pdf]
[supp]
[bibtex]@InProceedings{Manam_2026_CVPR, author = {Manam, Lalit and Govindu, Venu Madhav}, title = {Parallel Rigidity Matters for Bundle Adjustment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29035-29046} }
BiPreManip: Learning Affordance-Based Bimanual Preparatory Manipulation through Anticipatory Collaboration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2026_CVPR, author = {Shen, Yan and Jiang, Feng and He, Zichen and Li, Xiaoqi and Liu, Yuchen and Li, Zhiyu and Wu, Ruihai and Dong, Hao}, title = {BiPreManip: Learning Affordance-Based Bimanual Preparatory Manipulation through Anticipatory Collaboration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42430-42440} }
CATNet: Collaborative Alignment and Transformation Network for Cooperative Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Gong and Zhang, Chaokun and Tang, Tao and Lv, Pengcheng and Li, Feng and Xie, Xin}, title = {CATNet: Collaborative Alignment and Transformation Network for Cooperative Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18724-18733} }
PP-OCRv5: A Specialized 5M-Parameter Model Rivaling Billion-Parameter Vision-Language Models on OCR Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cui_2026_CVPR, author = {Cui, Cheng and Zhang, Yubo and Sun, Ting and Wang, Xueqing and Liu, Hongen and Lin, Manhui and Zhang, Yue and Gao, Tingquan and Zhou, Changda and Liu, Jiaxuan and Zhang, Zelun and Zhang, Jing and Zhang, Jun and Liu, Yi}, title = {PP-OCRv5: A Specialized 5M-Parameter Model Rivaling Billion-Parameter Vision-Language Models on OCR Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2467-2476} }
RegionRoute: Regional Style Transfer with Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Bowen and Zuena, Jake and Bovik, Alan C. and Kothandaraman, Divya}, title = {RegionRoute: Regional Style Transfer with Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35736-35746} }
SO(3)-Equivariant ViT-Adapter for Data-Efficient Zero-Shot Sim-to-Real Indoor Panoramic Depth Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Ziyan and Zhang, Qiudan and Ma, Lin and Wang, Xu}, title = {SO(3)-Equivariant ViT-Adapter for Data-Efficient Zero-Shot Sim-to-Real Indoor Panoramic Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5740-5750} }
Expanding Spatial and Temporal Context for Robotic Imitation Learning With Scene Graphs-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2026_CVPR, author = {Qian, Jianing and Peng, Qinhe and Panov, Emmanuel and Fermoselle, Leonor and Jayaraman, Dinesh and Bucher, Bernadette and Kelestemur, Tarik}, title = {Expanding Spatial and Temporal Context for Robotic Imitation Learning With Scene Graphs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28010-28020} }
Towards Training-free Scene Text Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yubo and Qin, Xugong and Zhang, Peng and Lin, Hailun and Zeng, Gangyan and Zhang, Kexin}, title = {Towards Training-free Scene Text Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15291-15301} }
ACoT-VLA: Action Chain-of-Thought for Vision-Language-Action Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2026_CVPR, author = {Zhong, Linqing and Liu, Yi and Wei, Yifei and Xiong, Ziyu and Liu, Si and Ren, Guanghui}, title = {ACoT-VLA: Action Chain-of-Thought for Vision-Language-Action Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8152-8162} }
Fast SceneScript: Fast and Accurate Language-Based 3D Scene Understanding via Multi-Token Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Ruihong and Shi, Xuepeng and Bailo, Oleksandr and Manfredi, Marco and Gevers, Theo}, title = {Fast SceneScript: Fast and Accurate Language-Based 3D Scene Understanding via Multi-Token Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2457-2466} }
MSPT: Efficient Large-Scale Physical Modeling via Parallelized Multi-Scale Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Curvo_2026_CVPR, author = {Curvo, Pedro M. P. and van de Meent, Jan-Willem and Zhdanov, Maksim}, title = {MSPT: Efficient Large-Scale Physical Modeling via Parallelized Multi-Scale Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12924-12933} }
Merge3D: Efficient 3D Multimodal LLMs via Joint 2D-3D Token Merging-
[pdf]
[supp]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Tianbo and Yang, Xingyi and Wang, Xinchao}, title = {Merge3D: Efficient 3D Multimodal LLMs via Joint 2D-3D Token Merging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31066-31077} }
Spatial-Frequency Collaborative Learning for Occluded Visible-Infrared Person Re-Identification-
[pdf]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Jian and Feng, Yujian and You, Shuai and Zhou, Zhongkai and Wu, Fei and Jing, Zhengjun and Ji, Yimu}, title = {Spatial-Frequency Collaborative Learning for Occluded Visible-Infrared Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4343-4352} }
IMAIA: Interactive Maps AI Assistant for Travel Planning and Geo-Spatial Intelligence-
[pdf]
[arXiv]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Jieren and Hu, Zhizhang and He, Ziyan and Cvetkovic, Aleksandar and Chung, Pak Kiu and Yankov, Dragomir and Zhang, Chiqun}, title = {IMAIA: Interactive Maps AI Assistant for Travel Planning and Geo-Spatial Intelligence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40064-40073} }
SARMAE: Masked Autoencoder for SAR Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Danxu and Wang, Di and Wang, Hebaixu and Chen, Haoyang and Jiang, Wentao and Cheng, Yilin and Guo, Haonan and Cui, Wei and Zhang, Jing}, title = {SARMAE: Masked Autoencoder for SAR Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6496-6507} }
ActiveGrasp: Information-Guided Active Grasping with Calibrated Energy-based Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2026_CVPR, author = {Lei, Boshu and Jiang, Wen and Daniilidis, Kostas}, title = {ActiveGrasp: Information-Guided Active Grasping with Calibrated Energy-based Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42418-42429} }
Prototype-as-Prompt: Multimodal Sentiment Prototypes Endowing Large Language Models the Capability to Perform Multimodal Sentiment Analysis-
[pdf]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Xianbing and Luo, Lan and Lu, Hengyang and Tang, Buzhou}, title = {Prototype-as-Prompt: Multimodal Sentiment Prototypes Endowing Large Language Models the Capability to Perform Multimodal Sentiment Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23010-23020} }
TROPHIES: Temporal Reconstruction of Places, Humans, and Cameras from Multi-view Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Jinpeng and Xu, Yukang and Li, Yutong and Liu, Xingyu}, title = {TROPHIES: Temporal Reconstruction of Places, Humans, and Cameras from Multi-view Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21154-21164} }
SVHalluc: Benchmarking Speech-Vision Hallucination in Audio-Visual Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Chenshuang and Kim, Kyeong Seon and Liu, Chengxin and Oh, Tae-Hyun}, title = {SVHalluc: Benchmarking Speech-Vision Hallucination in Audio-Visual Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25304-25314} }
Bridging Privacy and Provenance: Traceable Virtual Identity Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Xianhan and Hu, Xiaoxiao and Li, Sheng and Qian, Zhenxing and Zhang, Xinpeng}, title = {Bridging Privacy and Provenance: Traceable Virtual Identity Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32367-32376} }
Modeling the Visual Ambiguity of Human Sketches-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Yang and Ni, Ping and Wang, Jin and Jia, Senyun and Yan, Jingdan and Huang, Kaixiang and Lu, Guodong and Yang, Jingru and He, Shengfeng}, title = {Modeling the Visual Ambiguity of Human Sketches}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16876-16886} }
CrossEarth-Gate: Fisher-Guided Adaptive Tuning Engine for Efficient Adaptation of Cross-Domain Remote Sensing Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Shilei and Gong, Ziyang and Lin, Hehai and Liu, Yang and Cheng, Jiashun and Hu, Xiaoxing and Liang, Haoyuan and Li, Guowen and Qin, Chengwei and Cheng, Hong and Yang, Xue and Zheng, Juepeng and Fu, Haohuan}, title = {CrossEarth-Gate: Fisher-Guided Adaptive Tuning Engine for Efficient Adaptation of Cross-Domain Remote Sensing Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13222-13233} }
VMonarch: Efficient Video Diffusion Transformers with Structured Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Cheng and Chen, Haoxian and Hou, Liang and Fan, Qi and Wu, Gangshan and Tao, Xin and Wang, Limin}, title = {VMonarch: Efficient Video Diffusion Transformers with Structured Attention}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4613-4623} }
Convexity-Aware Noise Calibration: A Self-Supervised Framework for Noise-Level-Unknown Image Denoising-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zhan and Wang, Leiquan and Wu, Chunlei and Meng, Yu}, title = {Convexity-Aware Noise Calibration: A Self-Supervised Framework for Noise-Level-Unknown Image Denoising}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29929-29938} }
SGAD-SLAM: Splatting Gaussians at Adjusted Depth for Better Radiance Fields in RGBD SLAM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Pengchong and Han, Zhizhong}, title = {SGAD-SLAM: Splatting Gaussians at Adjusted Depth for Better Radiance Fields in RGBD SLAM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18934-18945} }
OddGridBench: Exposing the Lack of Fine-Grained Visual Discrepancy Sensitivity in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Weng_2026_CVPR, author = {Weng, Tengjin and Jiang, Wenhao and Wang, Jingyi and Li, Ming and Ma, Lin and Ming, Zhong}, title = {OddGridBench: Exposing the Lack of Fine-Grained Visual Discrepancy Sensitivity in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1575-1584} }
YieldSAT: A Multimodal Benchmark Dataset for High-Resolution Crop Yield Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Miranda_2026_CVPR, author = {Miranda, Miro and Pathak, Deepak and Helber, Patrick and Bischke, Benjamin and Najjar, Hiba and Mena, Francisco and Sanchez, Cristhian and Pai, Akshay and Arenas, Diego and Valdenegro-Toro, Matias and Charfuelan, Marcela and Nuske, Marlon and Dengel, Andreas}, title = {YieldSAT: A Multimodal Benchmark Dataset for High-Resolution Crop Yield Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22920-22930} }
SAIL: Similarity-Aware Guidance and Inter-Caption Augmentation-based Learning for Weakly-Supervised Dense Video Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Ye-Chan and Cha, SeungJu and Kim, Si-Woo and Jeon, Minju and Kim, Hyungee and Kim, Dong-Jin}, title = {SAIL: Similarity-Aware Guidance and Inter-Caption Augmentation-based Learning for Weakly-Supervised Dense Video Captioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3466-3475} }
ReSAM: Refine, Requery, and Reinforce: Self-Prompting Point-Supervised Segmentation for Remote Sensing Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Subhani_2026_CVPR, author = {Subhani, Muhammad Naseer}, title = {ReSAM: Refine, Requery, and Reinforce: Self-Prompting Point-Supervised Segmentation for Remote Sensing Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3805-3814} }
LEMON: A Large Endoscopic MONocular Dataset and Foundation Model for Perception in Surgical Settings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Che_2026_CVPR, author = {Che, Chengan and Wang, Chao and Vercauteren, Tom and Tsoka, Sophia and Garcia-Peraza-Herrera, Luis C.}, title = {LEMON: A Large Endoscopic MONocular Dataset and Foundation Model for Perception in Surgical Settings}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42659-42669} }
FairLLaVA: Fairness-Aware Parameter-Efficient Fine-Tuning for Large Vision-Language Assistants-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bhosale_2026_CVPR, author = {Bhosale, Mahesh and Wasi, Abdul and Srivastava, Shantam and Latif, Shifa and Luan, Tianyu and Gao, Mingchen and Doermann, David and Gong, Xuan}, title = {FairLLaVA: Fairness-Aware Parameter-Efficient Fine-Tuning for Large Vision-Language Assistants}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42615-42625} }
Illuminating Visual Identity in Universal Multimodal Embeddings-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Jiawei and Feng, Junyi and Hua, Jiashen and Huang, Ziheng and Deng, Bing and Wu, Kaijie and Gu, Chaochen and Ye, Jieping}, title = {Illuminating Visual Identity in Universal Multimodal Embeddings}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8737-8748} }
Large-scale Robust Enhanced Ensemble Clustering via Outlier Decoupling-
[pdf]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Jiaxuan and Duan, Lei and Wang, Xinye and Du, Liang}, title = {Large-scale Robust Enhanced Ensemble Clustering via Outlier Decoupling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39691-39700} }
FrankenMotion: Part-level Human Motion Generation and Composition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Chuqiao and Xie, Xianghui and Cao, Yong and Geiger, Andreas and Pons-Moll, Gerard}, title = {FrankenMotion: Part-level Human Motion Generation and Composition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16420-16431} }
Disentanglement-wise Image Dehazing through Cross-Domain Manifold Consensus-
[pdf]
[supp]
[bibtex]@InProceedings{Lyu_2026_CVPR, author = {Lyu, Tianyi and Ju, Mingye and Ma, Kai-Kuang}, title = {Disentanglement-wise Image Dehazing through Cross-Domain Manifold Consensus}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22733-22743} }
Event-Based Motion Deblurring Using Task-Oriented 3D Gaussian Event Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Xue_2026_CVPR, author = {Xue, Shengdong and Ma, Haoxiang and Chen, Hao and Yang, Zhen and Deng, Yongjian}, title = {Event-Based Motion Deblurring Using Task-Oriented 3D Gaussian Event Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29547-29556} }
SafeGRPO: Self-Rewarded Multimodal Safety Alignment via Rule-Governed Policy Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rong_2026_CVPR, author = {Rong, Xuankun and Huang, Wenke and Wang, Tingfeng and Zhou, Daiguo and Du, Bo and Ye, Mang}, title = {SafeGRPO: Self-Rewarded Multimodal Safety Alignment via Rule-Governed Policy Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7901-7911} }
Uncertainty-driven 3D Gaussian Splatting Active Mapping via Anisotropic Visibility Field-
[pdf]
[supp]
[bibtex]@InProceedings{Xue_2026_CVPR, author = {Xue, Shangjie and Dill, Jesse and Ahuja, Dhruv and Dellaert, Frank and Tsiotras, Panagiotis and Xu, Danfei}, title = {Uncertainty-driven 3D Gaussian Splatting Active Mapping via Anisotropic Visibility Field}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5014-5026} }
Revisiting Sparsity Constraint Under High-Rank Property in Partial Multi-Label Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Si_2026_CVPR, author = {Si, Chongjie and Cui, Yidan and Yang, Fuchao and Shen, Wei}, title = {Revisiting Sparsity Constraint Under High-Rank Property in Partial Multi-Label Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17724-17733} }
One-Step Diffusion Transformer for Controllable Real-World Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Yushun and Chen, Yuxiang and Yin, Shibo and Hu, Qiang and Yao, Jiangchao and Zhang, Ya and Zhang, Xiaoyun and Wang, Yanfeng}, title = {One-Step Diffusion Transformer for Controllable Real-World Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23440-23450} }
Radiance Meshes for Volumetric Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mai_2026_CVPR, author = {Mai, Alexander and Hedstrom, Trevor and Kopanas, George and Kontkanen, Janne and Kuester, Falko and Barron, Jonathan T.}, title = {Radiance Meshes for Volumetric Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8267-8277} }
Unified Primitive Proxies for Structured Shape Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zhaiyu and Wang, Yuqing and Zhu, Xiao Xiang}, title = {Unified Primitive Proxies for Structured Shape Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7457-7467} }
Reconstructing Spiking Neural Networks Using a Single Neuron with Autapses-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Wuque and Sun, Hongze and Tang, Quan and Mao, Shifeng and Wang, Zhenxing and He, Jiayi and Chen, Duo and Yao, Dezhong and Guo, Daqing}, title = {Reconstructing Spiking Neural Networks Using a Single Neuron with Autapses}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20283-20292} }
IsoCLIP: Decomposing CLIP Projectors for Efficient Intra-modal Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Magistri_2026_CVPR, author = {Magistri, Simone and Goswami, Dipam and Mistretta, Marco and Twardowski, Bart{\l}omiej and van de Weijer, Joost and Bagdanov, Andrew D.}, title = {IsoCLIP: Decomposing CLIP Projectors for Efficient Intra-modal Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29315-29324} }
RISE: Single Static Radar-based Indoor Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Kaichen and Dodds, Laura and Afzal, Sayed Saad and Adib, Fadel}, title = {RISE: Single Static Radar-based Indoor Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32194-32205} }
SARL-STG: A Spatially Aware Reinforcement Learning Framework for Refining MLLMs in Spatio-Temporal Video Grounding-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Hong and Xu, Xiangkai and Zhong, Bin and Yin, Junjie and Kang, Fangyu and Xu, Yutong and Dong, Xiugang and Gao, Xurui and Zhang, Min-Ling}, title = {SARL-STG: A Spatially Aware Reinforcement Learning Framework for Refining MLLMs in Spatio-Temporal Video Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24630-24639} }
FlowDIS: Language-Guided Dichotomous Image Segmentation with Flow Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sargsyan_2026_CVPR, author = {Sargsyan, Andranik and Navasardyan, Shant}, title = {FlowDIS: Language-Guided Dichotomous Image Segmentation with Flow Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42039-42048} }
ColorFLUX: A Structure-Color Decoupling Framework for Old Photo Colorization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Bingchen and Wang, Zhixin and Li, Fan and Xu, Jiaqi and Guo, Jiaming and Pei, Renjing and Li, Xin and Chen, Zhibo}, title = {ColorFLUX: A Structure-Color Decoupling Framework for Old Photo Colorization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15574-15584} }
Cross-Subject EEG-to-Video Reconstruction and Beyond-
[pdf]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Runduo and Tan, Hongchen}, title = {Cross-Subject EEG-to-Video Reconstruction and Beyond}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23294-23303} }
MeteorPred: A Meteorological Multimodal Large Model and Dataset for Severe Weather Event Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Shuo and Xu, Jian and Zhang, Jiadong and Chen, Yi and Jin, Qizhao and Shen, Lingdong and Liu, Chenglin and Xiang, Shiming}, title = {MeteorPred: A Meteorological Multimodal Large Model and Dataset for Severe Weather Event Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22910-22919} }
SkyReels-Text: Fine-Grained Font-Controllable Text Editing for Poster Design-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Yunjie and Wu, Jingchen and Zhu, Junchen and Lin, Chunze and Chen, Guibin}, title = {SkyReels-Text: Fine-Grained Font-Controllable Text Editing for Poster Design}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14875-14884} }
SIGMA: A Physics-Based Benchmark for Gas Chimney Understanding in Seismic Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Truong_2026_CVPR, author = {Truong, Bao and Nguyen, Quang and Huang, Baoru and Han, Jinpei and Nguyen, Van and Le, Ngan and Pham, Minh-Tan and Hien, Doan Huy and Nguyen, Anh}, title = {SIGMA: A Physics-Based Benchmark for Gas Chimney Understanding in Seismic Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20542-20552} }
TeFlow: Enabling Multi-frame Supervision for Self-Supervised Feed-forward Scene Flow Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Qingwen and Jiang, Chenhan and Zhu, Xiaomeng and Miao, Yunqi and Zhang, Yushan and Andersson, Olov and Jensfelt, Patric}, title = {TeFlow: Enabling Multi-frame Supervision for Self-Supervised Feed-forward Scene Flow Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3667-3676} }
Batch Loss Score for Dynamic Data Pruning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Qing and Zhao, Bingxuan and Yang, Tao and Zhang, Hongyuan and Gao, Junyu and Wang, Qi}, title = {Batch Loss Score for Dynamic Data Pruning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6188-6197} }
OmniFood8K: Single-Image Nutrition Estimation via Hierarchical Frequency-Aligned Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Dongjian and Min, Weiqing and Jiang, Qian and Lin, Xing and Jin, Xin and Jiang, Shuqiang}, title = {OmniFood8K: Single-Image Nutrition Estimation via Hierarchical Frequency-Aligned Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41562-41572} }
SketchFaceGS: Real-Time Sketch-Driven Face Editing and Generation with Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Bo and Kang, Jiahao and Ma, Yubo and Liu, Feng-Lin and Liu, Bin and Zhang, Fang-Lue and Gao, Lin}, title = {SketchFaceGS: Real-Time Sketch-Driven Face Editing and Generation with Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40020-40030} }
DeepfakeImpact: A Two-Stage Benchmark with Real-World Impact in Deepfake Detection-
[pdf]
[bibtex]@InProceedings{Gong_2026_CVPR, author = {Gong, Chaoyu and Zhang, Han and Luo, Siqiang}, title = {DeepfakeImpact: A Two-Stage Benchmark with Real-World Impact in Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35451-35461} }
Spatio-Temporal Conditional Denoising Transformer for Modality-Missing RGBT Tracking-
[pdf]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Andong and Zha, Ziyi and Jin, Jiandong and Li, Shihao and Li, Chenglong and Tang, Jin and Luo, Bin}, title = {Spatio-Temporal Conditional Denoising Transformer for Modality-Missing RGBT Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13584-13593} }
Learning Like Humans: Analogical Concept Learning for Generalized Category Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Jizhou and Ding, Chenhao and He, Yuhang and Wang, Qiang and Wang, Shaokun and Dong, SongLin and Gong, Yihong}, title = {Learning Like Humans: Analogical Concept Learning for Generalized Category Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28734-28743} }
Keep It Frozen: Domain-Routed Conditional Residual Modulation for Multi-Domain Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Khan_2026_CVPR, author = {Khan, Ufaq and Nawaz, Umair and Caputo, Massimo and Bilal, Muhammad and Qadir, Junaid and Khan, Muhammad Haris}, title = {Keep It Frozen: Domain-Routed Conditional Residual Modulation for Multi-Domain Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21016-21025} }
Logit-Margin Repulsion for Backdoor Defense-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zhiguo and Xu, Dongsheng and Zhong, Ruizhi and Pi, Jiacheng and Huang, Xingxing and Ruan, Wenjie}, title = {Logit-Margin Repulsion for Backdoor Defense}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34918-34928} }
Compositional Text-to-Image Generation Via Region-aware Bimodal Direct Preference Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Zhuohan and Peng, Wujian and Chen, Yitong and Wu, Zuxuan}, title = {Compositional Text-to-Image Generation Via Region-aware Bimodal Direct Preference Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36604-36614} }
Agentic Video Summarization via Self-Reflecting Multimodal Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Miaotian and Dou, Shuguang and Li, Yin and Men, Aidong and Jiang, Dongsheng}, title = {Agentic Video Summarization via Self-Reflecting Multimodal Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40516-40526} }
SignPR: A Progressive Vector-Quantized Diffusion Framework for Sign Language Production-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xiao and Gan, Shiwei and Yin, Yafeng and Guo, Bowen and Jiang, Zhiwei and Meng, Shunmei and Xie, Lei and Lu, Sanglu}, title = {SignPR: A Progressive Vector-Quantized Diffusion Framework for Sign Language Production}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2198-2208} }
WorldMM: Dynamic Multimodal Memory Agent for Long Video Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yeo_2026_CVPR, author = {Yeo, Woongyeong and Kim, Kangsan and Yoon, Jaehong and Hwang, Sung Ju}, title = {WorldMM: Dynamic Multimodal Memory Agent for Long Video Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25599-25609} }
GaussianPile: A Unified Sparse Gaussian Splatting Framework for Slice-based Volumetric Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kong_2026_CVPR, author = {Kong, Di and Wang, Yikai and Guo, Wenjie and Bu, Yifan and Zhang, Boya and Duan, Yuexin and Yue, Xiawei and Du, Wenbiao and Zhong, Yiman and Chen, Yuwen and Ma, Cheng}, title = {GaussianPile: A Unified Sparse Gaussian Splatting Framework for Slice-based Volumetric Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19022-19032} }
Grid Distillation: Compositional Image Distillation via Structured Generative Grids-
[pdf]
[supp]
[bibtex]@InProceedings{Das_2026_CVPR, author = {Das, Biplab Ch and Das, Shouvik and Gopalakrishnan, Viswanath}, title = {Grid Distillation: Compositional Image Distillation via Structured Generative Grids}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19644-19653} }
SpotEdit: Selective Region Editing in Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qin_2026_CVPR, author = {Qin, Zhibin and Tan, Zhenxiong and Wang, Zeqing and Liu, Songhua and Wang, Xinchao}, title = {SpotEdit: Selective Region Editing in Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18683-18692} }
Easy2Hard: From Partially to Fully Unmatched Modalities as Negative Samples in Contrastive Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zhicheng and Liu, Yichen and Ge, Chang and Jiang, Xiaopeng}, title = {Easy2Hard: From Partially to Fully Unmatched Modalities as Negative Samples in Contrastive Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30226-30234} }
Revisiting the Necessity of Full Accuracy: Weakly Supervised Object-Level Offset Correction for Misaligned Building Labels-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Junda and Liu, Yanmeng and Zeng, Xiangqiang and Wu, Jinrong and Qu, Ying and Zhang, Libao}, title = {Revisiting the Necessity of Full Accuracy: Weakly Supervised Object-Level Offset Correction for Misaligned Building Labels}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34854-34864} }
EgoPoseFormer v2: Accurate Egocentric Human Motion Estimation for AR/VR-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zhenyu and Dwivedi, Sai Kumar and Maric, Filip and Chac\'on, Carlos and Bertsch, Nadine and Arcadu, Filippo and Hodan, Tomas and Ramamonjisoa, Michael and Wonka, Peter and Zhao, Amy and Kips, Robin and Keskin, Cem and Tkach, Anastasia and Yang, Chenhongyi}, title = {EgoPoseFormer v2: Accurate Egocentric Human Motion Estimation for AR/VR}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21121-21131} }
Skullptor: High Fidelity 3D Head Reconstruction in Seconds with Multi-View Normal Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Artru_2026_CVPR, author = {Artru, No\'e and Hussain, Rukhshanda and Got, Emeline and Messier, Alexandre and Lindell, David B. and Dib, Abdallah}, title = {Skullptor: High Fidelity 3D Head Reconstruction in Seconds with Multi-View Normal Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25248-25257} }
PureProof: Diffusion-Resistant Black-box Targeted Attack on Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Yiming and Wang, Dong and Lyu, Xinqi and Xiao, Bin}, title = {PureProof: Diffusion-Resistant Black-box Targeted Attack on Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8621-8630} }
ODGS-SLAM: Omnidirectional Gaussian Splatting SLAM-
[pdf]
[supp]
[bibtex]@InProceedings{Spiss_2026_CVPR, author = {Spiss, Stefan and Hieronimy, Joey and Ritter, Marcel and Harders, Matthias}, title = {ODGS-SLAM: Omnidirectional Gaussian Splatting SLAM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26114-26123} }
Factorize, Reconstruct, Enhance: A Unified Framework for Multimodal Sentiment Analysis-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zhilu and Li, Mingcheng}, title = {Factorize, Reconstruct, Enhance: A Unified Framework for Multimodal Sentiment Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15860-15869} }
LaS-Comp: Zero-shot 3D Completion with Latent-Spatial Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Weilong and Li, Haipeng and Xu, Hao and Ye, Nianjin and Ai, Yihao and Liu, Shuaicheng and Hu, Jingyu}, title = {LaS-Comp: Zero-shot 3D Completion with Latent-Spatial Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7588-7599} }
EchoVDiff: Cardiac-Cycle Echocardiography Video Generation from Arbitrary Single Frame-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jiansong and Yang, Xiaying and Luo, Xiaoling and Shen, Linlin}, title = {EchoVDiff: Cardiac-Cycle Echocardiography Video Generation from Arbitrary Single Frame}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9040-9050} }
InterPhys: Physics-aware Human Motion Synthesis in a Dynamic Scene-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xing_2026_CVPR, author = {Xing, Chaoyue and Mao, Wei and Liu, Miaomiao}, title = {InterPhys: Physics-aware Human Motion Synthesis in a Dynamic Scene}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30729-30739} }
GS-ASM: 2DGS-Supervised Active Stereo Matching-
[pdf]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Zhengling and Lu, Rongfeng and Chen, Quan and Zeng, Longjian and Lu, Ming and Sun, Yaoqi and Chen, Yahong and Ji, Baofeng and Yan, Chenggang}, title = {GS-ASM: 2DGS-Supervised Active Stereo Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26888-26898} }
Consensus Entropy: Harnessing Multi-VLM Agreement for Self-Verifying and Self-Improving OCR-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yulong and Liang, Tianyi and Cui, Erfei and Wang, Guoqing and Guo, Xu and Li, Chenhui and Liu, Gongshen}, title = {Consensus Entropy: Harnessing Multi-VLM Agreement for Self-Verifying and Self-Improving OCR}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11643-11653} }
Taming Noise-Induced Prototype Degradation for Privacy-Preserving Personalized Federated Fine-Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yuhua and Zhang, Qinnan and Li, Xiaodong and Zhang, Huan and Sun, Yifan and Qiu, Wangjie and Zhang, Hainan and Tong, Yongxin and Zheng, Zhiming}, title = {Taming Noise-Induced Prototype Degradation for Privacy-Preserving Personalized Federated Fine-Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39414-39424} }
AIMDepth: Asymmetric Image-Event Mamba for Monocular Depth Estimation-
[pdf]
[bibtex]@InProceedings{Jing_2026_CVPR, author = {Jing, Luoxi and Shi, Dianxi and Cao, Yushe and Wang, Yuanze and Zhang, Junze and Cui, Yuning and Wang, Mengzhu}, title = {AIMDepth: Asymmetric Image-Event Mamba for Monocular Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8033-8044} }
Interpretable and Steerable Concept Bottleneck Sparse Autoencoders-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kulkarni_2026_CVPR, author = {Kulkarni, Akshay and Weng, Tsui-Wei and Narayanaswamy, Vivek and Liu, Shusen and Sakla, Wesam A. and Thopalli, Kowshik}, title = {Interpretable and Steerable Concept Bottleneck Sparse Autoencoders}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2918-2927} }
EV-CGNet: Co-visible Focused 3D-guided 2D Event Keypoint Detection Network-
[pdf]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Yuan and Ding, Tianle and Zhu, Yuqing and Zhang, Tianzhu}, title = {EV-CGNet: Co-visible Focused 3D-guided 2D Event Keypoint Detection Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15105-15114} }
Meta-FC: Meta-Learning with Feature Consistency for Robust and Generalizable Watermarking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yuheng and Chen, Weitong and Zhu, Chengcheng and Zhang, Jiale and Ge, Chunpeng and Wu, Di and Long, Guodong}, title = {Meta-FC: Meta-Learning with Feature Consistency for Robust and Generalizable Watermarking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17420-17429} }
Token Warping Helps MLLMs Look from Nearby Viewpoints-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Phillip Y. and Park, Chanho and Park, Mingue and Yoo, Seungwoo and Koo, Juil and Sung, Minhyuk}, title = {Token Warping Helps MLLMs Look from Nearby Viewpoints}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3476-3488} }
Predictive Regularization Against Visual Representation Degradation in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Enguang and Wang, Qiang and Wu, Yuanchen and Yan, Ke and Yuan, Xinbin and Ding, Shouhong and Liu, Xialei and Cheng, Ming-Ming}, title = {Predictive Regularization Against Visual Representation Degradation in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8813-8824} }
MakeAnything: Harnessing Diffusion Transformers for Multi-Domain Procedural Sequence Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Yiren and Liu, Cheng and Shou, Mike Zheng}, title = {MakeAnything: Harnessing Diffusion Transformers for Multi-Domain Procedural Sequence Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11482-11492} }
Depth Peeling for High-Fidelity Gaussian-Enhanced Surfel Rendering-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Keyang and Wu, Hongzhi and Zhou, Kun}, title = {Depth Peeling for High-Fidelity Gaussian-Enhanced Surfel Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22561-22570} }
Learning Mutual View Information Graph for Adaptive Adversarial Collaborative Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tao_2026_CVPR, author = {Tao, Yihang and Hu, Senkang and An, Haonan and Fang, Zhengru and Cao, Hangcheng and Fang, Yuguang}, title = {Learning Mutual View Information Graph for Adaptive Adversarial Collaborative Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42321-42330} }
Hierarchical Point-Patch Fusion with Adaptive Patch Codebook for 3D Shape Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2026_CVPR, author = {Kang, Xueyang and Li, Zizhao and Lan, Tian and Gong, Dong and Khoshelham, Kourosh and Nan, Liangliang}, title = {Hierarchical Point-Patch Fusion with Adaptive Patch Codebook for 3D Shape Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24258-24267} }
Memory-Efficient Fine-Tuning Diffusion Transformers via Dynamic Patch Sampling and Block Skipping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Sunghyun and Kim, Jeongho and Park, Hyoungwoo and Das, Debasmit and Yun, Sungrack and Hayat, Munawar and Choo, Jaegul and Porikli, Fatih and Choi, Seokeon}, title = {Memory-Efficient Fine-Tuning Diffusion Transformers via Dynamic Patch Sampling and Block Skipping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11504-11514} }
Confidence-Guided Multi-Scale Aggregation for Sparse-View High-Resolution 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Qinzheng and Liu, Zaychik and Lu, Lijing and Li, Zhihang}, title = {Confidence-Guided Multi-Scale Aggregation for Sparse-View High-Resolution 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19054-19064} }
pH-Strips for Selective Forgetting: A Blunt but Fast Diagnostic Baseline for Machine Unlearning-
[pdf]
[supp]
[bibtex]@InProceedings{Qian_2026_CVPR, author = {Qian, Chengyao and Wu, Jing and Le, Trung and Phung, Dinh and Harandi, Mehrtash}, title = {pH-Strips for Selective Forgetting: A Blunt but Fast Diagnostic Baseline for Machine Unlearning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3306-3315} }
ActAvatar: Temporally-Aware Precise Action Control for Talking Avatars-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Ziqiao and Chen, Yi and Ma, Yifeng and Zhang, Guozhen and Sun, Zhiyao and Zhou, Zixiang and Zhang, Youliang and Zhou, Zhengguang and Fan, Zhaoxin and Liu, Hongyan and Zhou, Yuan and Lu, Qinglin and He, Jun}, title = {ActAvatar: Temporally-Aware Precise Action Control for Talking Avatars}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39983-39993} }
Enhancing Video Vision Language Model with Hippocampal Sensing-
[pdf]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Xu}, title = {Enhancing Video Vision Language Model with Hippocampal Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33682-33692} }
EgoMind: Activating Spatial Cognition through Linguistic Reasoning in MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zhenghao and Wang, Huiqun and Huang, Di}, title = {EgoMind: Activating Spatial Cognition through Linguistic Reasoning in MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38616-38626} }
COPE: Consistent Occlusion and Prompt Enhancement Network for Occluded Person Re-identification-
[pdf]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Siyi and Lin, Jinliang and Weng, Juanjuan and Liu, Zhihui and Li, Shaozi and Luo, Zhiming}, title = {COPE: Consistent Occlusion and Prompt Enhancement Network for Occluded Person Re-identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11293-11302} }
SPREAD: Spatial-Physical REasoning via geometry Aware Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Minzhang and Shao, Kuixiang and Li, Xuebing and Jiao, Yuyang and Bai, Yinuo and Zhou, Hengan and Shen, Sixian and Gu, Jiayuan and Yu, Jingyi}, title = {SPREAD: Spatial-Physical REasoning via geometry Aware Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31008-31018} }
DMAligner: Enhancing Image Alignment via Diffusion Model Based View Synthesis-
[pdf]
[arXiv]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Xinglong and Luo, Ao and Wang, Zhengning and Yang, Yueqi and Feng, Chaoyu and Lei, Lei and Zeng, Bing and Liu, Shuaicheng}, title = {DMAligner: Enhancing Image Alignment via Diffusion Model Based View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16541-16550} }
ReflexSplit: Single Image Reflection Separation via Layer Fusion-Separation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Chia-Ming and Lin, Yu-Fan and Jiang, Jin-Hui and Hsiao, Yu-Jou and Hsu, Chih-Chung and Liu, Yu-Lun}, title = {ReflexSplit: Single Image Reflection Separation via Layer Fusion-Separation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1300-1309} }
SHands: A Multi-View Dataset and Benchmark for Surgical Hand-Gesture and Error Recognition Toward Medical Training-
[pdf]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Le and dos Santos, Thiago Freitas and Magnenat-Thalmann, Nadia and Wac, Katarzyna}, title = {SHands: A Multi-View Dataset and Benchmark for Surgical Hand-Gesture and Error Recognition Toward Medical Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42879-42890} }
Retrieving Counterfactuals Improves Visual In-Context Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiong_2026_CVPR, author = {Xiong, Guangzhi and Sinha, Sanchit and He, Zhenghao and Zhang, Aidong}, title = {Retrieving Counterfactuals Improves Visual In-Context Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24352-24362} }
Shedding Light on VLN Robustness: A Black-box Framework for Indoor Lighting-based Adversarial Attack-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Chenyang and Tang, Wenbing and Huang, Yihao and Zhan, Sinong Simon and Hu, Ming and Jia, Xiaojun and Liu, Yang}, title = {Shedding Light on VLN Robustness: A Black-box Framework for Indoor Lighting-based Adversarial Attack}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1565-1574} }
LeapAlign: Post-training Flow Matching Models at Any Generation Step by Building Two-Step Trajectories-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Zhanhao and Yang, Tao and Wu, Jie and Feng, Chengjian and Zheng, Liang}, title = {LeapAlign: Post-training Flow Matching Models at Any Generation Step by Building Two-Step Trajectories}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23238-23248} }
EmbodiedSplat: Online Feed-Forward Semantic 3DGS for Open-Vocabulary 3D Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Seungjun and Wang, Zihan and Wang, Yunsong and Lee, Gim Hee}, title = {EmbodiedSplat: Online Feed-Forward Semantic 3DGS for Open-Vocabulary 3D Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23774-23784} }
From Infusion to Assimilation Distillation for Medical Image Segmentation-
[pdf]
[bibtex]@InProceedings{Hong_2026_CVPR, author = {Hong, Jiankang and Luo, Ye and Liu, Yinan and Yuan, Junsong}, title = {From Infusion to Assimilation Distillation for Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20985-20995} }
UniGenDet: A Unified Generative-Discriminative Framework for Co-Evolutionary Image Generation and Generated Image Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yanran and Zheng, Wenzhao and Li, Yifei and Yu, Bingyao and Zheng, Yu and Chen, Lei and Lu, Jiwen and Zhou, Jie}, title = {UniGenDet: A Unified Generative-Discriminative Framework for Co-Evolutionary Image Generation and Generated Image Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16226-16236} }
Geometry-as-context: Modulating Explicit 3D in Scene-consistent Video Generation to Geometry Context-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, JiaKui and Liu, Jialun and Yang, Liying and Zhang, Xinliang and Li, Kaiwen and Zeng, Shuang and Li, Yuanwei and Huang, Haibin and Zhang, Chi and Lu, Yanye}, title = {Geometry-as-context: Modulating Explicit 3D in Scene-consistent Video Generation to Geometry Context}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4258-4268} }
Changes in Real Time: Online Scene Change Detection with Multi-View Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Galappaththige_2026_CVPR, author = {Galappaththige, Chamuditha Jayanga and Lai, Jason and Windrim, Lloyd and Dansereau, Donald and Suenderhauf, Niko and Miller, Dimity}, title = {Changes in Real Time: Online Scene Change Detection with Multi-View Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32246-32256} }
SABER: Spatially Consistent 3D Universal Adversarial Objects for BEV Detectors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Aixuan and Xiang, Mochu and Hou, Bosen and Wan, Zhexiong and Zhang, Jing and Dai, Yuchao}, title = {SABER: Spatially Consistent 3D Universal Adversarial Objects for BEV Detectors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25841-25850} }
PAUL: Uncertainty-Guided Partition and Augmentation for Robust Cross-View Geo-Localization under Noisy Correspondence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zheng and Zhang, Xueyi and Guo, Yanming and Xie, Yuxiang and Ding, Zhaoyun and Cai, Siqi and Li, Haizhou and Lao, Mingrui}, title = {PAUL: Uncertainty-Guided Partition and Augmentation for Robust Cross-View Geo-Localization under Noisy Correspondence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5389-5398} }
Superman: Unifying Skeleton and Vision for Human Motion Perception and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xinshun and Li, Peiming and Wang, Ziyi and Fang, Zhongbin and Deng, Zhichao and Wu, Songtao and Li, Jason and Liu, Mengyuan}, title = {Superman: Unifying Skeleton and Vision for Human Motion Perception and Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38335-38344} }
Controllable Federated Prompt Learning at Test Time-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Rui and Bai, Liang and Guo, Yanming and Ruan, Yirun and Yu, Tianyuan and Lu, Zhihe}, title = {Controllable Federated Prompt Learning at Test Time}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39455-39465} }
LightMover: Generative Light Movement with Color and Intensity Controls-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Gengze and Wang, Tianyu and Kim, Soo Ye and Shu, Zhixin and Yu, Xin and Hold-Geoffroy, Yannick and Chaturvedi, Sumit and Wu, Qi and Lin, Zhe and Cohen, Scott}, title = {LightMover: Generative Light Movement with Color and Intensity Controls}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8997-9007} }
Diffusion Sampling Path Tells More: An Efficient Plug-and-Play Strategy for Sample Filtering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Sixian and Tang, Zhiwei and Chang, Tsung-Hui}, title = {Diffusion Sampling Path Tells More: An Efficient Plug-and-Play Strategy for Sample Filtering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36030-36039} }
EMR-Diff: Edge-aware Multimodal Residual Diffusion Model for Hyperspectral Image Super-resolution-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Tao and Yao, Shengtao and Zeng, Rong and Zhu, Zunjie and Zheng, Bolun and Sun, Yaoqi and Fu, Ying and Yan, Chenggang}, title = {EMR-Diff: Edge-aware Multimodal Residual Diffusion Model for Hyperspectral Image Super-resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23419-23429} }
Group Editing: Edit Multiple Images in One Go-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Yue and Wang, Xinyu and Ma, Qianli and Wang, Qinghe and Zheng, Mingzhe and Yang, Xiangpeng and Li, Hao and Zhao, Chongbo and Ying, Jixuan and Yang, Harry and Liu, Hongyu and Chen, Qifeng}, title = {Group Editing: Edit Multiple Images in One Go}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43418-43428} }
I2I-Bench: A Comprehensive Benchmark Suite for Image-to-Image Editing Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Juntong and Wang, Jiarui and Duan, Huiyu and Kang, Jiaxiang and Zhai, Guangtao and Min, Xiongkuo}, title = {I2I-Bench: A Comprehensive Benchmark Suite for Image-to-Image Editing Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15354-15364} }
Lumosaic: Hyperspectral Video via Active Illumination and Coded-Exposure Pixels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Verma_2026_CVPR, author = {Verma, Dhruv and Qiu, Andrew and Rangel, Roberto and Barman, Ayandev and Yang, Hao and Hu, Chenjia and Zhang, Fengqi and Genov, Roman and Lindell, David B. and Kutulakos, Kiriakos N. and Mariakakis, Alex}, title = {Lumosaic: Hyperspectral Video via Active Illumination and Coded-Exposure Pixels}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26761-26771} }
HERO: Hierarchical Embedding-Refinement for Open-Vocabulary Temporal Sentence Grounding in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Tingting and Tao, Xinsong and Yin, Yufei and Tan, Min and Zhao, Sicheng and Yu, Zhou}, title = {HERO: Hierarchical Embedding-Refinement for Open-Vocabulary Temporal Sentence Grounding in Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31867-31876} }
Conversational Image Segmentation: Grounding Abstract Concepts with Scalable Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sahoo_2026_CVPR, author = {Sahoo, Aadarsh and Gkioxari, Georgia}, title = {Conversational Image Segmentation: Grounding Abstract Concepts with Scalable Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39476-39485} }
SAMosaic3D: Modular Scene Assembly for Real-Time 3D Segment Anything-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Peng and Wang, Yongcai and Chen, Wang and Cao, Hualong and Yang, Kang and Li, Chunxu and Wen, Jie and Li, Deying}, title = {SAMosaic3D: Modular Scene Assembly for Real-Time 3D Segment Anything}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17894-17904} }
InfinityHuman: Towards Long-Term Audio-Driven Human Animation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xiaodi and Xie, Pan and Ren, Yi and Gan, Qijun and Zhang, Chen and Kong, Fangyuan and Yin, Xiang and Yuan, Zehuan and Peng, Bingyue}, title = {InfinityHuman: Towards Long-Term Audio-Driven Human Animation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3978-3987} }
Flat-Pack Bench: Evaluating Spatio-Temporal Understanding in Large Vision-Language Models through Furniture Assembly-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chetan_2026_CVPR, author = {Chetan, Aditya and Cai, Eric and Kushwaha, Peeyush and Kani, Bharath Raj Nagoor and Mall, Utkarsh and Wang, Qianqian and Snavely, Noah and Hariharan, Bharath}, title = {Flat-Pack Bench: Evaluating Spatio-Temporal Understanding in Large Vision-Language Models through Furniture Assembly}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16624-16634} }
Demo2Tutorial: From Human Experience to Multimodal Software Tutorials-
[pdf]
[supp]
[bibtex]@InProceedings{Bai_2026_CVPR, author = {Bai, Zechen and Chen, Zhiheng and Lin, Yiqi and Lin, Kevin Qinghong and Gao, Difei and Guo, Xiangwu and Wang, Xin and Shou, Mike Zheng}, title = {Demo2Tutorial: From Human Experience to Multimodal Software Tutorials}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29588-29597} }
Reliable Clustering Number Estimation for Contrastive Multi-View Clustering-
[pdf]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Zhengzhong and Zhou, Pei and Bai, Lanxi and Cheng, Li and Nie, Jia and Min, Shiquan and Zhu, Jiangping}, title = {Reliable Clustering Number Estimation for Contrastive Multi-View Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30162-30171} }
VoDaSuRe: A Large-Scale Dataset Revealing Domain Shift in Volumetric Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Hoeg_2026_CVPR, author = {H{\o}eg, August Leander and Bardenfleth, Sophia Wiinberg and Kjer, Hans Martin and Dyrby, Tim Bj{\o}rn and Dahl, Vedrana Andersen and Dahl, Anders Bjorholm}, title = {VoDaSuRe: A Large-Scale Dataset Revealing Domain Shift in Volumetric Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2166-2176} }
F$^2$-Assist: Multi-Phase Fetal Growth Forecast and Report Generation from Ultrasound Examination-
[pdf]
[bibtex]@InProceedings{Pu_2026_CVPR, author = {Pu, Bin and Liang, Xusheng and Ding, Xinpeng and Wu, Jinlin and Lei, Zhen and Li, Shengli and Li, Kenli and Ma, Jiawei}, title = {{F$^2$-Assist}: Multi-Phase Fetal Growth Forecast and Report Generation from Ultrasound Examination}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35341-35350} }
Online3R: Online Learning for Consistent Sequential Reconstruction Based on Geometry Foundation Model-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Shunkai and Yan, Zike and Xue, Fei and Wu, Dong and Deng, Yuchen and Zha, Hongbin}, title = {Online3R: Online Learning for Consistent Sequential Reconstruction Based on Geometry Foundation Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36529-36538} }
VDFE: Difference-Aware 3D Scene Editing with Non-Intrusive Video Diffusion Priors for Multi-View Consistency and Efficiency-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Chao and Liu, Fang and Li, Shuo and Liu, Yang and Wang, Jiahao and Huang, Xinyan and Li, Lingling and Chen, Puhua and Liu, Xu and Ma, Wenping and Yu, Siqi}, title = {VDFE: Difference-Aware 3D Scene Editing with Non-Intrusive Video Diffusion Priors for Multi-View Consistency and Efficiency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18375-18385} }
RigMo: Unifying Rig and Motion Learning for Generative Animation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Hao and Luo, Jiahao and Wan, Bohui and Zhao, Yizhou and Li, Zongrui and Vasilkovsky, Michael and Wang, Chaoyang and Wang, Jian and Ahuja, Narendra and Zhou, Bing}, title = {RigMo: Unifying Rig and Motion Learning for Generative Animation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25438-25449} }
TokenLight: Precise Lighting Control in Images using Attribute Tokens-
[pdf]
[supp]
[bibtex]@InProceedings{Chaturvedi_2026_CVPR, author = {Chaturvedi, Sumit and Hold-Geoffroy, Yannick and Ren, Mengwei and Liu, Jingyuan and Zhang, He and Mei, Yiqun and Dorsey, Julie and Shu, Zhixin}, title = {TokenLight: Precise Lighting Control in Images using Attribute Tokens}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19737-19748} }
Linking Modality Isolation in Heterogeneous Collaborative Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Changxing and Chao, Zichen and Chen, Siheng}, title = {Linking Modality Isolation in Heterogeneous Collaborative Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39765-39774} }
UltraFlux: Data-Model Co-Design for High-quality Native 4K Text-to-Image Generation across Diverse Aspect Ratios-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Tian and Fei, Song and Zhu, Lei}, title = {UltraFlux: Data-Model Co-Design for High-quality Native 4K Text-to-Image Generation across Diverse Aspect Ratios}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22070-22079} }
InverFill: One-Step Inversion for Enhanced Few-Step Diffusion Inpainting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vu_2026_CVPR, author = {Vu, Duc and Nguyen, Kien and Nguyen, Trong-Tung and Nguyen, Ngan and Nguyen, Phong and Nguyen, Khoi and Pham, Cuong and Tran, Anh}, title = {InverFill: One-Step Inversion for Enhanced Few-Step Diffusion Inpainting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25677-25687} }
Expand and Prune: Maximizing Trajectory Diversity for Effective GRPO in Generative Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ge_2026_CVPR, author = {Ge, Shiran and Huang, Chenyi and Ai, Yuang and Fan, Qihang and Huang, Huaibo and He, Ran}, title = {Expand and Prune: Maximizing Trajectory Diversity for Effective GRPO in Generative Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41913-41922} }
Disentangled Textual Priors for Diffusion-based Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Lei and Liu, Xin and Tong, Xinze and Li, Zhiliang and Liu, Jie and Tang, Jie and Wu, Gangshan}, title = {Disentangled Textual Priors for Diffusion-based Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38228-38237} }
NG-GS: NeRF-guided 3D Gaussian Splatting Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Yi and Wang, Tao and Jin, Yi and Lang, Congyan and Li, Yidong and Ling, Haibin}, title = {NG-GS: NeRF-guided 3D Gaussian Splatting Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42061-42070} }
HandWorld: Hand-Centric Unified Video Action Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Zhihao and Du, Zhiying and Yang, Xitong and Wu, Zuxuan}, title = {HandWorld: Hand-Centric Unified Video Action Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15976-15985} }
Training-Only Heterogeneous Image-Patch-Text Graph Supervision for Advancing Few-Shot Learning Adapters-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mohammad_2026_CVPR, author = {Mohammad, Mohammed Rahman Sherif Khan and Behera, Ardhendu and Pradhan, Sandip and Kumar, Swagat and Ahmed, Amr}, title = {Training-Only Heterogeneous Image-Patch-Text Graph Supervision for Advancing Few-Shot Learning Adapters}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19613-19622} }
Animator-Centric Skeleton Generation on Objects with Fine-Grained Details-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Mingze and Zeng, Cheng and Pei, Jiansong and Chen, Junhao and Song, Chaoyue and Wang, Shaohui and Chang, Tianyuan and Huang, Bin and Zeng, Zijiao and Huang, Ruqi}, title = {Animator-Centric Skeleton Generation on Objects with Fine-Grained Details}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17336-17345} }
ROSE: Rotate Your Large Language Model to See-
[pdf]
[supp]
[bibtex]@InProceedings{Yue_2026_CVPR, author = {Yue, Tongtian and Gao, Xuange and Guo, Longteng and Zhao, Zijia and Liu, Zikang and Jiang, Jie and Huang, Hua and Liu, Jing}, title = {ROSE: Rotate Your Large Language Model to See}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19265-19275} }
Interactive Tracking: A Human-in-the-Loop Paradigm with Memory-Augmented Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Yuqing and Zeng, Guotian and Yuan, Zhenqiao and He, Zhenyu and Li, Xin and Wang, Yaowei and Yang, Ming-Hsuan}, title = {Interactive Tracking: A Human-in-the-Loop Paradigm with Memory-Augmented Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35166-35176} }
TokenHand: Discrete Token Representation for Efficient Hand Mesh Reconstruction-
[pdf]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Xinguo and Shen, Yixin and Chaudhari, Rahul}, title = {TokenHand: Discrete Token Representation for Efficient Hand Mesh Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8921-8931} }
VAR RL Done Right: Tackling Asynchronous Policy Conflicts in Visual Autoregressive Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Shikun and Qu, Liao and Zhang, Huichao and Liu, Yiheng and Song, Yangyang and Li, Xian and Jiang, Yi and Wang, Xu and Jia, Jia and Du, Daniel K. and Wu, Xinglong}, title = {VAR RL Done Right: Tackling Asynchronous Policy Conflicts in Visual Autoregressive Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1874-1884} }
Beyond Myopic Alignment: Lookahead Optimization for Online Class-Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Lai_2026_CVPR, author = {Lai, Song and Zhao, Zhe and Zhu, Fei and Cheng, Ji and Lin, Xi and Zhang, Qingfu and Meng, Gaofeng}, title = {Beyond Myopic Alignment: Lookahead Optimization for Online Class-Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18053-18062} }
Degradation-Robust Fusion: An Efficient Degradation-Aware Diffusion Framework for Multimodal Image Fusion in Arbitrary Degradation Scenarios-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Yu and Liu, Yu and Wu, Zhong-Cheng and Cheng, Juan and Li, Huafeng and Chen, Xun}, title = {Degradation-Robust Fusion: An Efficient Degradation-Aware Diffusion Framework for Multimodal Image Fusion in Arbitrary Degradation Scenarios}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33848-33858} }
Endless World: Real-Time 3D-Aware Long Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Ke and Xu, Jiacong and Mei, Yiqun and Patel, Vishal M.}, title = {Endless World: Real-Time 3D-Aware Long Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18386-18395} }
Advancing Cancer Prognosis with Hierarchical Fusion of Genomic, Proteomic and Pathology Imaging Data from a Systems Biology Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Junjie and Xue, Bao and Wang, Meiling and Shao, Wei and Zhang, Daoqiang}, title = {Advancing Cancer Prognosis with Hierarchical Fusion of Genomic, Proteomic and Pathology Imaging Data from a Systems Biology Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12554-12564} }
RAISE: Requirement-Adaptive Evolutionary Refinement for Training-Free Text-to-Image Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Liyao and Chen, Ruichen and Gao, Chao and Niu, Di}, title = {RAISE: Requirement-Adaptive Evolutionary Refinement for Training-Free Text-to-Image Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22038-22048} }
Hoi! - A Multimodal Dataset for Force-Grounded, Cross-View Articulated Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Engelbracht_2026_CVPR, author = {Engelbracht, Tim and Zurbr{\"u}gg, Ren{\'e} and Wohlrapp, Matteo and B{\"u}chner, Martin and Valada, Abhinav and Pollefeys, Marc and Blum, Hermann and Bauer, Zuria}, title = {Hoi! - A Multimodal Dataset for Force-Grounded, Cross-View Articulated Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8880-8890} }
4DSurf: High-Fidelity Dynamic Scene Surface Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Renjie and Li, Hongdong and Alvarez, Jose M. and Liu, Miaomiao}, title = {4DSurf: High-Fidelity Dynamic Scene Surface Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22539-22549} }
MultiBanana: A Challenging Benchmark for Multi-Reference Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Oshima_2026_CVPR, author = {Oshima, Yuta and Miyake, Daiki and Matsutani, Kohsei and Iwasawa, Yusuke and Suzuki, Masahiro and Matsuo, Yutaka and Furuta, Hiroki}, title = {MultiBanana: A Challenging Benchmark for Multi-Reference Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {448-460} }
Distilling Unsigned Distance Function for Surface Reconstruction from 3D Gaussian Splatting-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Qian and Fu, Rao and Li, Jiangtao and Liu, Fan}, title = {Distilling Unsigned Distance Function for Surface Reconstruction from 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4891-4901} }
TUDSR: Twice Upsampling-Diffusion for Higher Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Zhiqiang and Dong, Yitong and Wei, Xian}, title = {TUDSR: Twice Upsampling-Diffusion for Higher Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38208-38217} }
TriLite: Efficient Weakly Supervised Object Localization with Universal Visual Features and Tri-Region Disentanglement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sabaghi_2026_CVPR, author = {Sabaghi, Arian and Oramas, Jose}, title = {TriLite: Efficient Weakly Supervised Object Localization with Universal Visual Features and Tri-Region Disentanglement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41386-41395} }
VLM-3R: Vision-Language Models Augmented with Instruction-Aligned 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Zhiwen and Zhang, Jian and Li, Renjie and Zhang, Junge and Chen, Runjin and Hu, Hezhen and Wang, Kevin and Wang, Peihao and Qu, Huaizhi and Zhou, Shijie and Wang, Dilin and Yan, Zhicheng and Xu, Hongyu and Theiss, Justin and Chen, Tianlong and Li, Jiachen and Tu, Zhengzhong and Wang, Zhangyang and Ranjan, Rakesh}, title = {VLM-3R: Vision-Language Models Augmented with Instruction-Aligned 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31054-31065} }
Pixel Motion Diffusion is What We Need for Robot Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, E-Ro and Zhang, Yichi and Ranasinghe, Kanchana and Li, Xiang and Ryoo, Michael S.}, title = {Pixel Motion Diffusion is What We Need for Robot Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23663-23672} }
TouchDream: 3D Object Completion through Imagined Touch-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yuanbo and Wang, Xinning and Zhang, Zhaoxuan and Wang, Changlong and Xia, Qianchen and Wei, Xiaopeng and Yang, Xin}, title = {TouchDream: 3D Object Completion through Imagined Touch}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8901-8910} }
Sparsely Timing the Change: A Spiking Temporal Framework for Remote Sensing Interpretation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Shilong and Xie, Xiurui and Zhan, Qiugang and Wang, Luochao and Deng, Yong and Liu, Guisong}, title = {Sparsely Timing the Change: A Spiking Temporal Framework for Remote Sensing Interpretation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34575-34585} }
FreeForm: Reduced-Order Deformable Simulation from Particle-Based Skinning Eigenmodes-
[pdf]
[supp]
[bibtex]@InProceedings{Xiang_2026_CVPR, author = {Xiang, Donglai and Modi, Vismay and Dagli, Rishit and Trusty, Ty and Daviet, Gilles and Chen, Anka He and Sharp, Nicholas and Levin, David I. W.}, title = {FreeForm: Reduced-Order Deformable Simulation from Particle-Based Skinning Eigenmodes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32475-32484} }
RAW-Domain Degradation Models for Realistic Smartphone Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mosleh_2026_CVPR, author = {Mosleh, Ali and Ali, Faraz and Zhang, Fengjia and Tsogkas, Stavros and Lee, Junyong and Brown, Michael S. and Levinshtein, Alex}, title = {RAW-Domain Degradation Models for Realistic Smartphone Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23430-23439} }
Real-Time Long Horizon Air Quality Forecasting via Group-Relative Policy Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2026_CVPR, author = {Kang, Inha and Kim, Eunki and Ryu, Wonjeong and Shin, Jaeyo and Yu, Seungjun and Kang, Yoon-Hee and Jeong, Seongeun and Kim, Eunhye and Kim, Soontae and Shim, Hyunjung}, title = {Real-Time Long Horizon Air Quality Forecasting via Group-Relative Policy Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6421-6431} }
LEAD: Minimizing Learner-Expert Asymmetry in End-to-End Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Long and Fauth, Micha and Jaeger, Bernhard and Dauner, Daniel and Igl, Maximilian and Geiger, Andreas and Chitta, Kashyap}, title = {LEAD: Minimizing Learner-Expert Asymmetry in End-to-End Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39775-39785} }
PromptLoop: Plug-and-Play Prompt Refinement via Latent Feedback for Diffusion Model Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Suhyeon and Ye, Jong Chul}, title = {PromptLoop: Plug-and-Play Prompt Refinement via Latent Feedback for Diffusion Model Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41859-41869} }
DIMOS: Disentangling Instance-level Moving Object Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Hongxiang and Ren, Hongwei and Lin, Xiaopeng and Huang, Yulong and Xie, Zeke and Cheng, Bojun}, title = {DIMOS: Disentangling Instance-level Moving Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39806-39816} }
PrITTI: Primitive-based Generation of Controllable and Editable 3D Semantic Urban Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tze_2026_CVPR, author = {Tze, Christina Ourania and Dauner, Daniel and Liao, Yiyi and Tsishkou, Dzmitry and Geiger, Andreas}, title = {PrITTI: Primitive-based Generation of Controllable and Editable 3D Semantic Urban Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32613-32624} }
DSCA: Dynamic Subspace Concept Alignment for Lifelong VLM Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Das_2026_CVPR, author = {Das, Gyanendra and Jena, Sai}, title = {DSCA: Dynamic Subspace Concept Alignment for Lifelong VLM Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40772-40781} }
BeautyGRPO: Aesthetic Alignment for Face Retouching via Dynamic Path Guidance and Fine-Grained Preference Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Jiachen and Lin, Xianhui and Dong, Yi and Zheng, Zebiao and Liu, Xing and Gu, Hong and Fang, Yanmei}, title = {BeautyGRPO: Aesthetic Alignment for Face Retouching via Dynamic Path Guidance and Fine-Grained Preference Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25110-25120} }
DreamSAC: Learning Hamiltonian World Models via Symmetry Exploration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Jinzhou and Feng, Fan and Fu, Minghao and Lin, Wenjun and Yang, Jing and Huang, Biwei and Wang, Keze}, title = {DreamSAC: Learning Hamiltonian World Models via Symmetry Exploration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15188-15198} }
BiGain: Unified Token Compression for Joint Generation and Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Jiacheng and Tang, Shengkun and Cui, Jiacheng and Xu, Dongkuan and Shen, Zhiqiang}, title = {BiGain: Unified Token Compression for Joint Generation and Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31931-31940} }
Say Cheese! Detail-Preserving Portrait Collection Generation via Natural Language Edits-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Zelong and Wu, Jiahui and Ba, Ying and Jing, Dong and Lu, Zhiwu}, title = {Say Cheese! Detail-Preserving Portrait Collection Generation via Natural Language Edits}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7827-7836} }
Resolving Evidence Sparsity: Agentic Context Engineering for Long-Document Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Keliang and Chen, Zizhi and Li, Mingcheng and Tang, Jingqun and Yang, Dingkang and Zhang, Lihua}, title = {Resolving Evidence Sparsity: Agentic Context Engineering for Long-Document Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19452-19462} }
4DP-QA: Scalable QA for 4D Perception in Vision Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Cho_2026_CVPR, author = {Cho, Seokju and Badki, Abhishek and Su, Hang and Jiang, Jindong and Zeng, Ziyao and Kim, Seungryong and Liu, Sifei and Gallo, Orazio}, title = {4DP-QA: Scalable QA for 4D Perception in Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23869-23879} }
Neighbor-Aware Localized Concept Erasure in Text-to-Image Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Zhuan and Farashah, Alireza Dehghanpour and de Vries, Rik and Farnadi, Golnoosh}, title = {Neighbor-Aware Localized Concept Erasure in Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17440-17450} }
HAWK: Head Importance-Aware Visual Token Pruning in Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Qihui and Zhang, Tao and Wang, Yuchen and Chen, Shuangwu and Tan, Xiaobin and Yang, Jian and Liu, Yang and Pan, Yinfei}, title = {HAWK: Head Importance-Aware Visual Token Pruning in Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39583-39592} }
DarkShake-DVS: Event-based Human Action Recognition under Low-light and Shaking Camera Conditions-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Jiaqi and Xu, Qinfu and Pan, Liyuan}, title = {DarkShake-DVS: Event-based Human Action Recognition under Low-light and Shaking Camera Conditions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20149-20159} }
EthoCLIP: Ontology-Enhanced Video-Language Pretraining for Animal Behavior Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Jing_2026_CVPR, author = {Jing, Yinuo and Wu, Jinyan and Yang, Zixi and Liang, Kongming and Zhu, Xiatian and Ma, Zhanyu}, title = {EthoCLIP: Ontology-Enhanced Video-Language Pretraining for Animal Behavior Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31196-31206} }
TriSim: Tri-Dimensional Similarity Modeling with Extreme Value Theory for False-Negative Mitigation in Remote Sensing Image-Text Retrieval-
[pdf]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Chengyu and Lu, Hanzhang and Nie, Jie and Du, Shan}, title = {TriSim: Tri-Dimensional Similarity Modeling with Extreme Value Theory for False-Negative Mitigation in Remote Sensing Image-Text Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23944-23954} }
MapReduce LoRA: Advancing the Pareto Front in Multi-Preference Optimization for Generative Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Chieh-Yun and Wang, Zhonghao and Chen, Qi and Ye, Zhifan and Shi, Min and Zhao, Yue and Zhao, Yinan and Qu, Hui and Lin, Wei-An and Shen, Yiru and Kale, Ajinkya and Essa, Irfan and Shi, Humphrey}, title = {MapReduce LoRA: Advancing the Pareto Front in Multi-Preference Optimization for Generative Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34375-34384} }
FedSST: Rethinking Fair Federated Graph Learning under Structural Shift-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Dingyi}, title = {FedSST: Rethinking Fair Federated Graph Learning under Structural Shift}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10335-10345} }
Beyond Perceptual Shortcuts: Causal-Inspired Debiasing Optimization for Generalizable Video Reasoning in Lightweight MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Jingze and Zhang, Quan and Suo, Hongfei and Cai, Zeqiang and Chen, Hongbo}, title = {Beyond Perceptual Shortcuts: Causal-Inspired Debiasing Optimization for Generalizable Video Reasoning in Lightweight MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12259-12268} }
Self-Consistency for LLM-Based Motion Trajectory Generation and Verification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Jiaju and Jones, R. Kenny and Wu, Jiajun and Agrawala, Maneesh}, title = {Self-Consistency for LLM-Based Motion Trajectory Generation and Verification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17357-17366} }
Agentic Retoucher for Text-To-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2026_CVPR, author = {Shen, Shaocheng and Liang, Jianfeng and Cai, Chunlei and Geng, Cong and Duan, Huiyu and Zhang, Xiaoyun and Hu, Qiang and Zhai, Guangtao}, title = {Agentic Retoucher for Text-To-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29114-29125} }
Scalable Trajectory Generation for Whole-Body Mobile Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Niu_2026_CVPR, author = {Niu, Yida and Chang, Xinhai and Liu, Xin and Jiao, Ziyuan and Zhu, Yixin}, title = {Scalable Trajectory Generation for Whole-Body Mobile Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1798-1808} }
UTPTrack: Towards Simple and Unified Token Pruning for Visual Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Hao and Wang, Xudong and Zhang, Jialiang and Tong, Junlong and Chen, Xinghao and Lin, Junyan and Ma, Yunpu and Shen, Xiaoyu}, title = {UTPTrack: Towards Simple and Unified Token Pruning for Visual Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20963-20972} }
UniDef: Universal Defense Against Unauthorized Image Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Shao_2026_CVPR, author = {Shao, Mingwen and Meng, Lingzhuang and Lv, Xiang and Wu, Mengyao and Chen, Xinyuan and Zhang, Qiao and Liu, Chang and Qiao, Yuanjian and Dong, Chao}, title = {UniDef: Universal Defense Against Unauthorized Image Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8631-8640} }
CLCR: Cross-Level Semantic Collaborative Representation for Multimodal Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Meng_2026_CVPR, author = {Meng, Chunlei and Huang, Guanhong and Fu, Rong and Jian, Runmin and Gan, Zhongxue and Ouyang, Chun}, title = {CLCR: Cross-Level Semantic Collaborative Representation for Multimodal Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1606-1615} }
Optical Diffraction-based Convolution for Semiconductor Lithography-
[pdf]
[supp]
[bibtex]@InProceedings{Son_2026_CVPR, author = {Son, Young-Han and Shin, Dong-Hee and Lee, Deok-Joong and Lee, Hyun Jung and Kam, Tae-Eui}, title = {Optical Diffraction-based Convolution for Semiconductor Lithography}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12458-12468} }
DASH: A Meta-Attack Framework for Synthesizing Effective and Stealthy Adversarial Examples-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Al_Nomaan_Nafi_2026_CVPR, author = {Al Nomaan Nafi, Abdullah and Rahaman, Habibur and Haider, Zafaryab and Mahfuz, Tanzim and Suya, Fnu and Bhunia, Swarup and Chakraborty, Prabuddha}, title = {DASH: A Meta-Attack Framework for Synthesizing Effective and Stealthy Adversarial Examples}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27884-27893} }
Sensor2Sensor: Cross-Embodiment Sensor Conversion for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Jiahao and Sun, Bo and Bai, Yijing and Casser, Vincent and Peng, Songyou and Zhu, Zehao and Shih, Meng-Li and Masotto, Xander and Su, Shih-Yang and Parvate, Kanaad and Ge, Tiancheng and Bieske, Linn and Anguelov, Dragomir and Tan, Mingxing and Jiang, Chiyu Max}, title = {Sensor2Sensor: Cross-Embodiment Sensor Conversion for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32093-32102} }
GSNR: Graph Smooth Null-Space Representation for Inverse Problems-
[pdf]
[supp]
[bibtex]@InProceedings{Gualdron-Hurtado_2026_CVPR, author = {Gualdr{\'o}n-Hurtado, Romario and Jacome, Roman and Su{\'a}rez, Rafael S. and Arguello, Henry}, title = {GSNR: Graph Smooth Null-Space Representation for Inverse Problems}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12469-12479} }
RL-ScanIQA: Reinforcement-Learned Scanpaths for Blind 360° Image Quality Assessment-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yujia and Li, Yuyan and Liu, Jiuming and Zhang, Fang-Lue and Zheng, Xinhu and Dodgson, Neil A.}, title = {RL-ScanIQA: Reinforcement-Learned Scanpaths for Blind 360{\textdegree} Image Quality Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37401-37412} }
U-Mind: A Unified Framework for Real-Time Multimodal Interaction with Audiovisual Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2026_CVPR, author = {Deng, Xiang and Gao, Feng and Zhang, Yong and Pang, Youxin and Xiaoming, Xu and Kang, Zhuoliang and Wei, Xiaoming and Liu, Yebin}, title = {U-Mind: A Unified Framework for Real-Time Multimodal Interaction with Audiovisual Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10874-10886} }
ReasonEdit: Towards Reasoning-Enhanced Image Editing Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Fukun and Liu, Shiyu and Han, Yucheng and Wang, Zhibo and Xing, Peng and Wang, Rui and Cheng, Wei and Wang, Yingming and Li, Aojie and Yin, Zixin and Chen, Pengtao and Zeng, Xianfang and Yu, Gang and Jiang, Daxin}, title = {ReasonEdit: Towards Reasoning-Enhanced Image Editing Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23283-23293} }
PromptEnhancer: Taming Your Rewriter for Text-to-Image Generation via Fine-Grained Reward-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Linqing and Xu, Zhiyong and Xing, Ximing and Cheng, Yiji and Zhao, Zhiyuan and Li, Donghao and Hang, Tiankai and Li, Zhenxi and Tao, Jiale and Wang, Qixun and Li, Ruihuang and Chen, Comi and Li, Xin and Wu, Mingrui and Deng, Xinchi and Gu, Shuyang and Wang, Chunyu and Lu, Qinglin}, title = {PromptEnhancer: Taming Your Rewriter for Text-to-Image Generation via Fine-Grained Reward}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14895-14904} }
Beyond Patches: Global-aware Autoregressive Model for Multimodal Few-Shot Font Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Haonan and Luo, Yuxuan and Lian, Zhouhui}, title = {Beyond Patches: Global-aware Autoregressive Model for Multimodal Few-Shot Font Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {517-528} }
FAST: Topology-Aware Frequency-Domain Distribution Matching for Coreset Selection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cui_2026_CVPR, author = {Cui, Jin and Zhao, Boran and Xu, Jiajun and Guo, Jiaqi and Guan, Shuo and Ren, Pengju}, title = {FAST: Topology-Aware Frequency-Domain Distribution Matching for Coreset Selection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24748-24758} }
Clay-to-Stone: Phase-wise 3D Gaussian Splatting for Monocular Articulated Hand-Object Manipulation Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xingyu and Ren, Pengfei and Qi, Qi and Sun, Haifeng and Zhuang, Zirui and Liao, Jianxin and Wang, Jingyu}, title = {Clay-to-Stone: Phase-wise 3D Gaussian Splatting for Monocular Articulated Hand-Object Manipulation Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23128-23138} }
eRetinexGS: Retinex Modeling for Low-Light Scene Enhancement via Event Streams and 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Haojie and Chen, Zehao and Liu, Yan and Gu, Shi and Lin, Peng and Ma, De and Tang, Huajin and Zheng, Qian and Pan, Gang}, title = {eRetinexGS: Retinex Modeling for Low-Light Scene Enhancement via Event Streams and 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8056-8066} }
WebGym: Scaling Training Environments for Long-Horizon Visual Web Agents with Realistic Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Bai_2026_CVPR, author = {Bai, Hao and Taymanov, Alexey and Zhang, Tong and Kumar, Aviral and Whitehead, Spencer}, title = {WebGym: Scaling Training Environments for Long-Horizon Visual Web Agents with Realistic Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12248-12258} }
Test-Time Multi-Prompt Adaptation for Open-Vocabulary Remote Sensing Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Ting and Wang, Qilong and Hou, Qibin and Hu, Qinghua}, title = {Test-Time Multi-Prompt Adaptation for Open-Vocabulary Remote Sensing Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10699-10709} }
The Midas Touch for Metric Depth-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Yu and Guo, Zizhan and Xiong, Zuyi and Zhang, Haoran and Feng, Yi and Zhao, Hongbo and Wang, Hanli and Fan, Rui}, title = {The Midas Touch for Metric Depth}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5804-5813} }
Wavelet-Driven 3D Anomaly Detection under Pose-Agnostic and Sparse-View-
[pdf]
[supp]
[bibtex]@InProceedings{Shao_2026_CVPR, author = {Shao, Mingwen and Zhang, Qiao and Chen, Xinyuan and Lv, Xiang and Meng, Lingzhuang and Liu, Chang and Zhan, Qinglin and Jian, Ling}, title = {Wavelet-Driven 3D Anomaly Detection under Pose-Agnostic and Sparse-View}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43083-43092} }
OmniLottie: Generating Vector Animations via Parameterized Lottie Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yiying and Cheng, Wei and Chen, Sijin and Fu, Honghao and Zeng, Xianfang and Cai, Yujun and Yu, Gang and Ma, Xingjun}, title = {OmniLottie: Generating Vector Animations via Parameterized Lottie Tokens}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39293-39303} }
GeneVAR: Causal MeanFlow for Autoregressive Gene-to-WSI Tile Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Jianwei and Yang, Fan and Li, Xin and Zhai, Qiang and Luo, Ao and Ren, Ziqi and Jiao, Zhicheng and Cheng, Hong}, title = {GeneVAR: Causal MeanFlow for Autoregressive Gene-to-WSI Tile Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34116-34125} }
Narrative Weaver: Towards Controllable Long-Range Visual Consistency with Multi-Modal Conditioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2026_CVPR, author = {Yao, Zhengjian and Li, Yongzhi and Gao, Xinyuan and Chen, Quan and Jiang, Peng and Lu, Yanye}, title = {Narrative Weaver: Towards Controllable Long-Range Visual Consistency with Multi-Modal Conditioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7707-7718} }
Bilevel Layer-Positioning LoRA for Real Image Dehazing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yan and Ma, Long and Feng, Yuxin and Huang, Zhe and Zhou, Fan and Su, Zhuo}, title = {Bilevel Layer-Positioning LoRA for Real Image Dehazing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29969-29978} }
TrajTok: Learning Trajectory Tokens Enhances Video Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Chenhao and Zhang, Jieyu and Zhang, Jianing and Huang, Weikai and Kumar, Ashutosh and Kong, Quan and Tuzel, Oncel and Li, Chun-Liang and Krishna, Ranjay}, title = {TrajTok: Learning Trajectory Tokens Enhances Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31207-31218} }
3D-VCD: Hallucination Mitigation in 3D-LLM Embodied Agents through Visual Contrastive Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ogunleye_2026_CVPR, author = {Ogunleye, Makanjuola Adekunmi and Abdelrahman, Eman and Lourentzou, Ismini}, title = {3D-VCD: Hallucination Mitigation in 3D-LLM Embodied Agents through Visual Contrastive Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40197-40207} }
SAT-RRG: LLM-Guided Self-Adaptive Training for Radiology Report Generation with Token-Level Push-Pull Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yunyi and Li, Yingshu and Chen, Tong and Liu, Lingqiao and Wang, Lei and Zhou, Luping}, title = {SAT-RRG: LLM-Guided Self-Adaptive Training for Radiology Report Generation with Token-Level Push-Pull Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35363-35372} }
MoRE: 3D Visual Geometry Reconstruction Meets Mixture-of-Experts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Jingnan and Wang, Zhe and Fang, Xianze and Ren, Xingyu and Chen, Zhuo and Liu, Shengqi and Cheng, Yuhao and Lyu, Jiangjing and Yang, Xiaokang and Yan, Yichao}, title = {MoRE: 3D Visual Geometry Reconstruction Meets Mixture-of-Experts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14680-14691} }
UniVerse: A Unified Modulation Framework for Segmentation-Free, Disentangled Multi-Concept Personalization-
[pdf]
[supp]
[bibtex]@InProceedings{Phung_2026_CVPR, author = {Phung, Quynh and Ghimire, Sandesh and Hu, Minsi and Tsai, Chung-Chi and Huang, Jia-Bin}, title = {UniVerse: A Unified Modulation Framework for Segmentation-Free, Disentangled Multi-Concept Personalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22007-22016} }
GeoMotion: Rethinking Motion Segmentation via Latent 4D Geometry-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Xiankang and Lin, Peile and Cui, Ying and Guo, Dongyan and Shen, Chunhua and Zhang, Xiaoqin}, title = {GeoMotion: Rethinking Motion Segmentation via Latent 4D Geometry}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28145-28155} }
Generative Point Tracking and Forecasting-
[pdf]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Xuanchen and Cao, Ang and Feng, Chao and Owens, Andrew}, title = {Generative Point Tracking and Forecasting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28167-28178} }
Dual-branch Distilled Transformer for Efficient Asymmetric UAV Tracking-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Hongtao and Zhong, Bineng and Liang, Qihua and Zheng, Yaozong and Hu, Xiantao and Xue, Yuanliang and Song, Shuxiang}, title = {Dual-branch Distilled Transformer for Efficient Asymmetric UAV Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13615-13625} }
DiffSoup: Direct Differentiable Rasterization of Triangle Soup for Extreme Radiance Field Simplification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tojo_2026_CVPR, author = {Tojo, Kenji and Bickel, Bernd and Umetani, Nobuyuki}, title = {DiffSoup: Direct Differentiable Rasterization of Triangle Soup for Extreme Radiance Field Simplification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8353-8363} }
dMLLM-TTS: Self-Verified and Efficient Test-Time Scaling for Diffusion Multi-Modal Large Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xin_2026_CVPR, author = {Xin, Yi and Luo, Siqi and Xu, Tianxiang and Qin, Qi and Chen, Haoxing and Zhu, Kaiwen and Zhang, Zhiwei and He, Yangfan and Zhang, Rongchao and Bai, Jinbin and Cao, Shuo and Fu, Bin and He, Junjun and Liu, Yihao and Cao, Yuewen and Liu, Xiaohong}, title = {dMLLM-TTS: Self-Verified and Efficient Test-Time Scaling for Diffusion Multi-Modal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35726-35735} }
Learning to See and Act: Task-Aware Virtual View Exploration for Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2026_CVPR, author = {Bai, Yongjie and Wang, Zhouxia and Liu, Yang and Luo, Kaijun and Wen, Yifan and Dai, Mingtong and Chen, Weixing and Chen, Ziliang and Liu, Lingbo and Li, Guanbin and Lin, Liang}, title = {Learning to See and Act: Task-Aware Virtual View Exploration for Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13386-13396} }
RMAE-ProGRess: Advancing Semantic Segmentation in Unstructured Environments-
[pdf]
[supp]
[bibtex]@InProceedings{Bhurtel_2026_CVPR, author = {Bhurtel, Manish and Rawat, Danda B.}, title = {RMAE-ProGRess: Advancing Semantic Segmentation in Unstructured Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20511-20520} }
DUET-VLM: Dual stage Unified Efficient Token reduction for VLM Training and Inference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Singh_2026_CVPR, author = {Singh, Aditya Kumar and Kandala, Hitesh and Brahma, Pratik Prabhanjan and Liu, Zicheng and Barsoum, Emad}, title = {DUET-VLM: Dual stage Unified Efficient Token reduction for VLM Training and Inference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17651-17660} }
Downscaling Intelligence: Exploring Perception and Reasoning Bottlenecks in Small Multimodal Models-
[pdf]
[supp]
[bibtex]@InProceedings{Endo_2026_CVPR, author = {Endo, Mark and Yeung-Levy, Serena}, title = {Downscaling Intelligence: Exploring Perception and Reasoning Bottlenecks in Small Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {778-788} }
LAMP: Localization Aware Multi-camera People Tracking in Metric 3D World-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Nan and Straub, Julian and Zhang, Fan and Newcombe, Richard and Engel, Jakob and Ma, Lingni}, title = {LAMP: Localization Aware Multi-camera People Tracking in Metric 3D World}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21208-21220} }
FPS-Bench: A Benchmark for High Frame-Rate Video Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Choudhury_2026_CVPR, author = {Choudhury, Rohan and Dandurand, Jean-Sebastien and Qiu, Kai and Bhat, Kshitij Madhav and Sharma, Kartik and Dahiya, Liza and Zhao, Yizhou and Kundu, Souraja and Lin, Chun-Hsien and Kitani, Kris M. and Jeni, L{\'a}szl{\'o} A.}, title = {FPS-Bench: A Benchmark for High Frame-Rate Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18598-18608} }
Conan: Progressive Learning to Reason Like a Detective over Multi-Scale Visual Evidence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ouyang_2026_CVPR, author = {Ouyang, Kun and Liu, Yuanxin and Yao, Linli and Cai, Yishuo and Zhou, Hao and Meng, Fandong and Zhou, Jie and Sun, Xu}, title = {Conan: Progressive Learning to Reason Like a Detective over Multi-Scale Visual Evidence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41089-41099} }
Points-to-3D: Structure-Aware 3D Generation with Point Cloud Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2026_CVPR, author = {Xia, Jiatong and Duan, Zicheng and van den Hengel, Anton and Liu, Lingqiao}, title = {Points-to-3D: Structure-Aware 3D Generation with Point Cloud Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19928-19939} }
Chain-of-Models Pre-Training: Rethinking Training Acceleration of Vision Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Jiawei and Wang, Shigeng and Li, Chao and Liu, Xiaolong and Yao, Anbang}, title = {Chain-of-Models Pre-Training: Rethinking Training Acceleration of Vision Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34491-34501} }
Masked Auto-Regressive Variational Acceleration: Fast Inference Makes Practical Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2026_CVPR, author = {Gu, Yuxuan and Bai, Weimin and Wang, Yifei and Luo, Weijian and Sun, He}, title = {Masked Auto-Regressive Variational Acceleration: Fast Inference Makes Practical Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41881-41891} }
ZoomEarth: Active Perception for Ultra-High-Resolution Geospatial Vision-Language Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Ruixun and Fu, Bowen and Song, Jiayi and Li, Kaiyu and Li, Wanchen and Xue, Lanxuan and Qiao, Hui and Zhang, Weizhan and Meng, Deyu and Cao, Xiangyong}, title = {ZoomEarth: Active Perception for Ultra-High-Resolution Geospatial Vision-Language Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34877-34888} }
V-RGBX: Video Editing with Accurate Controls over Intrinsic Properties-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Ye and Wu, Tong and Deschaintre, Valentin and Ceylan, Duygu and Georgiev, Iliyan and Huang, Chun-Hao Paul and Hu, Yiwei and Chen, Xuelin and Wang, Tuanfeng Yang}, title = {V-RGBX: Video Editing with Accurate Controls over Intrinsic Properties}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23182-23192} }
IAFMNet: Information-Aware Feature Modulation for Efficient Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Junwei and Liu, Mengzu and Wang, Zhenyu and Wu, Fangfang and Wu, Sijia and Huang, Tao and Dong, Weisheng}, title = {IAFMNet: Information-Aware Feature Modulation for Efficient Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30564-30573} }
ViLearn: Accelerating Training Convergence of Image-to-3D Generation via Visibility Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Rui and Zhang, Jianfeng and Lin, Jing and Yi, Xuanyu and Liang, Yixun and Luo, Guan and Li, Xiu and Li, Zeming and Tan, Ping}, title = {ViLearn: Accelerating Training Convergence of Image-to-3D Generation via Visibility Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27041-27051} }
FlexiVideo: Variation-Aware Temporal Dynamics Modeling for Efficient Video Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Da and Yang, Xuesong and Guo, Zonghao and Zhang, Yichen and Chen, Chi and Zhang, Yidan and Yao, Yuan and Wan, Fang and Ke, Wei and Sun, Maosong}, title = {FlexiVideo: Variation-Aware Temporal Dynamics Modeling for Efficient Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9804-9814} }
Seeing Through the Shift: Causality-Inspired Robust Generalized Category Discovery-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Wei and Jiang, Yiwen and Zhou, Sijin and Qi, Zhuang and Xu, Zhongxing and Wang, Zhonghua and Tang, Feilong and Ge, Zongyuan}, title = {Seeing Through the Shift: Causality-Inspired Robust Generalized Category Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17766-17775} }
PortraitDirector: A Hierarchical Disentanglement Framework for Controllable and Real-time Facial Reenactment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2026_CVPR, author = {Ji, Chaonan and Qi, Jinwei and Xu, Sheng and Zhang, Peng and Zhang, Bang}, title = {PortraitDirector: A Hierarchical Disentanglement Framework for Controllable and Real-time Facial Reenactment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32377-32388} }
Vinedresser3D: Towards Agentic Text-guided 3D Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Chi_2026_CVPR, author = {Chi, Yankuan and Li, Xiang and Huang, Zixuan and Rehg, James Matthew}, title = {Vinedresser3D: Towards Agentic Text-guided 3D Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12673-12683} }
FlashVSR: Towards Real-time Diffusion-Based Streaming Video Super Resolution-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhuang_2026_CVPR, author = {Zhuang, Junhao and Guo, Shi and Cai, Xin and Li, Xiaohui and Liu, Yihao and Yuan, Chun and Xue, Tianfan}, title = {FlashVSR: Towards Real-time Diffusion-Based Streaming Video Super Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43482-43493} }
R-C2: Cycle-Consistent Reinforcement Learning Improves Multimodal Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zirui and Dong, Haoyu and Pei, Kexin and Mao, Chengzhi}, title = {R-C2: Cycle-Consistent Reinforcement Learning Improves Multimodal Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36893-36903} }
Inference-time Physics Alignment of Video Generative Models with Latent World Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2026_CVPR, author = {Yuan, Jianhao and Zhang, Xiaofeng and Friedrich, Felix and Beltran-Velez, Nicolas and Hall, Melissa and Askari-Hemmat, Reyhane and Han, Xiaochuang and Ballas, Nicolas and Drozdzal, Michal and Romero-Soriano, Adriana}, title = {Inference-time Physics Alignment of Video Generative Models with Latent World Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16118-16129} }
VideoMaMa: Mask-Guided Video Matting via Generative Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lim_2026_CVPR, author = {Lim, Sangbeom and Oh, Seoung Wug and Huang, Jiahui and Yoon, Heeji and Kim, Seungryong and Lee, Joon-Young}, title = {VideoMaMa: Mask-Guided Video Matting via Generative Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3845-3855} }
Unlocking Motion from Large Vision Models with a Semantic and Kinematic Duality for Gait Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Zhanbo and Ye, Dingqiang and Liu, Xiaoming and Kong, Yu}, title = {Unlocking Motion from Large Vision Models with a Semantic and Kinematic Duality for Gait Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28379-28390} }
Hear What You See: Video-to-Audio Generation with Diffusion Transformer and Semantic-Temporal Alignment-Ranked Direct Preference Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Kai and Zhou, Tao and Lei, Jiayi and Wang, Jing and Zhao, Jinman and Pian, Weiguo and Cheng, Yuan and Tian, Yapeng and Gao, Peng and Fu, Bin and Liu, Yihao and Hatzinakos, Dimitrios and Cao, Yuewen}, title = {Hear What You See: Video-to-Audio Generation with Diffusion Transformer and Semantic-Temporal Alignment-Ranked Direct Preference Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43396-43406} }
ORD: Object-Relation Decoupling for Generalized 3D Visual Grounding-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Ronggang and Meng, Fansen and Zhang, Huaidong and Xu, Xuemiao}, title = {ORD: Object-Relation Decoupling for Generalized 3D Visual Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30964-30973} }
UCMNet: Uncertainty-Aware Context Memory Network for Under-Display Camera Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Daehyun and Kim, Youngmin and Oh, Yoon Ju and Kim, Tae Hyun}, title = {UCMNet: Uncertainty-Aware Context Memory Network for Under-Display Camera Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29939-29948} }
PARSE: Part-Aware Relational Spatial Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2026_CVPR, author = {Bai, Yinuo and Xu, Peijun and Shao, Kuixiang and Jiao, Yuyang and Zhang, Jingxuan and Yao, Kaixin and Gu, Jiayuan and Yu, Jingyi}, title = {PARSE: Part-Aware Relational Spatial Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38700-38710} }
SDTrack: A Baseline for Event-based Tracking via Spiking Neural Networks-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shan_2026_CVPR, author = {Shan, Yimeng and Ren, Zhenbang and Wu, Haodi and Wei, Wenjie and Zhu, Rui-Jie and Wang, Shuai and Zhang, Dehao and Xiao, Yichen and Zhang, Jieyuan and Shi, Kexin and Wang, Jingzhinan and Eshraghian, Jason K. and Qu, Haicheng and Zhang, Malu}, title = {SDTrack: A Baseline for Event-based Tracking via Spiking Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36245-36254} }
UniPercept: A Unified Diffusion Model for Generalizable Visual Perception-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Zuyan and He, Zhenliang and Kan, Meina and Shan, Shiguang and Chen, Xilin}, title = {UniPercept: A Unified Diffusion Model for Generalizable Visual Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43175-43186} }
WebChain: A Large-Scale Human-Annotated Dataset of Real-World Web Interaction Traces-
[pdf]
[arXiv]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Sicheng and Wan, Rui and Leng, Yifei and Liang, Gaoning and Ling, Li and Shang, Yanyi and Kong, Dehan}, title = {WebChain: A Large-Scale Human-Annotated Dataset of Real-World Web Interaction Traces}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6209-6218} }
Probabilistic Precipitation Nowcasting with Rectified Flow Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Schusterbauer_2026_CVPR, author = {Schusterbauer, Johannes and Wiese, Jannik and Stracke, Nick and Phan, Timy and Ommer, Bj{\"o}rn}, title = {Probabilistic Precipitation Nowcasting with Rectified Flow Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25742-25756} }
PR-MaGIC: Prompt Refinement Via Mask Decoder Gradient Flow For In-Context Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Minjae and Hur, Sungwoo and Hwang, Soojin and Kim, Won Hwa}, title = {PR-MaGIC: Prompt Refinement Via Mask Decoder Gradient Flow For In-Context Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21659-21668} }
PixDLM: A Dual-Path Multimodal Language Model for UAV Reasoning Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ke_2026_CVPR, author = {Ke, Shuyan and Mei, Yifan and Wu, Changli and Zheng, Yonghan and Ji, Jiayi and Cao, Liujuan and Ji, Rongrong}, title = {PixDLM: A Dual-Path Multimodal Language Model for UAV Reasoning Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26165-26175} }
BrepVGAE: Variational Graph Autoencoder with Unified Latent Representation for B-rep-
[pdf]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Hao and Deng, Liyuan and Dai, Yongkang and Wang, Ruohan and Li, Jiahao and Bai, Yunpeng and Shi, Yilei}, title = {BrepVGAE: Variational Graph Autoencoder with Unified Latent Representation for B-rep}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39230-39238} }
EvoComp: Learning Visual Token Compression for Multimodal Large Language Models via Semantic-Guided Evolutionary Labeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Jiafei and Zhou, Fengwei and Qu, Jin and Li, Wenjin Jason and Wu, Tong and Xue, Gengjian and Zhao, Zhikang and Wei, Daomin and Lu, Yichao and Na, Bailin}, title = {EvoComp: Learning Visual Token Compression for Multimodal Large Language Models via Semantic-Guided Evolutionary Labeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3532-3542} }
Vibe Spaces for Creatively Connecting and Expressing Visual Concepts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Huzheng and Xu, Katherine and Lu, Andrew and Grossberg, Michael D. and Bai, Yutong and Shi, Jianbo}, title = {Vibe Spaces for Creatively Connecting and Expressing Visual Concepts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21912-21921} }
The Golden Subspace: Where Efficiency Meets Generalization in Continual Test-Time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lai_2026_CVPR, author = {Lai, Guannan and Zhou, Da-Wei and Li, Zhenguo and Ye, Han-Jia}, title = {The Golden Subspace: Where Efficiency Meets Generalization in Continual Test-Time Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3866-3875} }
Dual-level Adaptation for Multi-Object Tracking: Building Test-Time Calibration from Experience and Intuition-
[pdf]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Wen and Zhao, Pengfei and Wang, Zongmeng and Hu, Yufan and Gao, Junyu}, title = {Dual-level Adaptation for Multi-Object Tracking: Building Test-Time Calibration from Experience and Intuition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28190-28200} }
DynFusion: Rethinking Condition Fusion for Adaptive Multi-Conditional Text-to-Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2026_CVPR, author = {Fang, Zheng and Xiang, Lichuan and Cai, Xu and Wang, Bing and Yang, Bo and Wen, Hongkai}, title = {DynFusion: Rethinking Condition Fusion for Adaptive Multi-Conditional Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29103-29113} }
Long-SCOPE: Fully Sparse Long-Range Cooperative 3D Perception-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Jiahao and Xu, Zikun and Zhang, Yuner and Jiang, Zhongwei and Lu, Chenyang and Yang, Shuocheng and Wang, Yuxuan and Zhong, Jiaru and Zhang, Chuang and Xu, Shaobing and Wang, Jianqiang}, title = {Long-SCOPE: Fully Sparse Long-Range Cooperative 3D Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11599-11609} }
LongVT: Incentivizing "Thinking with Long Videos" via Native Tool Calling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zuhao and Wang, Sudong and Zhang, Kaichen and Wu, Keming and Leng, Sicong and Zhang, Yifan and Li, Bo and Qin, Chengwei and Lu, Shijian and Li, Xingxuan and Bing, Lidong}, title = {LongVT: Incentivizing ``Thinking with Long Videos'' via Native Tool Calling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33816-33826} }
Elucidating the SNR-t Bias of Diffusion Probabilistic Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Meng and Sun, Lei and Zeng, Jianhao and Chu, Xiangxiang and Zhan, Kun}, title = {Elucidating the SNR-t Bias of Diffusion Probabilistic Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43461-43470} }
Momentum Memory for Knowledge Distillation in Computational Pathology-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Yongxin and Lu, Hao and Koyun, Onur C. and Zhu, Zhengjie and Demir, Muhammet F. and Gurcan, Metin N.}, title = {Momentum Memory for Knowledge Distillation in Computational Pathology}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6889-6899} }
Revisiting Token Compression for Accelerating ViT-based Sparse Multi-View 3D Object Detectors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2026_CVPR, author = {Ji, Mingqian and Zhang, Shanshan and Yang, Jian}, title = {Revisiting Token Compression for Accelerating ViT-based Sparse Multi-View 3D Object Detectors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18776-18785} }
Efficiently Reconstructing Dynamic Scenes One D4RT at a Time-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Chuhan and Le Moing, Guillaume and Koppula, Skanda and Rocco, Ignacio and Momeni, Liliane and Xie, Junyu and Sun, Shuyang and Sukthankar, Rahul and Barral, Jo{\"e}lle K. and Hadsell, Raia and Ghahramani, Zoubin and Zisserman, Andrew and Zhang, Junlin and Sajjadi, Mehdi S. M.}, title = {Efficiently Reconstructing Dynamic Scenes One D4RT at a Time}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7382-7392} }
CD-Buffer: Complementary Dual-Buffer Framework for Test-Time Adaptation in Adverse Weather Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Youngjun and Kim, Hyeongyu and Hwang, Dosik}, title = {CD-Buffer: Complementary Dual-Buffer Framework for Test-Time Adaptation in Adverse Weather Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15050-15059} }
Ref4D-VideoBench: Four-Dimensional Reference-Based Evaluation of Text-to-Video Generative Models-
[pdf]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Jiajia and He, Yujia and Hou, Yuhan and Qi, Hang and Wang, Sihua and Shi, Jincheng and Li, Kwok Fung and Zheng, Zibin and Wu, Weibin}, title = {Ref4D-VideoBench: Four-Dimensional Reference-Based Evaluation of Text-to-Video Generative Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7719-7729} }
ChArtist: Generating Pictorial Charts with Unified Spatial and Subject Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Shishi and Zhou, Tongyu and Laidlaw, David H. and Chan, Gromit Yeuk-Yin}, title = {ChArtist: Generating Pictorial Charts with Unified Spatial and Subject Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29211-29221} }
TeHOR: Text-Guided 3D Human and Object Reconstruction with Textures-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nam_2026_CVPR, author = {Nam, Hyeongjin and Jung, Daniel Sungho and Lee, Kyoung Mu}, title = {TeHOR: Text-Guided 3D Human and Object Reconstruction with Textures}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7100-7110} }
Adaptive Video Distillation: Mitigating Oversaturation and Temporal Collapse in Few-Step Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{You_2026_CVPR, author = {You, Yuyang and Li, Yongzhi and Li, Jiahui and Mu, Yadong and Chen, Quan and Jiang, Peng}, title = {Adaptive Video Distillation: Mitigating Oversaturation and Temporal Collapse in Few-Step Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43429-43439} }
Boosting Document Parsing Efficiency and Performance with Coarse-to-Fine Visual Processing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cui_2026_CVPR, author = {Cui, Cheng and Sun, Ting and Liang, Suyin and Gao, Tingquan and Zhang, Zelun and Liu, Jiaxuan and Wang, Xueqing and Zhou, Changda and Liu, Hongen and Lin, Manhui and Zhang, Yue and Zhang, Yubo and Zhang, Jing and Zhang, Jun and Wei, Xing and Liu, Yi and Yu, Dianhai and Ma, Yanjun}, title = {Boosting Document Parsing Efficiency and Performance with Coarse-to-Fine Visual Processing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16655-16665} }
Divide, Conquer, and Aggregate: Asymmetric Experts for Class-Imbalanced Semi-Supervised Medical Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yajun}, title = {Divide, Conquer, and Aggregate: Asymmetric Experts for Class-Imbalanced Semi-Supervised Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8503-8513} }
FlashPortrait: 6x Faster Infinite Portrait Animation with Adaptive Latent Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tu_2026_CVPR, author = {Tu, Shuyuan and Pan, Yueming and Huang, Yinming and Han, Xintong and Xing, Zhen and Dai, Qi and Qiu, Kai and Luo, Chong and Wu, Zuxuan}, title = {FlashPortrait: 6x Faster Infinite Portrait Animation with Adaptive Latent Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25163-25173} }
Dynamic Logits Adjustment and Exploration for Test-Time Adaptation in Vision Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Haoyan and Liu, Yahao and Lei, Yinjie and Duan, Lixin and Li, Wen}, title = {Dynamic Logits Adjustment and Exploration for Test-Time Adaptation in Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3143-3153} }
Interpretable Cross-Domain Few-Shot Learning with Rectified Target-Domain Local Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yaze and Zou, Yixiong and Li, Yuhua and Li, Ruixuan}, title = {Interpretable Cross-Domain Few-Shot Learning with Rectified Target-Domain Local Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41605-41615} }
Structural Action Transformer for 3D Dexterous Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2026_CVPR, author = {Lei, Xiaohan and Wang, Min and Weng, Bohong and Zhou, Wengang and Li, Houqiang}, title = {Structural Action Transformer for 3D Dexterous Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28807-28818} }
SECOS: Semantic Capture for Rigorous Classification in Open-World Semi-Supervised Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Hezhao and Yang, Jiacheng and Gao, Junlong and Li, Mengke and Zhang, Yiqun and Gowda, Shreyank N and Lu, Yang}, title = {SECOS: Semantic Capture for Rigorous Classification in Open-World Semi-Supervised Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39627-39636} }
EffectMaker: Unifying Reasoning and Generation for Customized Visual Effect Creation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Shiyuan and Li, Ruihuang and Tao, Jiale and Shao, Shuai and Lu, Qinglin and Liao, Jing}, title = {EffectMaker: Unifying Reasoning and Generation for Customized Visual Effect Creation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16108-16117} }
CARE What Fails: Contrastive Anchored-REflection for Verifiable Multimodal Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yongxin and Yang, Zhicheng and Cao, Meng and Han, Mingfei and Lin, Haokun and Zhu, Yingying and Chang, Xiaojun and Liang, Xiaodan}, title = {CARE What Fails: Contrastive Anchored-REflection for Verifiable Multimodal Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11976-11986} }
SOTA: Self-adaptive Optimal Transport for Zero-Shot Classification with Multiple Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Zhanxuan and Xu, Qiyu and Duan, Yu and Tai, Yonghang and Li, Huafeng}, title = {SOTA: Self-adaptive Optimal Transport for Zero-Shot Classification with Multiple Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26624-26634} }
Mechanisms of Object Localization in Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Schaumloffel_2026_CVPR, author = {Schauml{\"o}ffel, Timothy and Vilas, Martina G. and Roig, Gemma}, title = {Mechanisms of Object Localization in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31356-31365} }
Bridge: Basis-Driven Causal Inference Marries VFMs for Domain Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2026_CVPR, author = {Hong, Mingbo and Liu, Feng and Gevaert, Caroline and Vosselman, George and Cheng, Hao}, title = {Bridge: Basis-Driven Causal Inference Marries VFMs for Domain Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31962-31973} }
Dual-Level Confidence based Implicit Self-Refinement for Medical Visual Question Answering-
[pdf]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Meihong and Zheng, Yefeng}, title = {Dual-Level Confidence based Implicit Self-Refinement for Medical Visual Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17215-17225} }
Beyond Tie Points: Satellite Image Block Adjustment based on Dense Feature Consistency-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yi and Wan, Yi and Yu, Lei and Xia, Panwang and Wu, Qiong and Pei, Yingying and Huang, Xuejun and Zhang, Junjian and Cai, Xiangyuan and Hu, Hongwei and Zhang, Yongjun}, title = {Beyond Tie Points: Satellite Image Block Adjustment based on Dense Feature Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6443-6452} }
Video-as-Answer: Predict and Generate Next Video Event with Joint-GRPO-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Junhao and Hou, Liang and Tao, Xin and Liao, Jing}, title = {Video-as-Answer: Predict and Generate Next Video Event with Joint-GRPO}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38915-38925} }
Refer-Agent: A Collaborative Multi-Agent System with Reasoning and Reflection for Referring Video Object Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Haichao and Liang, Tianming and Zheng, Wei-Shi and Hu, Jian-Fang}, title = {Refer-Agent: A Collaborative Multi-Agent System with Reasoning and Reflection for Referring Video Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39508-39517} }
SkeletonContext: Skeleton-side Context Prompt Learning for Zero-Shot Skeleton-based Action Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Ning and Wu, Tieyue and Sharif, Naeha and Boussaid, Farid and Zhu, Guangming and Mei, Lin and Bennamoun, Mohammed and Zhang, Liang}, title = {SkeletonContext: Skeleton-side Context Prompt Learning for Zero-Shot Skeleton-based Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20170-20180} }
KVSmooth: Mitigating Hallucination in Multi-modal Large Language Models through Key-Value Smoothing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Siyu and Chen, Feiyang and Zhang, Xiaojin and He, Kun}, title = {KVSmooth: Mitigating Hallucination in Multi-modal Large Language Models through Key-Value Smoothing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32561-32571} }
Extend3D: Town-Scale 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yoon_2026_CVPR, author = {Yoon, Seungwoo and Kim, Jinmo and Park, Jaesik}, title = {Extend3D: Town-Scale 3D Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5892-5901} }
PaCo-RL: Advancing Reinforcement Learning for Consistent Image Generation with Pairwise Reward Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ping_2026_CVPR, author = {Ping, Bowen and Jia, Chengyou and Luo, Minnan and Xia, Changliang and Shen, Xin and Dang, Zhuohang and Qian, Hangwei}, title = {PaCo-RL: Advancing Reinforcement Learning for Consistent Image Generation with Pairwise Reward Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34353-34363} }
A Style is Worth One Code: Unlocking Code-to-Style Image Generation with Discrete Style Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Huijie and Cui, Shuhao and Cao, Haoxiang and Ma, Shuai and Wu, Kai and Kang, Guoliang}, title = {A Style is Worth One Code: Unlocking Code-to-Style Image Generation with Discrete Style Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1-10} }
Mitigating Multimodal Hallucinations via Gradient-based Self-Reflection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Shan and Shen, Maying and Chang, Nadine and Nguyen, Chuong and Li, Hongdong and Alvarez, Jose M.}, title = {Mitigating Multimodal Hallucinations via Gradient-based Self-Reflection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32539-32549} }
GenHOI: Towards Object-Consistent Hand-Object Interaction with Temporally Balanced and Spatially Selective Object Injection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Xuan and Xiang, Mochu and Shen, Zhelun and Wu, Jinbo and Wu, Chenming and Zhao, Chen and Wang, Kaisiyuan and Zhou, Hang and Liu, Shanshan and Feng, Haocheng and He, Wei and Wang, Jingdong}, title = {GenHOI: Towards Object-Consistent Hand-Object Interaction with Temporally Balanced and Spatially Selective Object Injection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23117-23127} }
Heuristic Self-Paced Learning for Domain Adaptive Semantic Segmentation under Adverse Conditions-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Shiqin and Chen, Haoyang and Huang, Huaizhou and He, Yinkan and Sun, Dongfang and Chen, Xiaoqing and Liu, Xingyu and Wang, Zheng and Zhao, Kaiyan}, title = {Heuristic Self-Paced Learning for Domain Adaptive Semantic Segmentation under Adverse Conditions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3815-3824} }
GeoPredict: Leveraging Predictive Kinematics and 3D Gaussian Geometry for Precise VLA Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qian_2026_CVPR, author = {Qian, Jingjing and Han, Boyao and Shi, Chen and Xiao, Lei and Yang, Long and Shi, Shaoshuai and Jiang, Li}, title = {GeoPredict: Leveraging Predictive Kinematics and 3D Gaussian Geometry for Precise VLA Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13529-13539} }
INSIGHT Bench: Towards Grounded IN-SItu Guidance for Robotic ManipulaTion-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Seonho and Hong, Junhyeong and Lee, Kyungjae and Oh, Yoonseon}, title = {INSIGHT Bench: Towards Grounded IN-SItu Guidance for Robotic ManipulaTion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35070-35079} }
PoseGAM: Robust Unseen Object Pose Estimation via Geometry-Aware Multi-View Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Jianqi and Zhang, Biao and Tang, Xiangjun and Wonka, Peter}, title = {PoseGAM: Robust Unseen Object Pose Estimation via Geometry-Aware Multi-View Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7197-7208} }
Event-based Motion Deblurring with Unpaired Data-
[pdf]
[supp]
[bibtex]@InProceedings{Cho_2026_CVPR, author = {Cho, Hoonhee and Jeong, Yuhwan and Yoon, Kuk-Jin}, title = {Event-based Motion Deblurring with Unpaired Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {882-891} }
DualMirage: Hunting Stealthy Multimodal LLM Agents via CAPTCHAs with Contour and Adversarial Illusions-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Bei and Li, Gaolei and Wu, Jun and Li, Jianhua}, title = {DualMirage: Hunting Stealthy Multimodal LLM Agents via CAPTCHAs with Contour and Adversarial Illusions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1523-1532} }
VGGT-Det: Mining VGGT Internal Priors for Sensor-Geometry-Free Multi-View Indoor 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Yang and Wu, Feize and Chen, Dave Zhenyu and Zhong, Yingji and Hong, Lanqing and Xu, Dan}, title = {VGGT-Det: Mining VGGT Internal Priors for Sensor-Geometry-Free Multi-View Indoor 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4708-4717} }
Hugging Visual Prompt and Segmentation Tokens: Consistency Learning for Fine-Grained Visual Understanding in MLLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Jing and Yang, Sen and Duan, Boqiang and Dai, Ming and Zhang, Wei and Tan, Xiao and Chen, Kunbin and He, Wei and Wang, Jingdong and Wang, Hanli}, title = {Hugging Visual Prompt and Segmentation Tokens: Consistency Learning for Fine-Grained Visual Understanding in MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5175-5186} }
From Scale to Speed: Adaptive Test-Time Scaling for Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2026_CVPR, author = {Qu, Xiangyan and Yuan, Zhenlong and Tang, Jing and Chen, Rui and Tang, Datao and Yu, Meng and Sun, Lei and Bai, Yancheng and Chu, Xiangxiang and Gou, Gaopeng and Xiong, Gang and Cai, Yujun}, title = {From Scale to Speed: Adaptive Test-Time Scaling for Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23272-23282} }
FSLoRA: Harmonizing Detection and Re-Identification via Freq-Spatial Low-Rank Adapter for One-Stage Person Search-
[pdf]
[supp]
[bibtex]@InProceedings{Tian_2026_CVPR, author = {Tian, Yanling and Zhang, Shanshan and Chen, Di and Yang, Jian}, title = {FSLoRA: Harmonizing Detection and Re-Identification via Freq-Spatial Low-Rank Adapter for One-Stage Person Search}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40428-40437} }
Learning to Track Instance from Single Nature Language Description-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Yaozong and Zhong, Bineng and Liang, Qihua and Zeng, Shuimu and Xia, Haiying and Song, Shuxiang}, title = {Learning to Track Instance from Single Nature Language Description}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20922-20931} }
Concept-Aware LoRA for Domain-Aligned Segmentation Dataset Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Minho and Park, Sunghyun and Lee, Jungsoo and Park, Hyojin and Hwang, Kyuwoong and Porikli, Fatih and Choo, Jaegul and Choi, Sungha}, title = {Concept-Aware LoRA for Domain-Aligned Segmentation Dataset Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39858-39868} }
UniPR: Unified Object-level Real-to-Sim Perception and Reconstruction from a Single Stereo Pair-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Chuanrui and Zou, Yingshuang and Wu, ZhengXian and Ling, Yonggen and Yang, Yuxiao and Wang, Ziwei}, title = {UniPR: Unified Object-level Real-to-Sim Perception and Reconstruction from a Single Stereo Pair}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4667-4676} }
DiffDecompose: Layer-Wise Decomposition of Alpha-Composited Images via Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zitong and Zhao, Hang and Zhou, Qianyu and Lu, Xuequan and Li, Xiangtai and Yang, Hao and Yang, Bo and Song, Yiren}, title = {DiffDecompose: Layer-Wise Decomposition of Alpha-Composited Images via Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4624-4634} }
POINTS-Long: Adaptive Dual-Mode Visual Reasoning in MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Haicheng and Liu, Yuan and Liu, Yikun and Yu, Zhemeng and Zhao, Zhongyin and You, Yangxiu and Yu, Zilin and Tian, Le and Xiao, Zhou and Zhou, Jie and Xie, Weidi and Wang, Yanfeng}, title = {POINTS-Long: Adaptive Dual-Mode Visual Reasoning in MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19119-19131} }
VISion On Request: Enhanced VLLM efficiency with sparse, dynamically selected, vision-language interactions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bulat_2026_CVPR, author = {Bulat, Adrian and Baldrati, Alberto and Metaxas, Ioannis Maniadis and Ouali, Yassine and Tzimiropoulos, Georgios}, title = {VISion On Request: Enhanced VLLM efficiency with sparse, dynamically selected, vision-language interactions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31920-31930} }
HiLoRA: Hierarchical Low-Rank Adaptation for Personalized Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Zihao and Zou, Nan and Zeng, Jiandian and Li, Guo and Chen, Ke and Li, Boyuan and Wang, Tian}, title = {HiLoRA: Hierarchical Low-Rank Adaptation for Personalized Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31746-31757} }
ERMoE: Eigen-Reparameterized Mixture-of-Experts for Stable Routing and Interpretable Specialization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Anzhe and Duan, Shukai and Li, Shixuan and Yin, Chenzhong and Cheng, Mingxi and Ping, Heng and Chattopadhyay, Tamoghna and Thomopoulos, Sophia I. and Nazarian, Shahin and Thompson, Paul and Bogdan, Paul}, title = {ERMoE: Eigen-Reparameterized Mixture-of-Experts for Stable Routing and Interpretable Specialization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12997-13006} }
EventDrive: Event Cameras for Vision-Language Driving Intelligence-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Dongyue and Li, Rong and Liang, Ao and Kong, Lingdong and Yin, Wei and Ng, Lai Xing and Cottereau, Benoit R. and Chane, Camille Simon and Ooi, Wei Tsang}, title = {EventDrive: Event Cameras for Vision-Language Driving Intelligence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22312-22322} }
Counterfactual VLA: Self-Reflective Vision-Language-Action Model with Adaptive Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Zhenghao and Ding, Wenhao and You, Yurong and Chen, Yuxiao and Luo, Wenjie and Tian, Thomas and Cao, Yulong and Sharma, Apoorva and Xu, Danfei and Ivanovic, Boris and Li, Boyi and Wang, Yan and Pavone, Marco}, title = {Counterfactual VLA: Self-Reflective Vision-Language-Action Model with Adaptive Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4022-4031} }
TrafficAlign: Aligning Large Language Models for Traffic Scenario Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Tu_2026_CVPR, author = {Tu, Zhi and Niu, Liangkun and Zhang, Tianyi}, title = {TrafficAlign: Aligning Large Language Models for Traffic Scenario Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39744-39754} }
Reward Sharpness-Aware Fine-Tuning for Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Kwanyoung and Sim, Byeongsu}, title = {Reward Sharpness-Aware Fine-Tuning for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36051-36061} }
SGDE: Self-supervised Geometry Degradation Estimation Framework for Coded Aperture Compressive Spectral Imaging-
[pdf]
[supp]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Yuqiao and Liu, Xiaoyan and Mao, Jianxu and Wang, Yaonan and Zhang, Hui and Liu, Lizhu and Chen, Yurong and He, Wenbin}, title = {SGDE: Self-supervised Geometry Degradation Estimation Framework for Coded Aperture Compressive Spectral Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34084-34094} }
Evaluating Generative Models via One-Dimensional Code Distributions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2026_CVPR, author = {Jia, Zexi and Luo, Pengcheng and Zhong, Yijia and Zhang, Jinchao and Zhou, Jie}, title = {Evaluating Generative Models via One-Dimensional Code Distributions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17077-17086} }
Dynamic Momentum Recalibration in Online Gradient Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2026_CVPR, author = {Yao, Zhipeng and Yu, Rui and Chang, Guisong and Li, Ying and Zhang, Yu and Li, Dazhou}, title = {Dynamic Momentum Recalibration in Online Gradient Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12902-12912} }
AG-VAS: Anchor-Guided Zero-Shot Visual Anomaly Segmentation with Large Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2026_CVPR, author = {Qu, Zhen and Tao, Xian and Bao, Xiaoyi and Wang, Dingrong and Qu, ShiChen and Zhang, Zhengtao and Wang, Xingang}, title = {AG-VAS: Anchor-Guided Zero-Shot Visual Anomaly Segmentation with Large Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14126-14136} }
MindPower: Enabling Theory-of-Mind Reasoning in VLM-based Embodied Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Ruoxuan and Zheng, Qiyun and Zhou, Zhiyu and Liao, Ziqi and Wu, Siyu and Jiang-Lin, Jian-Yu and Wen, Bin and Xie, Hongxia and Fu, Jianlong and Cheng, Wen-Huang}, title = {MindPower: Enabling Theory-of-Mind Reasoning in VLM-based Embodied Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29631-29641} }
ComPose: A Unified Completion-Pose Framework for Robust Category-Level Object Pose Estimation-
[pdf]
[bibtex]@InProceedings{Ren_2026_CVPR, author = {Ren, Huan and Chen, Yihan and Wang, Chuxin and Liu, Nailong and Yang, Wenfei and Zhang, Tianzhu}, title = {ComPose: A Unified Completion-Pose Framework for Robust Category-Level Object Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14315-14324} }
Online Data Curation for Object Detection via Marginal Contributions to Dataset-level Average Precision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Zitang and Yoshimura, Masakazu and Otsuka, Junji and Irie, Atsushi and Ohashi, Takeshi}, title = {Online Data Curation for Object Detection via Marginal Contributions to Dataset-level Average Precision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32787-32797} }
LaVR: Scene Latent Conditioned Generative Video Trajectory Re-Rendering using Large 4D Reconstruction Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Mingyang and Khan, Numair and Wang, Tianfu and Dhingra, Naina and Nam, Seonghyeon and Yang, Haitao and Hui, Zhuo and Metzler, Christopher and Vedaldi, Andrea and Pirsiavash, Hamed and Luo, Lei}, title = {LaVR: Scene Latent Conditioned Generative Video Trajectory Re-Rendering using Large 4D Reconstruction Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25450-25460} }
HierAmp: Coarse-to-Fine Autoregressive Amplification for Generative Dataset Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Lin and Jiang, Xinru and Xiao, Xi and Fan, Qihui and Lu, Lei and Wang, Yanzhi and Lin, Xue and Camps, Octavia and Zhao, Pu and Gu, Jianyang}, title = {HierAmp: Coarse-to-Fine Autoregressive Amplification for Generative Dataset Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41688-41698} }
Leveraging Class Distributions in CLIP for Weakly Supervised Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Ziqian and Zhao, Xinqiao and Wang, Xiaolei and Zhang, Quan and Xiao, Jimin}, title = {Leveraging Class Distributions in CLIP for Weakly Supervised Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34714-34723} }
Accelerating Diffusion Model Training under Minimal Budgets: A Condensation-Based Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Rui and Shao, Shitong and Zhou, Zikai and Zhao, Pukun and Guo, Hangyu and Ye, Tian and Bai, Lichen and Yang, Shuo and Xie, Zeke}, title = {Accelerating Diffusion Model Training under Minimal Budgets: A Condensation-Based Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43621-43631} }
Defect Cue-Preserved Structural Feature Refinement for Few-Shot Anomaly Detection-
[pdf]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Le and Huang, Yan and Xu, Zhen and Xu, Yong and Wong, Hau-San and Wu, Si}, title = {Defect Cue-Preserved Structural Feature Refinement for Few-Shot Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35607-35616} }
Robust3DGSW: Toward Robust Watermarking for Quantization-Aware 3D Gaussian Splatting-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Boyu and Xia, Jun and Chen, Mingsong}, title = {Robust3DGSW: Toward Robust Watermarking for Quantization-Aware 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19076-19084} }
RehearseVLA: Simulated Post-Training for VLAs with Physically-Consistent World Model-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Junjin and Yang, Yandan and Chang, Xinyuan and Chen, Ronghan and Xiong, Feng and Xu, Mu and Zheng, Wei-Shi and Zhang, Qing}, title = {RehearseVLA: Simulated Post-Training for VLAs with Physically-Consistent World Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20867-20877} }
Neural Mixture Density Processes-
[pdf]
[supp]
[bibtex]@InProceedings{Ding_2026_CVPR, author = {Ding, Yi and Tao, Qi and Liang, Xingxing and Zhang, Longfei and Lv, Yiqin and Song, Weitao and Yang, Fangjie and Wang, Cheems and Cheng, Guangquan}, title = {Neural Mixture Density Processes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39680-39690} }
Opening the Sim-to-Real Door for Humanoid Pixel-to-Action Policy Transfer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2026_CVPR, author = {Xue, Haoru and He, Tairan and Wang, Zi and Ben, Qingwei and Xiao, Wenli and Luo, Zhengyi and Da, Xingye and Casta{\~n}eda, Fernando and Shi, Guanya and Sastry, Shankar and Fan, Linxi and Zhu, Yuke}, title = {Opening the Sim-to-Real Door for Humanoid Pixel-to-Action Policy Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6642-6652} }
Distributed Image Compression with Multimodal Side Information at Extremely Low Bitrates-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Guojun and Zhang, Mingyang and Xiang, Jianwen and Tan, Cheng and Yang, Yanchao and Zhou, Junwei}, title = {Distributed Image Compression with Multimodal Side Information at Extremely Low Bitrates}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33662-33671} }
Progressive Guessing to Fixed Point: Rethinking Human Motion Prediction with Deep Equilibrium Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Dong and Sun, Huaijiang and Liu, Fan and Zheng, Yuhui}, title = {Progressive Guessing to Fixed Point: Rethinking Human Motion Prediction with Deep Equilibrium Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16454-16463} }
STAGE: Storyboard-Anchored Generation for Cinematic Multi-shot Narrative-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Peixuan and Jia, Zijian and Liu, Kaiqi and Weng, Shuchen and Li, Si and Shi, Boxin}, title = {STAGE: Storyboard-Anchored Generation for Cinematic Multi-shot Narrative}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {659-669} }
FreqEdit: Preserving High-Frequency Features for Robust Multi-Turn Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2026_CVPR, author = {Liao, Yucheng and Liang, Jiajun and Cui, Kaiqian and Zhao, Baoquan and Xie, Haoran and Liu, Wei and Li, Qing and Mao, Xudong}, title = {FreqEdit: Preserving High-Frequency Features for Robust Multi-Turn Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43525-43535} }
Affordance-First Decomposition for Continual Learning in Video-Language Understanding-
[pdf]
[arXiv]
[bibtex]@InProceedings{xu_2026_CVPR, author = {Xu, Mengzhu and Liu, Hanzhi and Peng, Ningkang and Chen, Qianyu and Xiao, Canran}, title = {Affordance-First Decomposition for Continual Learning in Video-Language Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {3908-3919} }
NEAF: Natural Image Editing with Attention Fusion for Generalizable Test-time Optimization in Text-Guided Image Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Jisoo and Oh, Heeseok}, title = {NEAF: Natural Image Editing with Attention Fusion for Generalizable Test-time Optimization in Text-Guided Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22509-22518} }
How Much 3D Do Video Foundation Models Encode?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Zixuan and Li, Xiang and Lv, Zhaoyang and Rehg, James M.}, title = {How Much 3D Do Video Foundation Models Encode?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {384-394} }
VisiLock: Authorizing Instruction-based Image editing with Dual Score Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Le_2026_CVPR, author = {Le, Van Thanh and Fu, Yun}, title = {VisiLock: Authorizing Instruction-based Image editing with Dual Score Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15710-15718} }
AdaPrior: Bayesian-Inspired Adaptive Prior Correction for Long-Tailed Continual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Bhat_2026_CVPR, author = {Bhat, S Divakar and More, Amit Popat and Soni, Mudit and Aggarwal, Bhuvan}, title = {AdaPrior: Bayesian-Inspired Adaptive Prior Correction for Long-Tailed Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10840-10850} }
Smart Replay: Adaptive Scheduling of Memory Rehearsal for Computational Resource-Aware Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Jianting and Yu, Dianzhi and King, Irwin}, title = {Smart Replay: Adaptive Scheduling of Memory Rehearsal for Computational Resource-Aware Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39945-39961} }
Curriculum Group Policy Optimization: Adaptive Sampling for Unleashing the Potential of Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Baoteng and Zang, Xianghao and Wang, Xinran and Na, Xiangyu and He, Zhixiang and Sun, Hao and Zhang, Chi and He, Zhongjiang and Cao, Tianwei and Liang, Kongming and Ma, Zhanyu}, title = {Curriculum Group Policy Optimization: Adaptive Sampling for Unleashing the Potential of Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {561-571} }
Towards Sparse Video Understanding and Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Chenwei and Ye, Zhen and Wu, Shang and Li, Weijian and Wang, Zihan and Xia, Zhuofan and Lu, Lie and Maneriker, Pranav and Du, Fan and Li, Manling and Liu, Han}, title = {Towards Sparse Video Understanding and Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11357-11368} }
FlowPalm: Optical Flow Driven Non-Rigid Deformation for Geometrically Diverse Palmprint Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zou_2026_CVPR, author = {Zou, Yuchen and Shao, Huikai and Fang, Lihuang and Xiong, Zhipeng and Zhong, Dexing}, title = {FlowPalm: Optical Flow Driven Non-Rigid Deformation for Geometrically Diverse Palmprint Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23591-23600} }
When Safety Collides: Resolving Multi-Category Harmful Conflicts in Text-to-Image Diffusion via Adaptive Safety Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2026_CVPR, author = {Xiang, Yongli and Hong, Ziming and Wang, Zhaoqing and Zhao, Xiangyu and Han, Bo and Liu, Tongliang}, title = {When Safety Collides: Resolving Multi-Category Harmful Conflicts in Text-to-Image Diffusion via Adaptive Safety Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14746-14755} }
Layer Consistency Matters: Elegant Latent Transition Discrepancy for Generalizable Synthetic Image Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yawen and Li, Feng and Kong, Shuqi and Diao, Yunfeng and Gao, Xinjian and Shi, Zenglin and Wang, Meng}, title = {Layer Consistency Matters: Elegant Latent Transition Discrepancy for Generalizable Synthetic Image Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38111-38121} }
PointNSP: Autoregressive 3D Point Cloud Generation with Next-Scale Level-of-Detail Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Meng_2026_CVPR, author = {Meng, Ziqiao and Wang, Qichao and Dou, Zhiyang and Song, Zixing and Zhou, Zhipeng and King, Irwin and Zhao, Peilin}, title = {PointNSP: Autoregressive 3D Point Cloud Generation with Next-Scale Level-of-Detail Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31450-31461} }
Can a Second-View Image Be a Language? Geometric and Semantic Cross-Modal Reasoning for X-ray Prohibited Item Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Chuang and Tao, Renshuai and Ren, Zhongwei and Liu, Xianglong and Wei, Yunchao}, title = {Can a Second-View Image Be a Language? Geometric and Semantic Cross-Modal Reasoning for X-ray Prohibited Item Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26176-26186} }
TIM: Temporal Decoupling with Iterative Mutual-Refinement Model for Longitudinal Radiology Report Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2026_CVPR, author = {Dong, Yiheng and Lin, Yi and Huang, Shilong and Yang, Xiyan and Yang, Xin}, title = {TIM: Temporal Decoupling with Iterative Mutual-Refinement Model for Longitudinal Radiology Report Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6951-6961} }
Prompt-Free Universal Region Proposal Network-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Qihong and Liu, Changhan and Zhang, Shaofeng and Li, Wenbin and Fan, Qi and Gao, Yang}, title = {Prompt-Free Universal Region Proposal Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13080-13090} }
Stabilizing Feature Geometry in Noisy Pretrained Models for Robust Downstream Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Quanyu and Han, Zhongyi and Sun, Hao and Gong, Yongshun and Wang, Xiaoyan and Yin, Yilong and Li, Shuo}, title = {Stabilizing Feature Geometry in Noisy Pretrained Models for Robust Downstream Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {789-800} }
Fast Spatial Tracking with Visual Geometry Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Chengjie and Wu, Guile and Bai, Dongfeng and Liu, Bingbing}, title = {Fast Spatial Tracking with Visual Geometry Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {374-383} }
RankOOD - Class Ranking-based Out-of-Distribution Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Denipitiyage_2026_CVPR, author = {Denipitiyage, Dishanika and Karunanayake, Naveen and Seneviratne, Suranga and Chawla, Sanjay}, title = {RankOOD - Class Ranking-based Out-of-Distribution Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42280-42289} }
SASNet: Spatially-Adaptive Sinusoidal Networks for INRs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Haoan and Aldana, Diana and Novello, Tiago and De Floriani, Leila}, title = {SASNet: Spatially-Adaptive Sinusoidal Networks for INRs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41964-41973} }
Imagine Before Concentration: Diffusion-Guided Registers Enhance Partially Relevant Video Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jun and Lou, Xuhang and Wang, Jinpeng and Wang, Yuting and Wang, Yaowei and Xia, Shu-Tao and Chen, Bin}, title = {Imagine Before Concentration: Diffusion-Guided Registers Enhance Partially Relevant Video Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9710-9721} }
YOLO-Master: MOE-Accelerated with Specialized Transformers for Enhanced Real-time Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Xu and Peng, Jinlong and Gan, Zhenye and Zhu, Jiawen and Liu, Jun}, title = {YOLO-Master: MOE-Accelerated with Specialized Transformers for Enhanced Real-time Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18440-18449} }
IncreFA: Breaking the Static Wall of Generative Model Attribution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qin_2026_CVPR, author = {Qin, Haotian and Chang, Dongliang and Gao, Yueying and Tan, Yuexuan and Chen, Lei and Ma, Zhanyu}, title = {IncreFA: Breaking the Static Wall of Generative Model Attribution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35405-35415} }
Rethinking Camera Choice: An Empirical Study on Fisheye Camera Properties in Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2026_CVPR, author = {Xue, Han and Min, Nan and Liu, Xiaotong and Chen, Wendi and Fang, Yuan and Lv, Jun and Lu, Cewu and Wen, Chuan}, title = {Rethinking Camera Choice: An Empirical Study on Fisheye Camera Properties in Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35059-35069} }
VideoSeek: Long-Horizon Video Agent with Tool-Guided Seeking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Jingyang and Wu, Jialian and Liu, Jiang and Sun, Ximeng and Wang, Ze and Yu, Xiaodong and Luo, Jiebo and Liu, Zicheng and Barsoum, Emad}, title = {VideoSeek: Long-Horizon Video Agent with Tool-Guided Seeking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5465-5475} }
BAMI: Training-Free Bias Mitigation in GUI Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Borui and Zhang, Bo and Wang, Bo and Zheng, Wenzhao and Cheng, Yuhao and Tang, Liang and Yan, Yiqiang and Zhou, Jie and Lu, Jiwen}, title = {BAMI: Training-Free Bias Mitigation in GUI Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34596-34605} }
Cinematic Audio Source Separation Using Visual Cues-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Kang and Lee, Suyeon and Senocak, Arda and Chung, Joon Son}, title = {Cinematic Audio Source Separation Using Visual Cues}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37874-37884} }
GrOCE: Graph-Guided Online Concept Erasure for Text-to-Image Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Ning and Ge, Zhenyu and Han, Feng and Sun, Yuhua and Li, Chengqing and Chen, Jingjing}, title = {GrOCE: Graph-Guided Online Concept Erasure for Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43536-43545} }
PointAlign: Feature-Level Alignment Regularization for 3D Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Su_2026_CVPR, author = {Su, Yuanhao and Zhang, Shaofeng and Jia, Xiaosong and Fan, Qi}, title = {PointAlign: Feature-Level Alignment Regularization for 3D Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22100-22110} }
NeAR: Coupled Neural Asset-Renderer Stack-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Hong and Ye, Chongjie and Chen, Houyuan and Xiao, Weiqing and Yan, Ziyang and Xiao, Lixing and Chen, Zhaoxi and Xiang, Jianfeng and Xu, Shaocong and Liu, Xuhui and Wang, Yikai and Zhang, Baochang and Han, Xiaoguang and Yang, Jiaolong and Zhao, Hao}, title = {NeAR: Coupled Neural Asset-Renderer Stack}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29834-29844} }
MOON2.0: Dynamic Modality-balanced Multimodal Representation Learning for E-commerce Product Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Nie_2026_CVPR, author = {Nie, Zhanheng and Fu, Chenghan and Zhang, Daoze and Wu, Junxian and Guan, Wanxian and Wang, Pengjie and Xu, Jian and Zheng, Bo}, title = {MOON2.0: Dynamic Modality-balanced Multimodal Representation Learning for E-commerce Product Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22975-22985} }
ProgressiveAvatars: Progressive Animatable 3D Gaussian Avatars-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Kaiwen and Cui, Jinkai and Zhang, Juyong}, title = {ProgressiveAvatars: Progressive Animatable 3D Gaussian Avatars}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32518-32527} }
ApET: Approximation-Error Guided Token Compression for Efficient VLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Qiankun and Zhang, Ziyao and Wang, Haofei and Song, Zhen and Chen, Jie and Zheng, Hairong}, title = {ApET: Approximation-Error Guided Token Compression for Efficient VLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26306-26316} }
Scene Grounding in the Wild-
[pdf]
[supp]
[bibtex]@InProceedings{Cohen_2026_CVPR, author = {Cohen, Tamir and Segre, Leo and Shomer-Chai, Shay and Avidan, Shai and Averbuch-Elor, Hadar}, title = {Scene Grounding in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33353-33363} }
Polyphony: Diffusion-based Dual-Hand Action Segmentation with Alternating Vision Transformer and Semantic Conditioning-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Hao and Wang, Hu and Zheng, Tiantian and Bhattarai, Prajjwal and Alhanai, Tuka}, title = {Polyphony: Diffusion-based Dual-Hand Action Segmentation with Alternating Vision Transformer and Semantic Conditioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20098-20107} }
EgoFlow: Gradient-Guided Flow Matching for Egocentric 6DoF Object Motion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saroha_2026_CVPR, author = {Saroha, Abhishek and Zeng, Huajian and Zuo, Xingxing and Cremers, Daniel and Wang, Xi}, title = {EgoFlow: Gradient-Guided Flow Matching for Egocentric 6DoF Object Motion Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4332-4342} }
Learning to See through Illumination Extremes with Event Streaming in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Baoheng and Liu, Jiahui and Zhao, Gui and Zhang, Weizhou and Ma, Yixuan and Jiang, Jun and Chen, Yingxian and Fok, Wilton W.T. and Qi, Xiaojuan and So, Hayden Kwok-Hay}, title = {Learning to See through Illumination Extremes with Event Streaming in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26198-26208} }
All Roads Lead to Rome: Incentivizing Divergent Thinking in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2026_CVPR, author = {Tian, Xinyu and Zou, Shu and Yang, Zhaoyuan and He, Mengqi and Tu, Peter and Zhang, Jing}, title = {All Roads Lead to Rome: Incentivizing Divergent Thinking in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33488-33498} }
MCHDoc: A Comprehensive Benchmark for Reading Multi-Carrier Chinese Historical Documents-
[pdf]
[supp]
[bibtex]@InProceedings{Sheng_2026_CVPR, author = {Sheng, Yijun and Zhu, Shipeng and Zuo, Ruijia and Nie, Na and Xue, Hui}, title = {MCHDoc: A Comprehensive Benchmark for Reading Multi-Carrier Chinese Historical Documents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38722-38731} }
GM-R^2: Generative Matching Learning for Unsupervised Geometric Representation and Registration-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Haobo and Yu, Liang and Zheng, Jianmin}, title = {{GM-R$^2$}: Generative Matching Learning for Unsupervised Geometric Representation and Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31430-31439} }
Low-Rank Residual Diffusion Models-
[pdf]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Junfu and Yuan, Jiang}, title = {Low-Rank Residual Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35747-35757} }
VA-π: Variational Policy Alignment for Pixel-Aware Autoregressive Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Liao_2026_CVPR, author = {Liao, Xinyao and He, Qiyuan and Xu, Kai and Qu, Xiaoye and Li, Yicong and Wei, Wei and Yao, Angela}, title = {{VA-$\pi$}: Variational Policy Alignment for Pixel-Aware Autoregressive Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12787-12797} }
ORSATR-X: A Foundation Model based on Differential-and-Excitation Networks for Optical Remote Sensing Object Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Mo_2026_CVPR, author = {Mo, Canyu and Liu, Yongxiang and Zhang, Jiehua and Yu, Zilong and Liu, Zhen and Liu, Tianpeng and Liu, Li}, title = {ORSATR-X: A Foundation Model based on Differential-and-Excitation Networks for Optical Remote Sensing Object Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27849-27860} }
GGBench: A Geometric Generative Reasoning Benchmark for Unified Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Jingxuan and Jia, Caijun and Bai, Xi and Xu, Xinglong and Li, Siyuan and Sun, Linzhuang and Yu, Bihui and He, Conghui and Wu, Lijun and Tan, Cheng}, title = {GGBench: A Geometric Generative Reasoning Benchmark for Unified Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5199-5210} }
NeuroRule: Bridging Vision and Logic with Differentiable Rule Induction-
[pdf]
[supp]
[bibtex]@InProceedings{Zarar_2026_CVPR, author = {Zarar, Muhammad and Zhang, Mingzheng and Zhang, Xiaowang and Feng, Zhiyong}, title = {NeuroRule: Bridging Vision and Logic with Differentiable Rule Induction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11654-11663} }
Learning to Act Robustly with View-Invariant Latent Actions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2026_CVPR, author = {Jeong, Youngjoon and Chun, Junha and Kim, Taesup}, title = {Learning to Act Robustly with View-Invariant Latent Actions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6781-6790} }
Cross-View Splatter: Feed-Forward View Synthesis with Georeferenced Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Turkulainen_2026_CVPR, author = {Turkulainen, Matias and Krishnan, Akshay and Aleotti, Filippo and Sayed, Mohamed and Garcia-Hernando, Guillermo and Kannala, Juho and Solin, Arno and Brostow, Gabriel and Turmukhambetov, Daniyar}, title = {Cross-View Splatter: Feed-Forward View Synthesis with Georeferenced Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40959-40970} }
Beyond Single Solution: Multi-Hypothesis Deep Unfolding Network for Image Compressive Sensing-
[pdf]
[bibtex]@InProceedings{Cui_2026_CVPR, author = {Cui, Wenxue and Li, Hualin and Qin, Yuhang and Xu, Yifu and Fan, Xiaopeng and Zhao, Debin}, title = {Beyond Single Solution: Multi-Hypothesis Deep Unfolding Network for Image Compressive Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5283-5293} }
Chain of Event-Centric Causal Thought for Physically Plausible Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zixuan and Hu, Yixin and Wang, Haolan and Chen, Feng and Liu, Yan and Li, Wen and Lei, Yinjie}, title = {Chain of Event-Centric Causal Thought for Physically Plausible Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38122-38131} }
SunFaded: Illumination-Aware Gaussian Splatting for Dark Scenes with Camera-Mounted Active Lighting-
[pdf]
[supp]
[bibtex]@InProceedings{Chang_2026_CVPR, author = {Chang, Wenjie and Ding, Tianle and Yang, Wenfei and Zhang, Tianzhu}, title = {SunFaded: Illumination-Aware Gaussian Splatting for Dark Scenes with Camera-Mounted Active Lighting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40876-40885} }
MM-SeR: Multimodal Self-Refinement for Lightweight Image Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Junha and Jo, Yongsik and Min, So Yeon and Xie, Quanting and Kim, Taehwan and Bisk, Yonatan and Choo, Jaegul}, title = {MM-SeR: Multimodal Self-Refinement for Lightweight Image Captioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30929-30940} }
Thermally Activated Dual-Modal Adversarial Clothing against AI Surveillance Systems-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Long_2026_CVPR, author = {Long, Jiahuan and Jiang, Tingsong and Liu, Hanqing and Ma, Chao and Zhou, Weien and Yang, Yang and Yao, Wen}, title = {Thermally Activated Dual-Modal Adversarial Clothing against AI Surveillance Systems}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34929-34939} }
Zero-shot Detection of AI-Generated Image via RAW-RGB Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Haiwei and Li, Fengpeng and Tu, Zhilin and Li, Yuanman and Li, Xiong and Zhou, Jiantao}, title = {Zero-shot Detection of AI-Generated Image via RAW-RGB Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42997-43007} }
Hunting Normality from Query Sample via Residual Learning for Generalist Anomaly Detection-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xiaolei and Wang, Yuexin and Dai, Tianhong and Bai, Huihui and Zhao, Yao and Xiao, Jimin}, title = {Hunting Normality from Query Sample via Residual Learning for Generalist Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43093-43102} }
TacSIm: A Dataset and Benchmark for Football Tactical Style Imitation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2026_CVPR, author = {Wen, Peng and Wang, Yuting and Wang, Qiurui}, title = {TacSIm: A Dataset and Benchmark for Football Tactical Style Imitation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20014-20023} }
See Further, Think Deeper: Advancing VLM's Reasoning Ability with Low-level Visual Cues and Reflection-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Zhiheng and Wang, Tong and Wang, Shuning and Liu, Naiming and Zhang, Yumeng}, title = {See Further, Think Deeper: Advancing VLM's Reasoning Ability with Low-level Visual Cues and Reflection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18870-18880} }
OccAny: Generalized Unconstrained Urban 3D Occupancy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Anh-Quan and Vu, Tuan-Hung}, title = {OccAny: Generalized Unconstrained Urban 3D Occupancy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28599-28609} }
MedKCO: Medical Vision-Language Pretraining via Knowledge-Driven Cognitive Orchestration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Chenran and Wu, Ruiqi and Zhou, Tao and Zhou, Yi}, title = {MedKCO: Medical Vision-Language Pretraining via Knowledge-Driven Cognitive Orchestration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35260-35269} }
MOGeo: Beyond One-to-One Cross-View Object Geo-localization-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lv_2026_CVPR, author = {Lv, Bo and Zhang, Qingwang and Wu, Le and Li, Yuanyuan and Zhu, Yingying}, title = {MOGeo: Beyond One-to-One Cross-View Object Geo-localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26422-26431} }
Thermal Diffusion Matters: Infrared Spatial-Temporal Video Super-Resolution through Heat Conduction Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Mingxuan and Li, Shuang and Zhang, Yutang and Geng, Jing and Shen, Yirui and Kang, Jingxuan and Zhuang, Fuzhen and Wang, Shuigen}, title = {Thermal Diffusion Matters: Infrared Spatial-Temporal Video Super-Resolution through Heat Conduction Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2146-2155} }
BIT: Matching-based Bi-directional Interaction Transformation Network for Visible-Infrared Person Re-Identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Haoxuan and Niu, Guanglin}, title = {BIT: Matching-based Bi-directional Interaction Transformation Network for Visible-Infrared Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40386-40396} }
TeamHOI: Learning a Unified Policy for Cooperative Human-Object Interactions with Any Team Size-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lionar_2026_CVPR, author = {Lionar, Stefan and Lee, Gim Hee}, title = {TeamHOI: Learning a Unified Policy for Cooperative Human-Object Interactions with Any Team Size}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37121-37132} }
Regulating Rather than Constraining: Adaptive Guidance for Complex Spectral Reconstruction in Pansharpening-
[pdf]
[supp]
[bibtex]@InProceedings{Wen_2026_CVPR, author = {Wen, Zhuwei and Xia, Zimin and Chen, He and Yue, Linwei and Zheng, Xianwei}, title = {Regulating Rather than Constraining: Adaptive Guidance for Complex Spectral Reconstruction in Pansharpening}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34832-34842} }
Diffusion-Based Makeup Transfer with Facial Region-Aware Makeup Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Zheng and Meng, Debin and Miao, Yunqi and Zhang, Zhensong and Xu, Songcen and Patras, Ioannis and Song, Jifei}, title = {Diffusion-Based Makeup Transfer with Facial Region-Aware Makeup Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4656-4666} }
SAQN: Semantic-based Adaptive Query Network for 3D Referring Expression Segmentation-
[pdf]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Jiale and Wang, Shangfei}, title = {SAQN: Semantic-based Adaptive Query Network for 3D Referring Expression Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38627-38636} }
Beyond 3D VQAs: Injecting 3D Spatial Priors into Vision-Language Models for Enhanced Geometric Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Yeh_2026_CVPR, author = {Yeh, Chun-Hsiao and Qian, Shengyi and Wang, Manchen and Ma, Yi and Tighe, Joseph and Xiao, Fanyi}, title = {Beyond 3D VQAs: Injecting 3D Spatial Priors into Vision-Language Models for Enhanced Geometric Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16723-16733} }
Pressure2Motion: Hierarchical Human Motion Reconstruction from Ground Pressure with Text Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zhengxuan and Yang, Qinhui and Zhuang, Yiyu and Guo, Chuan and Zuo, Xinxin and Long, Xiaoxiao and Yao, Yao and Cao, Xun and Shen, Qiu and Zhu, Hao}, title = {Pressure2Motion: Hierarchical Human Motion Reconstruction from Ground Pressure with Text Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23495-23505} }
Making the Classification Explanation Faithful to the Confidence Score-
[pdf]
[supp]
[bibtex]@InProceedings{Mi_2026_CVPR, author = {Mi, Jian-Xun and Pan, Lu and Li, Weisheng}, title = {Making the Classification Explanation Faithful to the Confidence Score}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38959-38968} }
Selective Amnesia using Contrastive Subnet Erasure for Class Level Unlearning in Vision Models-
[pdf]
[supp]
[bibtex]@InProceedings{Pramanik_2026_CVPR, author = {Pramanik, Vishal and Maliha, Maisha and Jha, Susmit and Velasquez, Alvaro and Kotevska, Olivera and Jha, Sumit Kumar}, title = {Selective Amnesia using Contrastive Subnet Erasure for Class Level Unlearning in Vision Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31662-31671} }
VT-Intrinsic: Physics-Based Decomposition of Reflectance and Shading using a Single Visible-Thermal Image Pair-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2026_CVPR, author = {Yuan, Zeqing and Ramanagopal, Mani and Sankaranarayanan, Aswin C. and Narasimhan, Srinivasa G.}, title = {VT-Intrinsic: Physics-Based Decomposition of Reflectance and Shading using a Single Visible-Thermal Image Pair}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41752-41761} }
Exemplar-Free Continual Learning for State Space Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Isaac Ning and Mahmoodi, Leila and Le, Trung and Harandi, Mehrtash}, title = {Exemplar-Free Continual Learning for State Space Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25077-25087} }
S2C2Seg: Semantic-Spatial Consistency and Category Optimization for Open-Vocabulary Segmentation-
[pdf]
[bibtex]@InProceedings{Qing_2026_CVPR, author = {Qing, Yuhao and Wang, Yueying and Chen, Chaoyang and Zhang, Weidong and Wen, Jie and Xu, Xin}, title = {S2C2Seg: Semantic-Spatial Consistency and Category Optimization for Open-Vocabulary Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6293-6303} }
AdaDexTrack: Dynamic Modulation for Adaptive and Generalizable Dexterous Manipulation Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Adalibieke_2026_CVPR, author = {Adalibieke, Jianibieke and Han, Qianwei and Liu, Xueyi and Qin, Yuzhe and Yi, Li}, title = {AdaDexTrack: Dynamic Modulation for Adaptive and Generalizable Dexterous Manipulation Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28021-28031} }
Do You Have Freestyle? Expressive Humanoid Locomotion via Audio Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zhe and Chi, Cheng and Wei, Yangyang and Zhu, Boan and Huang, Tao and Sun, Zhenguo and Peng, Yibo and Wang, Pengwei and Wang, Zhongyuan and Liu, Fangzhou and Xu, Chang and Zhang, Shanghang}, title = {Do You Have Freestyle? Expressive Humanoid Locomotion via Audio Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {956-965} }
LightRR: A Lightweight Network for Single Image Reflection Removal-
[pdf]
[supp]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Wenbin and Zhang, Junkang and Yang, Sunzhe and Fang, Faming and Zhang, Guixu}, title = {LightRR: A Lightweight Network for Single Image Reflection Removal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19706-19715} }
Thinking in Dynamics: How Multimodal Large Language Models Perceive, Track, and Reason Dynamics in Physical 4D World-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Yuzhi and Wen, Kairun and Gao, Rongxin and Liu, Dongxuan and Lou, Yibin and Wu, Jie and Xu, Jing and Zhang, Jian and Yang, Zheng and Lin, Yunlong and Li, Chenxin and Pan, Panwang and Lu, Junbin and Jiang, Jingyan and Ding, Xinghao and Huang, Yue and Wang, Zhi}, title = {Thinking in Dynamics: How Multimodal Large Language Models Perceive, Track, and Reason Dynamics in Physical 4D World}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33446-33456} }
3DReflecNet: A Large-Scale Dataset for 3D Reconstruction of Reflective, Transparent, and Low-Texture Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Zhicheng and Yu, Haoyi and Li, Boyan and Zhang, Dayou and Cao, Zijian and Gong, Tianyi and Liu, Junhua and Cui, Shuguang and Wang, Fangxin}, title = {3DReflecNet: A Large-Scale Dataset for 3D Reconstruction of Reflective, Transparent, and Low-Texture Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7244-7255} }
PHAC: Promptable Human Amodal Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Noh_2026_CVPR, author = {Noh, Seung Young and Chang, Ju Yong}, title = {PHAC: Promptable Human Amodal Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30750-30760} }
Spherical Leech Quantization for Visual Tokenization and Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yue and Jiang, Hanwen and Xu, Zhenlin and Yang, Chutong and Adeli, Ehsan and Kraehenbuehl, Philipp}, title = {Spherical Leech Quantization for Visual Tokenization and Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12913-12923} }
Outlier-Robust Diffusion Solvers for Inverse Problems-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Yang and Liu, Jiahua and Pang, Tongyao and Li, Wen and Liu, Zhaoqiang}, title = {Outlier-Robust Diffusion Solvers for Inverse Problems}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30782-30791} }
C^2FG: Control Classifier-Free Guidance via Score Discrepancy Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Jiayang and Zheng, Tianyi and Zou, Jiayang and Yang, Fengxiang and Liu, Shice and Fan, Luyao and Zhang, Zheyu and Zhang, Hao and Chen, Jinwei and Jiang, Peng-Tao and Li, Bo and Wang, Jia}, title = {C{\textasciicircum}2FG: Control Classifier-Free Guidance via Score Discrepancy Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34398-34407} }
MetaSpectra+: A Compact Broadband Metasurface Camera for Snapshot Hyperspectral+ Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yuxuan and Xu, Wei and Guo, Qi}, title = {MetaSpectra+: A Compact Broadband Metasurface Camera for Snapshot Hyperspectral+ Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {209-219} }
Thermal is Always Wild: Characterizing and Addressing Challenges in Thermal-Only Novel View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Aydin_2026_CVPR, author = {Aydin, M. Kerem and Saragadam, Vishwanath and Alexander, Emma}, title = {Thermal is Always Wild: Characterizing and Addressing Challenges in Thermal-Only Novel View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29845-29854} }
RHO: Robust Holistic OSM-Based Metric Cross-View Geo-Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Junwei and Dai, Ruize and Liu, Ruiping and Zeng, Zichao and Chen, Yufan and Wang, Fangjinhua and Peng, Kunyu and Yang, Kailun and Zhang, Jiaming and Stiefelhagen, Rainer}, title = {RHO: Robust Holistic OSM-Based Metric Cross-View Geo-Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33727-33737} }
OASIS: On-Demand Hierarchical Event Memory for Streaming Video Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Zhijia and Li, Jiaming and Chen, Weikai and Zhang, Yanhao and Lu, Haonan and Li, Guanbin}, title = {OASIS: On-Demand Hierarchical Event Memory for Streaming Video Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2821-2831} }
Beyond What's Shared: Recovering Lost Unique Information from Intermediate Layers to Boost Multimodal Geo-Foundation Models-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, JangHyeon and Dias, Philipe Ambrozio and Chiang, Yao-Yi and Lunga, Dalton}, title = {Beyond What's Shared: Recovering Lost Unique Information from Intermediate Layers to Boost Multimodal Geo-Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1585-1595} }
SceneMaker: Open-set 3D Scene Generation with Decoupled De-occlusion and Pose Estimation Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Yukai and Li, Weiyu and Wang, Zihao and Li, Hongyang and Chen, Xingyu and Tan, Ping and Zhang, Lei}, title = {SceneMaker: Open-set 3D Scene Generation with Decoupled De-occlusion and Pose Estimation Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27146-27156} }
VAD-GS: Visibility-Aware Densification for 3D Gaussian Splatting in Dynamic Urban Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yikang and Fan, Rui}, title = {VAD-GS: Visibility-Aware Densification for 3D Gaussian Splatting in Dynamic Urban Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4953-4962} }
Uncertainty-guided Compositional Alignment with Part-to-Whole Semantic Representativeness in Hyperbolic Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Hayeon and Jang, Ji Ha and Kim, Junghun James and Chun, Se Young}, title = {Uncertainty-guided Compositional Alignment with Part-to-Whole Semantic Representativeness in Hyperbolic Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36861-36870} }
Self-Critical Distillation Network for Video-based Commonsense Captioning-
[pdf]
[bibtex]@InProceedings{Yuan_2026_CVPR, author = {Yuan, Mengqi and Jia, Gengyun and Bao, Bing-Kun}, title = {Self-Critical Distillation Network for Video-based Commonsense Captioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40527-40536} }
Block-Sparse Global Attention for Efficient Multi-View Geometry Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Chung-Shien Brian and Schmidt, Christian and Piekenbrinck, Jens and Leibe, Bastian}, title = {Block-Sparse Global Attention for Efficient Multi-View Geometry Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14546-14555} }
TANGO: Learning Distribution-wise Foundation Prior Consistency and Instance-wise Style Calibration for Medical Image Generalization-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Chuang and Cao, Yichao and Su, Xiu and Zhu, Haogang}, title = {TANGO: Learning Distribution-wise Foundation Prior Consistency and Instance-wise Style Calibration for Medical Image Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8545-8555} }
A Provable Energy-Guided Test-Time Defense Boosting Adversarial Robustness of Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Mirza_2026_CVPR, author = {Mirza, Mujtaba Hussain and D'Orazio, Antonio and Melamed, Odelia and Masi, Iacopo}, title = {A Provable Energy-Guided Test-Time Defense Boosting Adversarial Robustness of Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8598-8609} }
TF-CADE: Foreground-Concentrated Text-Video Alignment for Zero-Shot Temporal Action Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Yearang and Kim, Ho-Joong and Lee, Seong-Whan}, title = {TF-CADE: Foreground-Concentrated Text-Video Alignment for Zero-Shot Temporal Action Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2843-2852} }
Inter-Edit: First Benchmark for Interactive Instruction-Based Image Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Delong and Hou, Haotian and Hou, Zhaohui and Huang, Zhiyuan and Han, Shihao and Zhan, Mingjie and Zhao, Zhicheng and Su, Fei}, title = {Inter-Edit: First Benchmark for Interactive Instruction-Based Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37290-37300} }
Emergent Outlier View Rejection in Visual Geometry Grounded Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Jisang and Hong, Sunghwan and Jung, Jaewoo and Jang, Wooseok and An, Honggyu and Wang, Qianqian and Kim, Seungryong and Feng, Chen}, title = {Emergent Outlier View Rejection in Visual Geometry Grounded Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {427-437} }
TiViBench: Benchmarking Think-in-Video Reasoning for Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Harold Haodong and Lan, Disen and Shu, Wen-Jie and Liu, Qingyang and Wang, Zihan and Chen, Sirui and Cheng, Wenkai and Chen, Kanghao and Zhang, Hongfei and Zhang, Zixin and Guo, Rongjin and Cheng, Yu and Chen, Ying-Cong}, title = {TiViBench: Benchmarking Think-in-Video Reasoning for Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11403-11413} }
AMap: Distilling Future Priors for Ahead-Aware Online HD Map Construction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Ruikai and Li, Xinrun and Xie, Mengwei and Shan, Hao and Qiu, Shoumeng and Chang, Xinyuan and Fan, Yizhe and Xiong, Feng and Jiang, Han and Ren, Yilong and Yu, Haiyang and Xu, Mu and Long, Yang and Ojha, Varun and Cui, Zhiyong}, title = {AMap: Distilling Future Priors for Ahead-Aware Online HD Map Construction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24906-24917} }
SciEducator: Scientific Video Understanding and Educating via Deming-Cycle Multi-Agent System-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Zhiyu and Yan, Weilong and Shi, Yufei and Meng, Xin and He, Tao and Zhuang, Huiping and Li, Ming and Fan, Hehe}, title = {SciEducator: Scientific Video Understanding and Educating via Deming-Cycle Multi-Agent System}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26497-26507} }
Artiverse: A Diverse and Physically Grounded Dataset for Articulated Objects-
[pdf]
[supp]
[bibtex]@InProceedings{Iliash_2026_CVPR, author = {Iliash, Denys and Liu, Jiayi and Fokin, Egor and Wu, Qirui and Amiri, Ali Mahdavi and Savva, Manolis and Chang, Angel X.}, title = {Artiverse: A Diverse and Physically Grounded Dataset for Articulated Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8932-8942} }
Guiding Diffusion Models with Semantically Degraded Conditions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Shilong and Zhang, Yuming and Wang, Hongxia}, title = {Guiding Diffusion Models with Semantically Degraded Conditions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43653-43663} }
Meta-CoT: Enhancing Granularity and Generalization in Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shiyi and Cheng, Yiji and Hang, Tiankai and Yin, Zijin and He, Runze and Xu, Yu and Dai, Wenxun and Lin, Yunlong and Wang, Chunyu and Lu, Qinglin and Tang, Yansong}, title = {Meta-CoT: Enhancing Granularity and Generalization in Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38004-38015} }
Z-Order Transformer for Feed-Forward Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Can and Liu, Lei and Jiang, Wei and Xu, Dong}, title = {Z-Order Transformer for Feed-Forward Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7362-7371} }
GHPT: Real-Time Relightable Gaussian Splatting using Hybrid Path Tracing-
[pdf]
[supp]
[bibtex]@InProceedings{Bo_2026_CVPR, author = {Bo, Jinyang and Dou, Fan and Quan, Wenrui and Liu, Shangxun and Xu, Yang and Zhang, Yuhe and Li, Kang and Geng, Guohua}, title = {GHPT: Real-Time Relightable Gaussian Splatting using Hybrid Path Tracing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25990-25999} }
AutoCut: End-to-end advertisement video editing based on multimodal discretization and controllable generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Milton and Qin, Sizhong and Li, Yongzhi and Chen, Quan and Jiang, Peng}, title = {AutoCut: End-to-end advertisement video editing based on multimodal discretization and controllable generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37777-37787} }
WorldGen: From Text to Traversable and Interactive 3D Worlds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Dilin and Jung, Hyunyoung and Monnier, Tom and Sohn, Kihyuk and Zou, Chuhang and Xiang, Xiaoyu and Yeh, Yu-Ying and Liu, Di and Huang, Zixuan and Nguyen-Phuoc, Thu and Fan, Yuchen and Oprea, Sergiu and Wang, Ziyan and Shapovalov, Roman and Sarafianos, Nikolaos and Groueix, Thibault and Toisoul, Antoine and Dhar, Prithviraj and Chu, Xiao and Chen, Minghao and Park, Geon Yeong and Ranjan, Rakesh and Vedaldi, Andrea}, title = {WorldGen: From Text to Traversable and Interactive 3D Worlds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27124-27135} }
TempoControl: Temporal Attention Guidance for Text-to-Video Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Schiber_2026_CVPR, author = {Schiber, Shira and Lindenbaum, Ofir and Schwartz, Idan}, title = {TempoControl: Temporal Attention Guidance for Text-to-Video Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36670-36679} }
PanoVGGT: Feed-Forward 3D Reconstruction from Panoramic Imagery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Yijing and Chao, Mengjun and Wang, Luo and Zhao, Tianyang and Dai, Haizhao and Zhang, Yingliang and Yu, Jingyi and Shi, Yujiao}, title = {PanoVGGT: Feed-Forward 3D Reconstruction from Panoramic Imagery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36444-36453} }
MOFA-VTON: More Fashion Possibilities with Fine-Grained Adaptations in Virtual Try-On-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Xiaoyu and Wang, Chenyang and Wang, Jing and Zheng, Shunyuan and Meng, Quanling and Zhang, Shengping}, title = {MOFA-VTON: More Fashion Possibilities with Fine-Grained Adaptations in Virtual Try-On}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1895-1905} }
NavForesee: A Unified Vision-Language World Model for Hierarchical Planning and Dual-Horizon Navigation Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Fei and Xie, Shichao and Luo, Minghua and Chu, Zedong and Hu, Junjun and Wu, Xiaolong and Xu, Mu}, title = {NavForesee: A Unified Vision-Language World Model for Hierarchical Planning and Dual-Horizon Navigation Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32431-32440} }
RealAppliance: Let High-fidelity Appliance Assets Controllable and Workable as Aligned Real Manuals-
[pdf]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Yuzheng and Long, Yuxing and Kang, Lei and Guo, Yuchong and Yu, Ziyan and Mao, Shangqing and Zhang, Jiyao and Wu, Ruihai and Li, Dongjiang and Shen, Hui and Dong, Hao}, title = {RealAppliance: Let High-fidelity Appliance Assets Controllable and Workable as Aligned Real Manuals}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37186-37194} }
SJD-PAC: Accelerating Speculative Jacobi Decoding via Proactive Drafting and Adaptive Continuation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2026_CVPR, author = {Kang, Jialiang and Shu, Han and Li, Wenshuo and Zhai, Yingjie and Chen, Xinghao}, title = {SJD-PAC: Accelerating Speculative Jacobi Decoding via Proactive Drafting and Adaptive Continuation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16666-16675} }
NitroGen: An Open Foundation Model for Generalist Gaming Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Magne_2026_CVPR, author = {Magne, Lo{\"\i}c and Awadalla, Anas and Wang, Guanzhi and Xu, Yinzhen and Belofsky, Joshua and Hu, Fengyuan and Kim, Joohwan and Schmidt, Ludwig and Gkioxari, Georgia and Kautz, Jan and Yue, Yisong and Choi, Yejin and Zhu, Yuke and Fan, Linxi}, title = {NitroGen: An Open Foundation Model for Generalist Gaming Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21511-21521} }
Few-Shot Hybrid Incremental Learning: Continually Learning under Data Scarcity and Task Uncertainty-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yan and Shi, Yuzhu and Zhou, Kan and Zhang, Shu and He, Diqi and Zhang, Dingwen and Han, Junwei}, title = {Few-Shot Hybrid Incremental Learning: Continually Learning under Data Scarcity and Task Uncertainty}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32334-32344} }
VEMamba: Efficient Isotropic Reconstruction of Volume Electron Microscopy with Axial-Lateral Consistent Mamba-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Longmi and Gao, Pan}, title = {VEMamba: Efficient Isotropic Reconstruction of Volume Electron Microscopy with Axial-Lateral Consistent Mamba}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15585-15594} }
DepthFocus: Controllable Depth Estimation for See-Through Scenes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Min_2026_CVPR, author = {Min, Junhong and Kim, Jimin and Kim, Minwook and Min, Cheol-Hui and Jeon, Youngpil and Choi, Minyong}, title = {DepthFocus: Controllable Depth Estimation for See-Through Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12595-12605} }
FINE: Factorizing Knowledge for Initialization of Variable-sized Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Yucheng and Feng, Fu and Shi, Ruixiao and Shen, Jianlu and Wang, Jing and Rui, Yong and Geng, Xin}, title = {FINE: Factorizing Knowledge for Initialization of Variable-sized Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42018-42028} }
OctoNav: Towards Generalist Embodied Navigation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Chen and Jin, Liankai and Peng, Xingyu and Zhang, Jiazhao and Deng, Yue and Li, Annan and Wang, He and Liu, Si}, title = {OctoNav: Towards Generalist Embodied Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40074-40084} }
HumanNOVA: Photorealistic, Universal and Rapid 3D Human Avatar Modeling from a Single Image-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Hezhen and Zhao, Wangbo and Guo, Lanqing and Jiang, Hanwen and Liu, Jonathan C. and Fan, Zhiwen and Wang, Kai and Wang, Zhangyang and Pavlakos, Georgios}, title = {HumanNOVA: Photorealistic, Universal and Rapid 3D Human Avatar Modeling from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18096-18106} }
Semantic Alignment for Pose-Invariant Identity Preserving Diffusion-
[pdf]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Jiwon and Kim, SeonHwa and Park, Soobin and Cha, Eunju and Jin, Kyong Hwan}, title = {Semantic Alignment for Pose-Invariant Identity Preserving Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43363-43372} }
ULF-Loc: Unbiased Landmark Feature for Robust Visual Localization with 3D Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2026_CVPR, author = {Gu, Yingdong and Yan, Shaocheng and Zhao, Zhenjun and Kou, Yuan and Luo, Jianxin and Shi, Pengcheng and Li, Jiayuan}, title = {ULF-Loc: Unbiased Landmark Feature for Robust Visual Localization with 3D Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19065-19075} }
Reasoning Palette: Modulating Reasoning via Latent Contextualization for Controllable Exploration for (V)LMs-
[pdf]
[arXiv]
[bibtex]@InProceedings{Long_2026_CVPR, author = {Long, Rujiao and Li, Yang and Zhang, Xingyao and Wang, Weixun and Lin, Tianqianjin and Zhao, Xi and Xu, Yuchi and Su, Wenbo and Yan, Junchi and Zheng, Bo}, title = {Reasoning Palette: Modulating Reasoning via Latent Contextualization for Controllable Exploration for (V)LMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19463-19474} }
Minerva-Ego: Spatiotemporal Hints for Egocentric Video Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Nagrani_2026_CVPR, author = {Nagrani, Arsha and Uijlings, Jasper and Buch, Shyamal and Weyand, Tobias and Vijayanarasimhan, Sudheendra and Hu, Bo and Mehran, Ramin and Ross, David A. and Schmid, Cordelia}, title = {Minerva-Ego: Spatiotemporal Hints for Egocentric Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38859-38869} }
CaReFlow: Cyclic Adaptive Rectified Flow for Multimodal Fusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mai_2026_CVPR, author = {Mai, Sijie and Han, Shiqin}, title = {CaReFlow: Cyclic Adaptive Rectified Flow for Multimodal Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37799-37809} }
Enhancing Descriptive Captions with Visual Attributes for Multimodal Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Yanpeng and Hao, Jing and Zhu, Ke and Liu, Jiang-Jiang and Li, Xiaofan and Zhao, Na and Li, Zechao and Wang, Jingdong}, title = {Enhancing Descriptive Captions with Visual Attributes for Multimodal Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1683-1694} }
RAP: Fast Feedforward Rendering-Free Attribute-Guided Primitive Importance Score Prediction for Efficient 3D Gaussian Splatting Processing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Kaifa and Yang, Qi and Xu, Yiling and Li, Zhu}, title = {RAP: Fast Feedforward Rendering-Free Attribute-Guided Primitive Importance Score Prediction for Efficient 3D Gaussian Splatting Processing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33323-33332} }
Registration-Free Learnable Multi-View Capture of Faces in Dense Semantic Correspondence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Filntisis_2026_CVPR, author = {Filntisis, Panagiotis P. and Retsinas, George and Danecek, Radek and Sklyarova, Vanessa and Maragos, Petros and Bolkart, Timo}, title = {Registration-Free Learnable Multi-View Capture of Faces in Dense Semantic Correspondence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {14512-14523} }
NESTOR: A Nested MOE-based Neural Operator for Large-Scale PDE Pre-Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Dengdi and Zhou, Xiaoya and Wang, Xiao and Si, Hao and Lyu, Wanli and Tang, Jin and Luo, Bin}, title = {NESTOR: A Nested MOE-based Neural Operator for Large-Scale PDE Pre-Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6147-6156} }
Think-as-You-See: Streaming Chain-of-Thought Reasoning for Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jialiang and Tong, Junlong and Lin, Junyan and Wu, Hao and Sun, Yirong and Ma, Yunpu and Shen, Xiaoyu}, title = {Think-as-You-See: Streaming Chain-of-Thought Reasoning for Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11998-12008} }
From Detection to Association: Learning Discriminative Object Embeddings for Multi-Object Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2026_CVPR, author = {Shao, Yuqing and Yang, Yuchen and Yu, Rui and Li, Weilong and Guo, Xu and Yan, Huaicheng and Wang, Wei and Sun, Xiao}, title = {From Detection to Association: Learning Discriminative Object Embeddings for Multi-Object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6878-6888} }
Sketch2CT: Multimodal Diffusion for Structure-Aware 3D Medical Volume Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{An_2026_CVPR, author = {An, Delin and Wang, Chaoli}, title = {Sketch2CT: Multimodal Diffusion for Structure-Aware 3D Medical Volume Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37600-37610} }
Unlocking Pre-trained Weights: Parameter Inheritance for Zero-Shot Initialization-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Jiaze and Xia, Shiyu and Lv, Jiaqi and Geng, Xin}, title = {Unlocking Pre-trained Weights: Parameter Inheritance for Zero-Shot Initialization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34502-34511} }
Fast-FoundationStereo: Real-Time Zero-Shot Stereo Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2026_CVPR, author = {Wen, Bowen and Dewan, Shaurya and Birchfield, Stan}, title = {Fast-FoundationStereo: Real-Time Zero-Shot Stereo Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7513-7524} }
UniChange: Unifying Change Detection with Multimodal Large Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xu and Li, Danyang and Dong, Xiaohang and Wu, Tianhao and Yu, Hualong and Wang, Jianye and Li, Qicheng and Li, Xiang}, title = {UniChange: Unifying Change Detection with Multimodal Large Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42169-42179} }
Omni-Attribute: Open-vocabulary Attribute Encoder for Visual Concept Personalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Tsai-Shien and Siarohin, Aliaksandr and Qian, Gordon Guocheng and Wang, Kuan-Chieh Jackson and Nemchinov, Egor and Haji-Ali, Moayed and Guler, Riza Alp and Menapace, Willi and Skorokhodov, Ivan and Kag, Anil and Zhu, Jun-Yan and Tulyakov, Sergey}, title = {Omni-Attribute: Open-vocabulary Attribute Encoder for Visual Concept Personalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8194-8204} }
LangField4D: Learning Identity-Adaptive and Spatio-Temporal Continuous 4D Language Fields for Dynamic Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Yichao and Miao, Qiaowei and Quan, Jinsheng and Yang, Wei and Li, Zhihui and Luo, Yawei}, title = {LangField4D: Learning Identity-Adaptive and Spatio-Temporal Continuous 4D Language Fields for Dynamic Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9558-9569} }
Representing 3D Faces with Learnable B-Spline Volumes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chandran_2026_CVPR, author = {Chandran, Prashanth and Wang, Daoye and Bolkart, Timo}, title = {Representing 3D Faces with Learnable B-Spline Volumes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13824-13834} }
ChartR: Evaluating Reasoning Accuracy and Robustness in Chart Question Answering-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xiaojun and Luo, Sixiao and Liu, Ziqi and Yang, Min and Zhang, Qin and Zhang, Liang-Jie}, title = {ChartR: Evaluating Reasoning Accuracy and Robustness in Chart Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41193-41202} }
Elucidating the Design Space of Arbitrary-Noise-Based Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2026_CVPR, author = {Qiu, Xingyu and Yang, Mengying and Ma, Xinghua and Liang, Dong and Li, Fanding and Luo, Gongning and Wang, Wei and Wang, Kuanquan and Li, Shuo}, title = {Elucidating the Design Space of Arbitrary-Noise-Based Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30833-30842} }
VIVA: VLM-Guided Instruction-Based Video Editing with Reward Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cong_2026_CVPR, author = {Cong, Xiaoyan and Yang, Haotian and Wang, Angtian and Wang, Yizhi and Yang, Yiding and Zhang, Canyu and Ma, Chongyang}, title = {VIVA: VLM-Guided Instruction-Based Video Editing with Reward Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34364-34374} }
Adaptive 3D Perception for Small Aerial Targets Under Sparse Sampling via Reinforcement Learning-
[pdf]
[bibtex]@InProceedings{Yuan_2026_CVPR, author = {Yuan, Shenghai and Yihan, Wei and Yee, Jason and Qiao, Zhuoran and Lou, Boyang and Hu, Enwen}, title = {Adaptive 3D Perception for Small Aerial Targets Under Sparse Sampling via Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39064-39074} }
FluxMem: Adaptive Hierarchical Memory for Streaming Video Understanding-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Yiweng and He, Bo and Wang, Junke and Zheng, Xiangyu and Ye, Ziyi and Wu, Zuxuan}, title = {FluxMem: Adaptive Hierarchical Memory for Streaming Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {31272-31282} }
Adapting In-context Generation for Enhanced Composed Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Haiwen and Chen, Zining and Liu, Delong and Hou, Zhaohui and Zhao, Zhicheng and Su, Fei}, title = {Adapting In-context Generation for Enhanced Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29167-29177} }
Focus on Background: Exploring SAM's Potential in Few-shot Medical Image Segmentation with Background-centric Prompting-
[pdf]
[supp]
[bibtex]@InProceedings{Bo_2026_CVPR, author = {Bo, Yuntian and Zhu, Yazhou and Koniusz, Piotr and Zhang, Haofeng}, title = {Focus on Background: Exploring SAM's Potential in Few-shot Medical Image Segmentation with Background-centric Prompting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30032-30041} }
Convolutional Neural Networks Driven by Content Similarity-
[pdf]
[bibtex]@InProceedings{Zou_2026_CVPR, author = {Zou, Ligeng and Zhao, Guihu}, title = {Convolutional Neural Networks Driven by Content Similarity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27449-27459} }
PEARL: Geometry Aligns Semantics for Training-Free Open-Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pei_2026_CVPR, author = {Pei, Gensheng and Jiang, Xiruo and Cai, Xinhao and Chen, Tao and Yao, Yazhou and Jeon, Byeungwoo}, title = {PEARL: Geometry Aligns Semantics for Training-Free Open-Vocabulary Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {17927-17937} }
CoD: A Diffusion Foundation Model for Image Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2026_CVPR, author = {Jia, Zhaoyang and Zheng, Zihan and Xue, Naifu and Li, Jiahao and Li, Bin and Guo, Zongyu and Zhang, Xiaoyi and Li, Houqiang and Lu, Yan}, title = {CoD: A Diffusion Foundation Model for Image Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38420-38429} }
ForceVLA2: Unleashing Hybrid Force-Position Control with Force Awareness for Contact-Rich Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yang and Zhaxizhuoma, Zhaxizhuoma and Jiang, Hongru and Xia, Junjie and Zhang, Hongquan and Du, Jinda and Zhou, Yunsong and Zeng, Jia and Hao, Ce and Ren, Jieji and Yu, Qiaojun and Lu, Cewu and Qiao, Yu and Pang, Jiangmiao}, title = {ForceVLA2: Unleashing Hybrid Force-Position Control with Force Awareness for Contact-Rich Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8911-8920} }
TGSFormer: Scalable Temporal Gaussian Splatting for Embodied Semantic Scene Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qian_2026_CVPR, author = {Qian, Rui and Cao, Haozhi and Deng, Tianchen and Hu, Tianxin and Guo, Weixiang and Yuan, Shenghai and Xie, Lihua}, title = {TGSFormer: Scalable Temporal Gaussian Splatting for Embodied Semantic Scene Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11881-11890} }
CRFT: Consistent-Recurrent Feature Flow Transformer for Cross-Modal Image Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xuecong and Ding, Mengzhu and Sun, Zixuan and Li, Zhang and Teng, Xichao}, title = {CRFT: Consistent-Recurrent Feature Flow Transformer for Cross-Modal Image Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {34784-34794} }
VISTA: A Test-Time Self-Improving Video Generation Agent-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Long_2026_CVPR, author = {Long, Do Xuan and Wan, Xingchen and Nakhost, Hootan and Lee, Chen-Yu and Pfister, Tomas and Arik, Sercan {\"O}.}, title = {VISTA: A Test-Time Self-Improving Video Generation Agent}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {6021-6032} }
Dynamic-Static Decomposition for Novel View Synthesis of Dynamic Scenes with Spiking Neurons-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2026_CVPR, author = {Dai, Lingyun and Chen, Zehao and Liu, Yan and Gu, Shi and Lin, Peng and Ma, De and Tang, Huajin and Zheng, Qian and Pan, Gang}, title = {Dynamic-Static Decomposition for Novel View Synthesis of Dynamic Scenes with Spiking Neurons}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8342-8352} }
Hierarchical Visual Relocalization with Nearest View Synthesis from Feature Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tao_2026_CVPR, author = {Tao, Huaqi and Liu, Bingxi and Chen, Guangcheng and Tang, Fulin and He, Li and Zhang, Hong}, title = {Hierarchical Visual Relocalization with Nearest View Synthesis from Feature Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40981-40991} }
Unleashing Vision-Language Semantics for Deepfake Video Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Jiawen and Miao, Yunqi and Zhang, Xueyi and Deng, Jiankang and Pang, Guansong}, title = {Unleashing Vision-Language Semantics for Deepfake Video Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42953-42963} }
Protect to Adapt: Orthogonal Subspace Control with Ranked Negative-Prompt Curriculum for Few-Shot Action Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Qi_2026_CVPR, author = {Qi, Hantao and Yan, Yan and Gao, Junlong and Wang, Hanzi}, title = {Protect to Adapt: Orthogonal Subspace Control with Ranked Negative-Prompt Curriculum for Few-Shot Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20160-20169} }
MaskFocus: Focusing Policy Optimization on Critical Steps for Masked Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Guohui and Yu, Hu and Ma, Xiaoxiao and Pan, Yaning and Xu, Hang and Huang, Jie and Zhao, Feng}, title = {MaskFocus: Focusing Policy Optimization on Critical Steps for Masked Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5956-5966} }
Differentially Private 2D Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sivangi_2026_CVPR, author = {Sivangi, Kaushik Bhargav and Henderson, Paul and Deligianni, Fani}, title = {Differentially Private 2D Human Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21143-21153} }
MetricHMSR: Metric Human Mesh and Scene Recovery from Monocular Images-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Chentao and Zhang, He and Yuan, Haolei and Lin, Haozhe and Tao, Jianhua and Zhang, Hongwen and Yu, Tao}, title = {MetricHMSR: Metric Human Mesh and Scene Recovery from Monocular Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21132-21142} }
Uni-Hema: Unified Model for Digital Hematopathology-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rehman_2026_CVPR, author = {Rehman, Abdul and Rasool, Iqra and Imran, Ayisha and Ali, Mohsen and Sultani, Waqas}, title = {Uni-Hema: Unified Model for Digital Hematopathology}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37578-37589} }
ChartNet: A Million-Scale, High-Quality Multimodal Dataset for Robust Chart Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kondic_2026_CVPR, author = {Kondic, Jovana and Li, Pengyuan and Joshi, Dhiraj and Sanchez, Isaac and Wiesel, Ben and Abedin, Shafiq and Alfassy, Amit and Schwartz, Eli and Caraballo, Daniel and Cinar, Yagmur Gizem and Scheidegger, Florian and Ross, Steven I. and Weidele, Daniel Karl I. and Hua, Hang and Arutyunova, Ekaterina and Herzig, Roei and Wang, Zihan and Yu, Xinyue and Zhao, Yunfei and Jiang, Sicong and Liu, Minghao and Lin, Qunshu and Oliva, Aude and Feris, Rogerio}, title = {ChartNet: A Million-Scale, High-Quality Multimodal Dataset for Robust Chart Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15922-15932} }
MapRoute: Precise-Concept Erasing Mappers via Semantic Routing-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Sihao and Liang, Baixi and Xia, Shuohong and Yang, Yunyun}, title = {MapRoute: Precise-Concept Erasing Mappers via Semantic Routing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10187-10196} }
GraspGen-X: Cross-Embodiment 6-DOF Diffusion-based Grasping-
[pdf]
[supp]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Beining and Chao, Yu-Wei and Coumans, Erwin and Eppner, Clemens and Deng, Jia and Birchfield, Stan and Murali, Adithyavairavan}, title = {GraspGen-X: Cross-Embodiment 6-DOF Diffusion-based Grasping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {20878-20889} }
Not All Birds Look The Same: Identity-Preserving Generation For Birds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Aaron and Saha, Oindrila and Maji, Subhransu}, title = {Not All Birds Look The Same: Identity-Preserving Generation For Birds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1983-1993} }
Geometric-Aware Hypergraph Reasoning for Novel Class Discovery in Point Cloud Segmentation-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zihao and Wu, Aming and Li, Yang and Han, Yahong and Shen, Jialie}, title = {Geometric-Aware Hypergraph Reasoning for Novel Class Discovery in Point Cloud Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10006-10015} }
Discriminative Perception via Anchored Description for Reasoning Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Tao and Zhou, Qing and Li, Yanliang and Wang, Qi}, title = {Discriminative Perception via Anchored Description for Reasoning Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {13189-13198} }
Dual Band Thermal Videography: Separating Time-Varying Reflection and Emission Near Ambient Conditions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Narayanan_2026_CVPR, author = {Narayanan, Sriram and Ramanagopal, Mani and Narasimhan, Srinivasa}, title = {Dual Band Thermal Videography: Separating Time-Varying Reflection and Emission Near Ambient Conditions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {199-208} }
Breaking the Continuum: Discrete Distribution Learning for Structural MRI Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Lyu_2026_CVPR, author = {Lyu, Tianle and Mo, Mengjingcheng and Wen, Ting and Song, Zhen and Xiong, Zinan and Zhu, Yanjie}, title = {Breaking the Continuum: Discrete Distribution Learning for Structural MRI Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37568-37577} }
SpaceDrive: Infusing Spatial Awareness into VLM-based Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Peizheng and Zhang, Zhenghao and Holtz, David and Yu, Hang and Yang, Yutong and Lai, Yuzhi and Song, Rui and Geiger, Andreas and Zell, Andreas}, title = {SpaceDrive: Infusing Spatial Awareness into VLM-based Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40096-40107} }
H2-Surv: Hierarchical Hyperbolic Multimodal Representation Learning for Survival Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Jiaqi and Chen, Wenting and He, Xiangjian and Li, Yuanbai and Yang, Sen and Shen, Linlin and Xing, Xiaohan}, title = {H2-Surv: Hierarchical Hyperbolic Multimodal Representation Learning for Survival Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28318-28327} }
CodeMMR: Bridging Natural Language, Code, and Image for Unified Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Geng_2026_CVPR, author = {Geng, Jiahui and Li, Qing and Cai, Fengyu and Karray, Fakhri}, title = {CodeMMR: Bridging Natural Language, Code, and Image for Unified Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38742-38752} }
RemedyGS: Defend 3D Gaussian Splatting Against Computation Cost Attacks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yanping and Liu, Zhening and Li, Zijian and Lin, Zehong and Zhang, Jun}, title = {RemedyGS: Defend 3D Gaussian Splatting Against Computation Cost Attacks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33227-33236} }
Beyond Explicit Language: Plug-and-Play Visual-to-Linguistic Modeling Toward General Object Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Lan_2026_CVPR, author = {Lan, Kaiyang and Cui, Ying and Jing, Chenchen and Zheng, Jianwei and Guo, Dongyan}, title = {Beyond Explicit Language: Plug-and-Play Visual-to-Linguistic Modeling Toward General Object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42604-42614} }
BiProLoRA: Bilevel Prompt LoRA for Real Scene Recovery-
[pdf]
[bibtex]@InProceedings{An_2026_CVPR, author = {An, Nan and Ma, Long and Ma, Tengyu and Liu, Zhu and Liu, Yingchi and Liu, Risheng}, title = {BiProLoRA: Bilevel Prompt LoRA for Real Scene Recovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15465-15475} }
Benchmarking Endoscopic Surgical Image Restoration and Beyond-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pei_2026_CVPR, author = {Pei, Jialun and Guo, Diandian and Yang, Donghui and Li, Zhixi and Feng, Yuxin and Ma, Long and Du, Bo and Heng, Pheng-Ann}, title = {Benchmarking Endoscopic Surgical Image Restoration and Beyond}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37413-37422} }
DRAMA: Next-Gen Dynamic Orchestration for Resilient Multi-Agent Ecosystems in Flux-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Xinkui and Zhang, Yifan and Liu, Sai and Wang, Naibo and Cheng, Guanjie and Xu, Yueshen and Liu, Chang and Deng, Shuiguang and Yin, Jianwei}, title = {DRAMA: Next-Gen Dynamic Orchestration for Resilient Multi-Agent Ecosystems in Flux}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1020-1030} }
EagleVision: A Dual-Stage Framework with BEV-grounding-based Chain-of-Thought for Spatial Intelligence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wan_2026_CVPR, author = {Wan, Jiaxu and Wang, Xu and Xie, Mengwei and Zhang, Hang and Xu, Mu and Han, Yang and Yuan, Ding and Zhang, Hong and Yang, Yifan}, title = {EagleVision: A Dual-Stage Framework with BEV-grounding-based Chain-of-Thought for Spatial Intelligence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38637-38646} }
OneStory: Coherent Multi-Shot Video Generation with Adaptive Memory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{An_2026_CVPR, author = {An, Zhaochong and Jia, Menglin and Qiu, Haonan and Zhou, Zijian and Huang, Xiaoke and Liu, Zhiheng and Ren, Weiming and Kahatapitiya, Kumara and Liu, Ding and He, Sen and Zhang, Chenyang and Xiang, Tao and Yang, Fanny and Belongie, Serge and Xie, Tian}, title = {OneStory: Coherent Multi-Shot Video Generation with Adaptive Memory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16173-16184} }
Tavatar: Topology-Aware Gaussian Attribute Derivation for Animatable Human Avatars-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Hailin and Yang, Yifan and Shu, Jiazhi and Huang, Zixiong and Chen, Qi and Du, Qing and Tan, Mingkui}, title = {Tavatar: Topology-Aware Gaussian Attribute Derivation for Animatable Human Avatars}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4087-4096} }
From Weights to Concepts: Data-Free Interpretability of CLIP via Singular Vector Decomposition-
[pdf]
[supp]
[bibtex]@InProceedings{Gentile_2026_CVPR, author = {Gentile, Francesco and Dall'Asen, Nicola and Tonini, Francesco and Mancini, Massimiliano and Vaquero, Lorenzo and Ricci, Elisa}, title = {From Weights to Concepts: Data-Free Interpretability of CLIP via Singular Vector Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2895-2906} }
OMoBlur: An Object Motion Blur Dataset and Benchmark for Real-World Local Motion Deblurring-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Dingchuan and Li, Jiatong and Zhou, Jingwen and Zhuge, Zhengyue and Chen, Yueting and Li, Qi}, title = {OMoBlur: An Object Motion Blur Dataset and Benchmark for Real-World Local Motion Deblurring}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22626-22635} }
StableMTL: Repurposing Latent Diffusion Models for Multi-Task Learning from Partially Annotated Synthetic Datasets-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Anh-Quan and Lopes, Ivan and de Charette, Raoul}, title = {StableMTL: Repurposing Latent Diffusion Models for Multi-Task Learning from Partially Annotated Synthetic Datasets}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37788-37798} }
Locate-then-Sparsify: Attribution Guided Sparse Strategy for Visual Hallucination Mitigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dang_2026_CVPR, author = {Dang, Tiantian and Bi, Chao and Shen, Shufan and Liu, Jinzhe and Huang, Qingming and Wang, Shuhui}, title = {Locate-then-Sparsify: Attribution Guided Sparse Strategy for Visual Hallucination Mitigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18251-18260} }
Beyond Rule-Based Agents: Active Markov Games for Realistic Multi-Agent Interaction in Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Gui_2026_CVPR, author = {Gui, Yuan and Luo, Hongchen and Wang, Jiao and Qu, Liqi}, title = {Beyond Rule-Based Agents: Active Markov Games for Realistic Multi-Agent Interaction in Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {10689-10698} }
Spectral Conformal Risk Control: Distribution-Free Tail Guarantees via Bayesian Quadrature-
[pdf]
[supp]
[bibtex]@InProceedings{Esfeh_2026_CVPR, author = {Esfeh, Mohammad Mahdi Kazemi and Yan, Qi and Zhang, Yongxing and Gholami, Zahra and Liao, Renjie and Abolmaesumi, Purang}, title = {Spectral Conformal Risk Control: Distribution-Free Tail Guarantees via Bayesian Quadrature}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12977-12986} }
VideoChat-M1: Collaborative Policy Planning for Video Understanding via Multi-Agent Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Boyu and Wang, Zikang and Yue, Zhengrong and Yan, Kainan and Yu, Chenyun and Huang, Yi and Liu, Zijun and Wen, Yafei and Chen, Xiaoxin and Liu, Yang and Li, Peng and Wang, Yali}, title = {VideoChat-M1: Collaborative Policy Planning for Video Understanding via Multi-Agent Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33772-33783} }
Cycle-Consistent Tuning for Layered Image Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2026_CVPR, author = {Gu, Zheng and Lu, Min and Sun, Zhida and Lischinski, Dani and Cohen-Or, Daniel and Huang, Hui}, title = {Cycle-Consistent Tuning for Layered Image Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22478-22487} }
SketchVL: Policy Optimization via Fine-Grained Credit Assignment for Chart Understanding and More-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Muye and Zhang, Lingling and Li, Yifei and Wu, Yaqiang and Liu, Jun}, title = {SketchVL: Policy Optimization via Fine-Grained Credit Assignment for Chart Understanding and More}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4738-4748} }
ArchSym: Detecting 3D-Grounded Architectural Symmetries in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Hanyu and Cai, Ruojin and Marschner, Steve and Snavely, Noah}, title = {ArchSym: Detecting 3D-Grounded Architectural Symmetries in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36561-36570} }
Structural Graph Probing of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Haoyu and Zhuo, Yue and Zheng, Yu and Wang, Qi R.}, title = {Structural Graph Probing of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24084-24094} }
P-Flow: Prompting Visual Effects Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Rui and Shou, Mike Zheng}, title = {P-Flow: Prompting Visual Effects Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9149-9160} }
Teaching DINOv3 About Partial 3D Geometry: A Self-Supervised Geometry-Aware Approach-
[pdf]
[supp]
[bibtex]@InProceedings{Ehm_2026_CVPR, author = {Ehm, Viktoria and Cao, Dongliang and Marin, Riccardo and Scholz, Daniel and Wang, Weikang and Bernard, Florian and Cremers, Daniel}, title = {Teaching DINOv3 About Partial 3D Geometry: A Self-Supervised Geometry-Aware Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42071-42081} }
One Token, Two Fates: A Unified Framework via Vision Token Manipulation Against MLLMs Hallucination-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fa_2026_CVPR, author = {Fa, Zhan and Duan, Yue and Zhang, Jian and Qi, Lei and Shi, Yinghuan}, title = {One Token, Two Fates: A Unified Framework via Vision Token Manipulation Against MLLMs Hallucination}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11106-11115} }
SpatialDiff: 3D-Aware Object Movement via Implicit Spatial Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Zheng and He, Zijian and He, Huiguo and Zhong, Weizhi and Tang, Yejun and Yang, Huan and Gai, Kun and Li, Guanbin}, title = {SpatialDiff: 3D-Aware Object Movement via Implicit Spatial Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18396-18406} }
Voxify3D: Pixel Art Meets Volumetric Rendering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Yi-Chuan and Chan, Jiewen and Chien, Hao-Jen and Liu, Yu-Lun}, title = {Voxify3D: Pixel Art Meets Volumetric Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {15398-15410} }
Multi-modal Frequency Decomposition Network for Semantic Scene Completion-
[pdf]
[supp]
[bibtex]@InProceedings{Zuo_2026_CVPR, author = {Zuo, Die and Wang, Lubo and Liu, Ruonan and Guo, Qing and Wang, Chong and Wu, Dongdong and Feng, Wei and Yang, Kairui and Lin, Di}, title = {Multi-modal Frequency Decomposition Network for Semantic Scene Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41531-41540} }
Beyond Missing Modalities: Hypergraph Conditioned Diffusion for Uncertainty-Aware Multimodal Emotion Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Qiu_2026_CVPR, author = {Qiu, Xihang and Fang, Yuhao and Zhou, Qing and Zhai, Bin and Hong, Jialong and Zhang, Wanpeng and Lu, Yao and Zhang, Ye and Li, Chun}, title = {Beyond Missing Modalities: Hypergraph Conditioned Diffusion for Uncertainty-Aware Multimodal Emotion Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22953-22963} }
GRPO-Guard: Mitigating Implicit Over-Optimization in Flow Matching via Regulated Clipping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Jing and Liang, Jiajun and Liu, Jie and Liu, Henglin and Liu, Gongye and Zheng, Jun and Pang, Wanyuan and Ma, Ao and Xie, Zhenyu and Wang, Xintao and Wang, Meng and Wan, Pengfei and Liang, Xiaodan}, title = {GRPO-Guard: Mitigating Implicit Over-Optimization in Flow Matching via Regulated Clipping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5988-5998} }
Target-Aware Invertible Encoder with Reconstruction Guidance for Infrared Small Target Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Shule and Zhang, Zetian and Ma, Xiao and Ji, Zexuan}, title = {Target-Aware Invertible Encoder with Reconstruction Guidance for Infrared Small Target Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32714-32723} }
Cross-View Distillation and Adaptive Masking for Incomplete Multi-View Multi-Label Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yadong and Li, Qiaoqi and Wang, Yueying and Fei, Lunke and Wen, Jie}, title = {Cross-View Distillation and Adaptive Masking for Incomplete Multi-View Multi-Label Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23051-23060} }
Efficient Weighted Sampling via Score-based Generative Models-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Heasung and Lee, Taekyun and Kim, Hyeji and De Veciana, Gustavo}, title = {Efficient Weighted Sampling via Score-based Generative Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1155-1166} }
SD-FSMIS: Adapting Stable Diffusion for Few-Shot Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Meihua and Zhang, Yang and He, Weizhao and Qu, Hu and Li, Yisong}, title = {SD-FSMIS: Adapting Stable Diffusion for Few-Shot Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29979-29989} }
FastLightGen: Fast and Light Video Generation with Fewer Steps and Parameters-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2026_CVPR, author = {Shao, Shitong and Gu, Yufei and Xie, Zeke}, title = {FastLightGen: Fast and Light Video Generation with Fewer Steps and Parameters}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2104-2114} }
The Invisible Gorilla Effect in Out-of-distribution Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Anthony_2026_CVPR, author = {Anthony, Harry and Liang, Ziyun and Warr, Hermione and Kamnitsas, Konstantinos}, title = {The Invisible Gorilla Effect in Out-of-distribution Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {39314-39325} }
Learning by Analogy: A Causal Framework for Compositional Generalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kong_2026_CVPR,
  author    = {Kong, Lingjing and Xie, Shaoan and Jiao, Yang and Chen, Yetian and Guo, Yanhui and Shao, Simone and Gao, Yan and Chen, Guangyi and Zhang, Kun},
  title     = {Learning by Analogy: A Causal Framework for Compositional Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {36615--36626},
}
Prefill-Time Intervention for Mitigating Hallucination in Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chengsheng and Sun, Chenghao and Jiang, Xinyan and Li, Wei and Tian, Xinmei},
  title     = {Prefill-Time Intervention for Mitigating Hallucination in Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25293--25303},
}
UniPart: Part-Level 3D Generation with Unified 3D Geom-Seg Latents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR,
  author    = {He, Xufan and Wu, Yushuang and Guo, Xiaoyang and Ye, Chongjie and Zhou, Jiaqing and Hu, Tianlei and Han, Xiaoguang and Du, Dong},
  title     = {{UniPart}: Part-Level {3D} Generation with Unified {3D} {Geom-Seg} Latents},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {34227--34236},
}
Rotation Invariant and Symmetry Aware Pixel Difference Network for Remote Sensing Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhan_2026_CVPR,
  author    = {Zhan, Jialei and Liu, Li and Zhang, Jiehua and Xie, Yuhang and Liu, Yongxiang and Chen, Jiangming and Cheng, Ming-Ming},
  title     = {Rotation Invariant and Symmetry Aware Pixel Difference Network for Remote Sensing Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13264--13274},
}
Gamba: Mamba-based graph convolutional network with dynamic graph topology learning for action recognition-
[pdf]
[bibtex]@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Rouyi and Wu, Yangzhi and Wen, Jiajun and Gao, Can and Liu, Feng and Lai, Zhihui and Shen, Linlin},
  title     = {Gamba: {Mamba}-based graph convolutional network with dynamic graph topology learning for action recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {6065--6074},
}
Language-Guided One-Step Diffusion Model for Nighttime Flare Removal-
[pdf]
[supp]
[bibtex]@InProceedings{Ning_2026_CVPR,
  author    = {Ning, Aoxiang and Yu, Kailong and Xue, Minglong and Pan, Liyuan and He, Jinhong and Yan, Wenchao and Zhou, Mingliang and Wu, Yirui},
  title     = {Language-Guided One-Step Diffusion Model for Nighttime Flare Removal},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {38442--38452},
}
Instance-level Visual Active Tracking with Occlusion-Aware Planning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Haowei and Zhou, Kai and Gao, Hao and Zhang, Shiteng and Hu, Jinwu and Wen, Xutao and Ye, Qixiang and Tan, Mingkui},
  title     = {Instance-level Visual Active Tracking with Occlusion-Aware Planning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42494--42504},
}
HandX: Scaling Bimanual Motion and Interaction Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zimu and Zhang, Yucheng and Xu, Xiyan and Wang, Ziyin and Xu, Sirui and Zhou, Kai and Zhou, Bing and Guo, Chuan and Wang, Jian and Wang, Yu-Xiong and Gui, Liang-Yan},
  title     = {{HandX}: Scaling Bimanual Motion and Interaction Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2274--2284},
}
GeoDiff4D: Geometry-Aware Diffusion for 4D Head Avatar Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Chao and Zhao, Xiaochen and Deng, Xiang and Sun, Jingxiang and Di, Donglin and Su, Zhuo and Liu, Yebin},
  title     = {{GeoDiff4D}: Geometry-Aware Diffusion for {4D} Head Avatar Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32485--32495},
}
LLaVAShield: Safeguarding Multimodal Multi-Turn Dialogues in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Guolei and Peng, Qinzhi and Xu, Gan and Huang, Yao and Lu, Yuxuan and Shen, Yongjun},
  title     = {{LLaVAShield}: Safeguarding Multimodal Multi-Turn Dialogues in Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30130--30140},
}
R2G: A Multi-View Circuit Graph Benchmark Suite from RTL to GDSII-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Zewei and Zou, Jiajun and Zhang, Jiajia and Yang, Ao and He, Ruichao and Zhou, Haozheng and Liu, Ao and Liu, Jiawei and Jin, Leilei and Shen, Shan and Sun, Daying},
  title     = {{R2G}: A Multi-View Circuit Graph Benchmark Suite from {RTL} to {GDSII}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18827--18836},
}
Time-Specialized Event-Image Alignment for Blur-to-Video Decomposition-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Zhijing and Xu, Senyan and Jiang, Ruixuan and Liu, Kean and Tian, Runze and Fu, Xueyang and Zha, Zheng-Jun},
  title     = {Time-Specialized Event-Image Alignment for Blur-to-Video Decomposition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {8045--8055},
}
JUMP-Hand: Learning Joint-wise Uncertainty to Gate Mixture of View Experts for Multi-View 3D Hand Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Kuang_2026_CVPR,
  author    = {Kuang, Haohong and Xiao, Yang and Jiang, Changlong and Zheng, Jinghong and Xu, Hang and Wang, Ran and Cao, Zhiguo and Zhou, Joey Tianyi},
  title     = {{JUMP-Hand}: Learning Joint-wise Uncertainty to Gate Mixture of View Experts for Multi-View {3D} Hand Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28348--28357},
}
From Manuals to Actions: A Unified VLA Model for Chain-of-Thought Manual Generation and Robotic Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Chenyang and Liu, Jiaming and Chen, Hao and Huang, Runzhong and Wuwu, Qingpo and Li, Xiaoqi and Liu, Zhuoyang and Li, Ying and Zhang, Renrui and Jia, Peng and Heng, Pheng-Ann and Zhang, Shanghang},
  title     = {From Manuals to Actions: A Unified {VLA} Model for Chain-of-Thought Manual Generation and Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {13540--13552},
}
Anatomical Domain Shifts: Test-time Heterogeneous Adaptation for 3D Human Pose Prediction-
[pdf]
[bibtex]@InProceedings{Cui_2026_CVPR,
  author    = {Cui, Qiongjie and Zhou, Pan and Chen, Jingjing and Zhao, Na},
  title     = {Anatomical Domain Shifts: Test-time Heterogeneous Adaptation for {3D} Human Pose Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28369--28378},
}
Pointing at Parts: Training-Free Few-Shot Grounding in Multimodal LLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Tsai_2026_CVPR,
  author    = {Tsai, Shiang-Feng and Liao, Yuan-Hong and Jhang, Jin-Cheng and Qiao, Nan and Sun, Min},
  title     = {Pointing at Parts: Training-Free Few-Shot Grounding in Multimodal {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33922--33932},
}
SubspaceAD: Training-Free Few-Shot Anomaly Detection via Subspace Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Lendering_2026_CVPR,
  author    = {Lendering, Camile and Akdag, Erkut and Bondarau, Egor},
  title     = {{SubspaceAD}: Training-Free Few-Shot Anomaly Detection via Subspace Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28557--28566},
}
Evolving Contextual Safety in Multi-Modal Large Language Models via Inference-Time Self-Reflective Memory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Ce and He, Jinxi and He, Junyi and Sycara, Katia and Xie, Yaqi},
  title     = {Evolving Contextual Safety in Multi-Modal Large Language Models via Inference-Time Self-Reflective Memory},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {41182--41192},
}
FlashMesh: Faster and Better Autoregressive Mesh Synthesis via Structured Speculation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Tingrui and Zhang, Yiheng and Tang, Chen and Ping, Chuan and Zhao, Zixing and Wan, Le and Wang, Yuwang and Wang, Ronggang and He, Shengfeng},
  title     = {{FlashMesh}: Faster and Better Autoregressive Mesh Synthesis via Structured Speculation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27052--27061},
}
SeeThrough3D: Occlusion Aware 3D Control in Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Agrawal_2026_CVPR,
  author    = {Agrawal, Vaibhav and Parihar, Rishubh and Bhat, Pradhaan S and Sarvadevabhatla, Ravi Kiran and Radhakrishnan, Venkatesh Babu},
  title     = {{SeeThrough3D}: Occlusion Aware {3D} Control in Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {25403--25414},
}
Exemplar-Free Class Incremental Learning via Preserving Class-Discriminative Structure-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xin and Bai, Liang and Wang, Guanchao and Yang, Xian},
  title     = {Exemplar-Free Class Incremental Learning via Preserving Class-Discriminative Structure},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17979--17988},
}
RARE: Learn to RAnk and REtrieve for Monocular 3D Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2026_CVPR,
  author    = {Park, Hyeonjeong and Xiong, Peixi and Ruan, Xiaoqian and Jia, Dian and Yu, Pei and Tang, Wei},
  title     = {{RARE}: Learn to {RAnk} and {REtrieve} for Monocular {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {11556--11566},
}
Do You See What I Am Pointing At? Gesture-Based Egocentric Video Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Yura and Miles, Roy and Potamias, Rolandos Alexandros and Elezi, Ismail and Deng, Jiankang and Zafeiriou, Stefanos},
  title     = {Do You See What {I} Am Pointing At? Gesture-Based Egocentric Video Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18534--18544},
}
3D Gaussian Splatting with Self-Constrained Priors for High Fidelity Surface Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Noda_2026_CVPR,
  author    = {Noda, Takeshi and Liu, Yu-Shen and Han, Zhizhong},
  title     = {{3D} {Gaussian} Splatting with Self-Constrained Priors for High Fidelity Surface Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26041--26051},
}
VideoSSR: Video Self-Supervised Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR,
  author    = {He, Zefeng and Qu, Xiaoye and Li, Yafu and Huang, Siyuan and Liu, Daizong and Cheng, Yu},
  title     = {{VideoSSR}: Video Self-Supervised Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {26530--26540},
}
Reasoning Diffusion for Unpaired Test Time Out-of-distribution Text-Image to Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Zirui and Wang, Xin and Zhang, Yipeng and Chen, Hong and Zheng, Kecheng and Zhu, Wenwu},
  title     = {Reasoning Diffusion for Unpaired Test Time Out-of-distribution Text-Image to Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {636--646},
}
Disentangling to Re-couple: Resolving the Similarity-Controllability Paradox in Subject-Driven Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Shuang and Deng, Chao and Chen, Hang and Liu, Liqun and Hu, Zhenyu and Cao, Te and Xue, Mengge and Chen, Yuan and Shu, Peng and Yu, Huan and Jiang, Jie},
  title     = {Disentangling to Re-couple: Resolving the Similarity-Controllability Paradox in Subject-Driven Text-to-Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7741--7751},
}
Generalizing Visual Geometry Priors to Sparse Gaussian Occupancy Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Changqing and Luo, Yueru and Chen, Changhao},
  title     = {Generalizing Visual Geometry Priors to Sparse {Gaussian} Occupancy Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28578--28587},
}
ReWeaver: Towards Simulation-Ready and Topology-Accurate Garment Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Ming and Shan, Hui and Zheng, Kai and Shen, Chentao and Liu, Siyu and Fu, Yanwei and Chen, Zhen and Huang, Xiangru},
  title     = {{ReWeaver}: Towards Simulation-Ready and Topology-Accurate Garment Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {4122--4131},
}
Scan Clusters, Not Pixels: A Cluster-Centric Paradigm for Efficient Ultra-high-definition Image Restoration-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Chen and Wang, Ling and Zheng, Zhuoran and Cui, Yuning and Yang, Zhixiong and Chen, Xiangyu and Zhang, Yue and Jiang, Weidong and Xia, Jingyuan},
  title     = {Scan Clusters, Not Pixels: A Cluster-Centric Paradigm for Efficient Ultra-high-definition Image Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {15528--15537},
}
SPARROW: Learning Spatial Precision and Temporal Referential Consistency in Pixel-Grounded Video MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Alansari_2026_CVPR,
  author    = {Alansari, Mohamad and Suryanto, Naufal and Velayudhan, Divya and Javed, Sajid and Werghi, Naoufel and Naseer, Muzammal},
  title     = {{SPARROW}: Learning Spatial Precision and Temporal Referential Consistency in Pixel-Grounded Video {MLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17545--17556},
}
PETAR: Localized Findings Generation with Mask-Aware Vision-Language Modeling for PET Automated Reporting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Maqbool_2026_CVPR,
  author    = {Maqbool, Danyal and Lee, Changhee and Huemann, Zachary and Church, Samuel D. and Larson, Matthew E. and Perlman, Scott B. and Romero, Tomas A. and Warner, Joshua D. and Lubner, Meghan and Tie, Xin and Merkow, Jameson and Hu, Junjie and Cho, Steve Y. and Bradshaw, Tyler J.},
  title     = {{PETAR}: Localized Findings Generation with Mask-Aware Vision-Language Modeling for {PET} Automated Reporting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42637--42648},
}
PROMPTMINER: Black-Box Prompt Stealing against Text-to-Image Generative Models via Reinforcement Learning and VLM-Guided Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Mingzhe and Zhang, Renhao and Wen, Zhiyang and Pan, Siqi and da Silva, Bruno Castro and Zhai, Juan and Ma, Shiqing},
  title     = {{PROMPTMINER}: Black-Box Prompt Stealing against Text-to-Image Generative Models via Reinforcement Learning and {VLM}-Guided Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7795--7804},
}
Iterative Closed-Loop Motion Synthesis for Scaling the Capabilities of Humanoid Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Weisheng and Wu, Qiwei and Zhang, Jiaxi and Tan, Jing and Li, Yangfan and Fang, Yuetong and Xiong, Jiaqi and Wu, Kai and Ou, Rong and Xu, Renjing},
  title     = {Iterative Closed-Loop Motion Synthesis for Scaling the Capabilities of Humanoid Control},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {16398--16407},
}
Editprint: General Digital Image Forensics via Editing Fingerprint with Self-Augmentation Training-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Haiwei and Li, Kemou and Li, Yuanman and Zhou, Jiantao},
  title     = {Editprint: General Digital Image Forensics via Editing Fingerprint with Self-Augmentation Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35483--35493},
}
When Lines Meet Textures: Spatial-Frequency Aligned Diffusion Features for Cross-Sparsity Correspondence-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Mingrui and Wang, Fengzhi and Wei, Xin and Wang, Jun and Wang, Nannan and Gao, Xinbo},
  title     = {When Lines Meet Textures: Spatial-Frequency Aligned Diffusion Features for Cross-Sparsity Correspondence},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {37715--37724},
}
Routing on Demand: DSNet for Efficient Progressive Point Cloud Denoising-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Xiaoqian and Xiao, Dong and Li, Husen and Liu, Zheng and Chen, Renjie},
  title     = {Routing on Demand: {DSNet} for Efficient Progressive Point Cloud Denoising},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39111--39120},
}
ElasticFormer: Detecting Objects in HRW Shots via Elastic Computing Vision Transformer-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Wenxi and Huang, Jingchen and Lyu, Chenyang and Liu, Moran and Lin, Haozhe and Ding, Guiguang and Guo, Yuchen},
  title     = {{ElasticFormer}: Detecting Objects in {HRW} Shots via Elastic Computing Vision Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {32735--32744},
}
S2D: Selective Spectral Decay for Quantization-Friendly Conditioning of Neural Activations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chavan_2026_CVPR,
  author    = {Chavan, Arnav and Lele, Nahush and Bamba, Udbhav and Dayal, Sankalp and Raghunathan, Aditi and Gupta, Deepak},
  title     = {{S2D}: Selective Spectral Decay for Quantization-Friendly Conditioning of Neural Activations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {12074--12083},
}
Pushing the Frontier of Audiovisual Perception with Large-Scale Multimodal Correspondence Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vyas_2026_CVPR,
  author    = {Vyas, Apoorv and Chang, Heng-Jui and Yang, Cheng-Fu and Huang, Po-Yao and Gao, Luya and Richter, Julius and Chen, Sanyuan and Le, Matthew and Doll{\'a}r, Piotr and Feichtenhofer, Christoph and Lee, Ann and Hsu, Wei-Ning},
  title     = {Pushing the Frontier of Audiovisual Perception with Large-Scale Multimodal Correspondence Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30172--30182},
}
PhysGaia: A Physics-aware Benchmark with Multi-Body Interactions for Dynamic Novel View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Mijeong and Kim, Gunhee and Choi, Jungyoon and Roh, Wonjae and Han, Bohyung},
  title     = {{PhysGaia}: A Physics-aware Benchmark with Multi-Body Interactions for Dynamic Novel View Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {22604--22614},
}
NS-Diff: Fluid Navier-Stokes Guided Video Diffusion via Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Zijun and Peng, Yuxin},
  title     = {{NS-Diff}: Fluid {Navier-Stokes} Guided Video Diffusion via Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {43218--43227},
}
CaliTex: Geometry-Calibrated Attention for View-Coherent 3D Texture Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Chenyu and Chen, Hongze and Bao, Jingzhi and Zhu, Lingting and Zhang, Runze and Chen, Weikai and Hu, Zeyu and Yin, Yingda and Luo, Keyang and Wang, Xin},
  title     = {{CaliTex}: Geometry-Calibrated Attention for View-Coherent {3D} Texture Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {5923--5933},
}
DynamicTree: Interactive Real Tree Animation via Sparse Voxel Spectrum-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Yaokun and Ding, Lihe and Chen, Xiao and Tan, Guang and Xue, Tianfan},
  title     = {{DynamicTree}: Interactive Real Tree Animation via Sparse Voxel Spectrum},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {1241--1251},
}
Ghost-FWL: A Large-Scale Full-Waveform LiDAR Dataset for Ghost Detection and Removal-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ikeda_2026_CVPR,
  author    = {Ikeda, Kazuma and Hara, Ryosei and Nagata, Rokuto and Sako, Ozora and Ding, Zihao and Kado, Takahiro and Fujioka, Ibuki and Beppu, Taro and Isogawa, Mariko and Yoshioka, Kentaro},
  title     = {{Ghost-FWL}: A Large-Scale Full-Waveform {LiDAR} Dataset for Ghost Detection and Removal},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {17164--17173},
}
Yo'City: Personalized and Boundless 3D Realistic City Scene Generation via Self-Critic Expansion-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Keyang and Zhou, Sifan and Xu, Hongbin and Xu, Gang and Yang, Zhifei and Wang, Yikai and Xiao, Zhen and Long, Jieyi and Li, Ming},
  title     = {{Yo'City}: Personalized and Boundless {3D} Realistic City Scene Generation via Self-Critic Expansion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3219--3230},
}
LVLM-Aided Alignment of Task-Specific Vision Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Koebler_2026_CVPR,
  author    = {Koebler, Alexander and Kuhn, Lukas and Thon, Ingo and Buettner, Florian},
  title     = {{LVLM}-Aided Alignment of Task-Specific Vision Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {7837--7846},
}
Detecting Unknown Objects via Energy-based Separation for Open World Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Heo_2026_CVPR,
  author    = {Heo, Jun-Woo and Park, Keonhee and Park, Gyeong-Moon},
  title     = {Detecting Unknown Objects via Energy-based Separation for Open World Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {27558--27567},
}
EpiAgent: An Agent-Centric System for Ancient Inscription Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Shipeng and Chen, Ang and Nie, Na and Fang, Pengfei and Zhang, Min-Ling and Xue, Hui},
  title     = {{EpiAgent}: An Agent-Centric System for Ancient Inscription Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {39304--39313},
}
Unified Latent Space for Understanding and Generation via Semantic Auto-encoder-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Xiaojie and Zhao, Yang and Li, Ming and Zhang, Yancheng and Lyu, Zonglin and Chen, Yunpeng and Wang, Rui and Zhou, Daquan},
  title     = {Unified Latent Space for Understanding and Generation via Semantic Auto-encoder},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {2115--2124},
}
Diffusion-Based sRGB Real Noise Generation via Prompt-Driven Noise Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ko_2026_CVPR,
  author    = {Ko, Jaekyun and Kim, Dongjin and Lee, Soomin and Wang, Guanghui and Kim, Tae Hyun},
  title     = {Diffusion-Based {sRGB} Real Noise Generation via Prompt-Driven Noise Representation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35956--35966},
}
DiGraphHal-Bench: Evaluating Multimodal Large Language Models on Complex Directed Graphs-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Yixin and He, Zhao and Hou, Yuxin and Zhou, Changhua and Liu, Zihao and Wang, Peng and Lu, Chenglong and Zhang, Xu and Wang, Wei},
  title     = {{DiGraphHal-Bench}: Evaluating Multimodal Large Language Models on Complex Directed Graphs},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {30885--30894},
}
Talking Together: Synthesizing Co-Located 3D Conversations from Audio-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shan_2026_CVPR,
  author    = {Shan, Mengyi and Chang, Shouchieh and Bai, Ziqian and Liu, Shichen and Zhang, Yinda and Song, Luchuan and Pandey, Rohit and Fanello, Sean and Huang, Zeng},
  title     = {Talking Together: Synthesizing Co-Located {3D} Conversations from Audio},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {3965--3977},
}
MERG3R: A Divide-and-Conquer Approach to Large-Scale Neural Visual Geometry-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Leo Kaixuan and Shaikh, Abdus and Liang, Ruofan and Wu, Zhijie and Guan, Yushi and Vijaykumar, Nandita},
  title     = {{MERG3R}: A Divide-and-Conquer Approach to Large-Scale Neural Visual Geometry},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {28969--28978},
}
Alert-CLIP: Abnormality-aware Latent-Enhanced Representation Tuning of CLIP for Video Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Yiyan and Zhang, Menghao and Sun, Haifeng and Ren, Pengfei and Chu, Xianao and Xu, Chenye and Tan, Hong and Wang, Jinghan and Qi, Qi and Wang, Jingyu},
  title     = {{Alert-CLIP}: Abnormality-aware Latent-Enhanced Representation Tuning of {CLIP} for Video Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35545--35554},
}
Dropping Anchor and Spherical Harmonics for Sparse-view Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Shuangkang and Shen, I-Chao and Zhang, Xuanyang and Wang, Zesheng and Wang, Yufeng and Ding, Wenrui and YU, Gang and Igarashi, Takeo},
  title     = {Dropping Anchor and Spherical Harmonics for Sparse-view {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {33312--33322},
}
Neural-Centric Video Processing Pipeline for Unified Multi-Task Inference-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Seyeon and Ye, Juncheol and Kim, Jaehong and Han, Dongsu},
  title     = {Neural-Centric Video Processing Pipeline for Unified Multi-Task Inference},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {18555--18564},
}
MICo-150K: A Comprehensive Dataset Advancing Multi-Image Composition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Xinyu and Cen, Kangrui and Wei, Hongyang and Guo, Zhen and Li, Bairui and Wang, Zeqing and Zhang, Jinrui and Zhang, Lei},
  title     = {{MICo-150K}: A Comprehensive Dataset Advancing Multi-Image Composition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {29695--29706},
}
Scalable Multi-View Subspace Clustering with Tensorized Anchor Guidance-
[pdf]
[supp]
[bibtex]@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Miao and Hu, Xingchen and Liu, Jiyuan and Wang, Siwei and Wang, Min and Chen, Zijian},
  title     = {Scalable Multi-View Subspace Clustering with Tensorized Anchor Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {14367--14376},
}
MedTVT-R1: A Multimodal LLM Empowering Medical Reasoning and Diagnosis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yuting and Yuan, Kaishen and Lu, Hao and Yue, Yutao and Chen, Jintai and Wu, Kaishun},
  title     = {{MedTVT-R1}: A Multimodal {LLM} Empowering Medical Reasoning and Diagnosis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35248--35259},
}
Good Can Sometimes be Bad: A Unified Attack against 3D Point Cloud Classifier by a Flexible Isotropic Resampling-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2026_CVPR,
  author    = {Fan, Linkun and Zhang, Jiahao and Zhang, Juntao and Zhang, Lei and He, Fazhi and Han, Daojun},
  title     = {Good Can Sometimes be Bad: A Unified Attack against {3D} Point Cloud Classifier by a Flexible Isotropic Resampling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {42246--42256},
}
CryoHype: Reconstructing a thousand cryo-EM structures with transformer-based hypernetworks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Jeffrey and Jeon, Minkyu and Ma, Ambri and Yeung-Levy, Serena and Zhong, Ellen D.},
  title     = {{CryoHype}: Reconstructing a thousand {cryo-EM} structures with transformer-based hypernetworks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2026},
  pages     = {35280--35290},
}
Image-Guided Geometric Stylization of 3D Meshes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, Changwoon and Lee, Hyunsoo and Jambon, Cl{\'e}ment and Vinker, Yael and Kim, Young Min}, title = {Image-Guided Geometric Stylization of 3D Meshes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19972-19981} }
PHANTOM: Physics-Infused Video Generation via Joint Modeling of Visual and Latent Physical Dynamics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2026_CVPR, author = {Shen, Ying and Xiong, Jerry and Yu, Tianjiao and Lourentzou, Ismini}, title = {PHANTOM: Physics-Infused Video Generation via Joint Modeling of Visual and Latent Physical Dynamics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11185-11194} }
Learning Multi-View Spatial Reasoning from Cross-View Relations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2026_CVPR, author = {Jeong, Suchae and Song, Jaehwi and Lee, Haeone and Kim, Hanna and Kim, Jian and Lee, Dongjun and Shin, Dong Kyu and Kim, Changyeon and Hahm, Dongyoon and Jin, Woogyeol and Choi, Juheon and Lee, Kimin}, title = {Learning Multi-View Spatial Reasoning from Cross-View Relations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {2570-2581} }
UCAN: Unified Convolutional Attention Network for Expansive Receptive Fields in Lightweight Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Cao Thien and Trang, Phan Thi Thu and Duc, Do Nghiem and Anh, Ho Ngoc and Zhuang, Hanyang and Dung, Nguyen Duc}, title = {UCAN: Unified Convolutional Attention Network for Expansive Receptive Fields in Lightweight Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {23409-23418} }
Splat-Based Metal Artifact Reduction in Cone-Beam CT via Compact Attenuation Modeling-
[pdf]
[supp]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, Kiseok and Cho, Jaemin and Kim, Inchul and Kim, Min H.}, title = {Splat-Based Metal Artifact Reduction in Cone-Beam CT via Compact Attenuation Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26751-26760} }
Measuring the (Un)Faithfulness of Concept-Based Explanations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kumar_2026_CVPR, author = {Kumar, Shubham and Ahuja, Narendra}, title = {Measuring the (Un)Faithfulness of Concept-Based Explanations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38990-39000} }
Preserving Source Video Realism: High-Fidelity Face Swapping for Cinematic Quality-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Zekai and Du, Zongze and Zhu, Zhouhang and Zhong, Hao and Zhu, Muzhi and Wang, Wen and Xi, Yuling and Jing, Chenchen and Chen, Hao and Shen, Chunhua}, title = {Preserving Source Video Realism: High-Fidelity Face Swapping for Cinematic Quality}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40653-40663} }
TRANSPORTER: Transferring Visual Semantics from VLM Manifolds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Stergiou_2026_CVPR, author = {Stergiou, Alexandros}, title = {TRANSPORTER: Transferring Visual Semantics from VLM Manifolds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24129-24140} }
BALM: A Model-Agnostic Framework for Balanced Multimodal Learning under Imbalanced Missing Rates-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Phuong-Anh and Pham, Tien Anh and Le, Duc-Trong and Nguyen, Cam-Van Thi}, title = {BALM: A Model-Agnostic Framework for Balanced Multimodal Learning under Imbalanced Missing Rates}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {30246-30256} }
PerformRecast: Expression and Head Pose Disentanglement for Portrait Video Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Jiadong and Xiong, Bojun and Tian, Jie and Li, Hua and Long, Xiao and Zheng, Yong and Fu, Huan}, title = {PerformRecast: Expression and Head Pose Disentanglement for Portrait Video Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25131-25141} }
4D Primitive-Mache: Glueing Primitives for Persistent 4D Scene Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Mazur_2026_CVPR, author = {Mazur, Kirill and Taher, Marwan and Davison, Andrew J.}, title = {4D Primitive-Mache: Glueing Primitives for Persistent 4D Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7372-7381} }
Zero-Shot Reconstruction of Animatable 3D Avatars with Cloth Dynamics from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kwon_2026_CVPR, author = {Kwon, Joohyun and Sim, Geonhee and Moon, Gyeongsik}, title = {Zero-Shot Reconstruction of Animatable 3D Avatars with Cloth Dynamics from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18182-18192} }
HandDreamer: Zero-Shot Text to 3D Hand Model Generation using Corrective Hand Shape Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rosh_2026_CVPR, author = {Rosh, Green and Kukreja, Prateek and Vishakha, SR and H, Pawan Prasad B}, title = {HandDreamer: Zero-Shot Text to 3D Hand Model Generation using Corrective Hand Shape Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8846-8856} }
Learning Latent Proxies for Controllable Single-Image Relighting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Haoze and Wang, Zihao and Wu, Xianfeng and Bai, Yajing and Liu, Yexin and Li, Yun and Xu, Xiaogang and Yang, Harry}, title = {Learning Latent Proxies for Controllable Single-Image Relighting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27303-27312} }
Edit-aware RAW reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Punnappurath_2026_CVPR, author = {Punnappurath, Abhijith and Zhao, Luxi and Zhao, Ke and Nguyen, Hue and Grzeszczuk, Radek and Brown, Michael S.}, title = {Edit-aware RAW reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {8418-8427} }
View-Aware Semantic Alignment for Aerial-Ground Person Re-Identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Quan and Cai, Zeqiang and Zhao, Peiming and Wu, Jingze and Wu, Cailun and Chen, Hongbo and Lai, Jianhuang}, title = {View-Aware Semantic Alignment for Aerial-Ground Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {4383-4392} }
ChangeBridge: Spatiotemporal Image Generation with Multimodal Controls for Remote Sensing-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Zhenghui and Wu, Chen and Cao, Xiangyong and Wang, Di and Chen, Hongruixuan and Tang, Datao and Zhang, Liangpei and Zheng, Zhuo}, title = {ChangeBridge: Spatiotemporal Image Generation with Multimodal Controls for Remote Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27761-27771} }
PhysGM: Large Physical Gaussian Model for Feed-Forward 4D Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lv_2026_CVPR, author = {Lv, Chunji and Chen, Zequn and Di, Donglin and Zhang, Weinan and Li, Hao and Wei, Chen and Lei, Yinjie and Li, Changsheng}, title = {PhysGM: Large Physical Gaussian Model for Feed-Forward 4D Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29855-29865} }
EmoStyle: Emotion-Driven Image Stylization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Jingyuan and Bai, Zihuan and Huang, Hui}, title = {EmoStyle: Emotion-Driven Image Stylization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {594-603} }
AcTTA: Rethinking Test-Time Adaptation via Dynamic Activation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Hyeongyu and Han, Geonhui and Hwang, Dosik}, title = {AcTTA: Rethinking Test-Time Adaptation via Dynamic Activation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22217-22226} }
Prospective Dynamic 3D MRI Reconstruction via Latent-Space Motion Tracking from Single Measurement-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Lixuan and Liu, Zhongnan and Hamilton, Jesse and Balter, James M. and Park, Jeong Joon and Shen, Liyue}, title = {Prospective Dynamic 3D MRI Reconstruction via Latent-Space Motion Tracking from Single Measurement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5627-5636} }
STARFlow-V: End-to-End Video Generative Modeling with Autoregressive Normalizing Flows-
[pdf]
[supp]
[bibtex]@InProceedings{Gu_2026_CVPR, author = {Gu, Jiatao and Shen, Ying and Chen, Tianrong and Dinh, Laurent and Wang, Yuyang and Bautista, Miguel Angel and Berthelot, David and Susskind, Josh and Zhai, Shuangfei}, title = {STARFlow-V: End-to-End Video Generative Modeling with Autoregressive Normalizing Flows}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9084-9094} }
High-Fidelity Virtual Try-On beyond Paired Data Scarcity via Diffusion-based Cycle-Consistent Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Jia and Dai, Yijing and Cao, Tingfeng and Wu, Meiling and Luo, Tao and Zhang, Jian Dong and Lu, Guangming and Zeng, Xiaoyi}, title = {High-Fidelity Virtual Try-On beyond Paired Data Scarcity via Diffusion-based Cycle-Consistent Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35821-35830} }
Self-guided Semantic Inspection for Zero-Shot Composed Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jingjing and Zhang, Lei and Fu, Zheren and Hu, Bo and Mao, Zhendong}, title = {Self-guided Semantic Inspection for Zero-Shot Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33881-33890} }
Vision-Language Attribute Disentanglement and Reinforcement for Lifelong Person Re-Identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Kunlun and Cheng, Haotong and Li, Jiangmeng and Zou, Xu and Zhou, Jiahuan}, title = {Vision-Language Attribute Disentanglement and Reinforcement for Lifelong Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40397-40406} }
SAGA: Source Attribution of Generative AI Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kundu_2026_CVPR, author = {Kundu, Rohit and Mohanty, Vishal and Xiong, Hao and Jia, Shan and Balachandran, Athula and Roy-Chowdhury, Amit K.}, title = {SAGA: Source Attribution of Generative AI Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {21273-21283} }
Dance Across Shifts: Forward-Facilitation Continual Test-Time Adaptation through Dynamic Style Bridging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Zhilin and Wang, Yabin and Ma, Zhiheng and Song, Yaguang and Wang, Yaowei and Hong, Xiaopeng}, title = {Dance Across Shifts: Forward-Facilitation Continual Test-Time Adaptation through Dynamic Style Bridging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {32322-32333} }
Enhancing Continual Learning of Vision-Language Models via Dynamic Prefix Weighting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2026_CVPR, author = {Jang, Hyeonseo and Kwon, Hyuk and Lee, Kibok}, title = {Enhancing Continual Learning of Vision-Language Models via Dynamic Prefix Weighting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18042-18052} }
Temporal Inversion for Learning Interval Change in Chest X-Rays-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ko_2026_CVPR, author = {Ko, Hanbin and Jeon, Kyeongmin and Choi, Doowoong and Park, Chang Min}, title = {Temporal Inversion for Learning Interval Change in Chest X-Rays}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {28338-28347} }
PixARMesh: Autoregressive Mesh-Native Single-View Scene Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xiang and Yoo, Sohyun and Wu, Hongrui and Li, Chuan and Xie, Jianwen and Tu, Zhuowen}, title = {PixARMesh: Autoregressive Mesh-Native Single-View Scene Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5881-5891} }
PS-SR: Pseudo-Single-Step Video Super-Resolution via Speculative Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Aiqiu and Qiu, Zhaofan and Yao, Ting and Mei, Tao}, title = {PS-SR: Pseudo-Single-Step Video Super-Resolution via Speculative Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38218-38227} }
ARES: Unifying Asymmetric RGB-Event Stereo for Probabilistic Scene Flow Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Jie Long and Lee, Gim Hee}, title = {ARES: Unifying Asymmetric RGB-Event Stereo for Probabilistic Scene Flow Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37022-37031} }
SelfHVD: Self-Supervised Handheld Video Deblurring-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Honglei and Zhang, Zhilu and Fan, Junjie and Wu, Xiaohe and Zuo, Wangmeng}, title = {SelfHVD: Self-Supervised Handheld Video Deblurring}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {37486-37495} }
RFDM: Residual Flow Diffusion Models for Video Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Salehi_2026_CVPR, author = {Salehi, Mohammadreza and Noroozi, Mehdi and Morreale, Luca and Chavhan, Ruchika and Chadwick, Malcolm and Gil Couto Pimentel Ramos, Alberto and Mehrotra, Abhinav}, title = {RFDM: Residual Flow Diffusion Models for Video Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {43514-43524} }
The Power of Decaying Steps: Enhancing Attack Stability and Transferability for Sign-based Optimizers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tao_2026_CVPR, author = {Tao, Wei and Dai, Yang and Huang, Jincai and Tao, Qing}, title = {The Power of Decaying Steps: Enhancing Attack Stability and Transferability for Sign-based Optimizers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42300-42309} }
Multi-Scale Gradient-Guided Unrolling Architecture with Adaptive Mamba for Compressive Sensing-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Le and Gan, Hongping}, title = {Multi-Scale Gradient-Guided Unrolling Architecture with Adaptive Mamba for Compressive Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {41794-41803} }
OmniVGGT: Omni-Modality Driven Visual Geometry Grounded Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Haosong and Li, Hao and Dai, Yalun and Lan, Yushi and Luo, Yihang and Qi, Tianyu and Zhang, Zhengshen and Zhan, Yufeng and Zhang, Junfei and Xu, Wenchao and Liu, Ziwei}, title = {OmniVGGT: Omni-Modality Driven Visual Geometry Grounded Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {36485-36497} }
Bayesian Decomposition and Semantic Completion for Few-shot Semantic Segmentation-
[pdf]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Guangchen and Wu, Yirui and Zhu, Wei and Wang, Tao and Zhang, Hao and Li, Bo and Lu, Tong}, title = {Bayesian Decomposition and Semantic Completion for Few-shot Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {12354-12363} }
Natural Human Motion Recovery by Aligning High-Order Temporal Dynamics from Monocular Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Dingkun and Shen, Zehong and Xia, Yan and Pavlakos, Georgios and Shen, Yujun and Zhou, Xiaowei}, title = {Natural Human Motion Recovery by Aligning High-Order Temporal Dynamics from Monocular Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7187-7196} }
Ego: Embedding-Guided Personalization of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seifi_2026_CVPR, author = {Seifi, Soroush and Gardier, Simon and Dorovatas, Vaggelis and Reino, Daniel Olmeda and Aljundi, Rahaf}, title = {Ego: Embedding-Guided Personalization of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {11674-11683} }
Think, Then Verify: A Hypothesis-Verification Multi-Agent Framework for Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zheng and Chen, Haoran and Qin, Haoxuan and Wei, Zhipeng and Qian, Tianwen and Bai, Cong}, title = {Think, Then Verify: A Hypothesis-Verification Multi-Agent Framework for Long Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33784-33793} }
MoVie: Broaden Your Views with Human Motion for Action Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Di and Ali, Mahmoud and Yu, Xuanlong and Shen, Xi and Kong, Quan and Francesca, Gianpiero and Br{\'e}mond, Fran{\c{c}}ois}, title = {MoVie: Broaden Your Views with Human Motion for Action Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27313-27323} }
CodeDance: A Dynamic Tool-integrated MLLM for Executable Visual Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Qi and Li, Honglin and Yu, Yingchen and Zhou, Haoyi and Yang, Lin and Bai, Song and She, Qi and Huang, Zilong and Zhao, Yunqing}, title = {CodeDance: A Dynamic Tool-integrated MLLM for Executable Visual Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {19186-19195} }
SpatialTree: How Spatial Intelligence Branches Out in MLLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Yuxi and Li, Longfei and Yan, Shen and Liu, Xinhang and Peng, Sida and Wei, Yunchao and Zhou, Xiaowei and Kang, Bingyi}, title = {SpatialTree: How Spatial Intelligence Branches Out in MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {16701-16711} }
HyperST: Hierarchical Hyperbolic Learning for Spatial Transcriptomics Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Chen and An, Yilu and Chen, Ying and Li, Hao and Ling, Xitong and Liu, Lihao and He, Junjun and Lin, Yuxiang and Wang, Zihui and Yu, Rongshan}, title = {HyperST: Hierarchical Hyperbolic Learning for Spatial Transcriptomics Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {5730-5739} }
Towards Robust Sequential Decomposition for Complex Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Zilai and Cao, Mingdeng and Li, Zijie and Lian, Xiaochen and Shi, Yichun and Zhu, Peihao and Sun, Chen and Wang, Peng}, title = {Towards Robust Sequential Decomposition for Complex Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {38101-38110} }
Prototypical Action Reasoning Facilitated by Vision-Language Alignment for Egocentric Action Anticipation-
[pdf]
[bibtex]@InProceedings{Shao_2026_CVPR, author = {Shao, Jiang and Zhao, Xinbo and Tuo, Wenyin and Zou, Xiaochun}, title = {Prototypical Action Reasoning Facilitated by Vision-Language Alignment for Egocentric Action Anticipation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24000-24009} }
PECCVAI: Overcoming the Brittleness of AI Image Watermarking Under Visual Paraphrasing Attacks-
[pdf]
[supp]
[bibtex]@InProceedings{Dixit_2026_CVPR, author = {Dixit, Shreyas and Aziz, Ashhar and Bajpai, Shashwat and Sharma, Vasu and Chadha, Aman and Jain, Vinija and Das, Amitava}, title = {PECCVAI: Overcoming the Brittleness of AI Image Watermarking Under Visual Paraphrasing Attacks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {24471-24480} }
STAC: Plug-and-Play Spatio-Temporal Aware Cache Compression for Streaming 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Runze and Song, Yuxuan and Cai, Youcheng and Liu, Ligang}, title = {STAC: Plug-and-Play Spatio-Temporal Aware Cache Compression for Streaming 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7567-7576} }
D^3FER: Dual Channel and Dual Branch Network for Robust Facial Expression Recognition under Dual Challenges-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Hui and He, Yifan and Jin, Zhong}, title = {{D$^3$FER}: Dual Channel and Dual Branch Network for Robust Facial Expression Recognition under Dual Challenges}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18085-18095} }
GA-VLN: Geometry-Aware BEV Representation for Efficient Vision-Language Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Jiahao and Wang, Zihan and Li, Xiangyang and Zhu, Xing and Shen, Yujun and Xu, Yinghao and Jiang, Shuqiang}, title = {GA-VLN: Geometry-Aware BEV Representation for Efficient Vision-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {40053-40063} }
SAME: Sparse and Anchored Model Editing for Heterogeneous Incremental Learning under Limited Data-
[pdf]
[supp]
[bibtex]@InProceedings{Duan_2026_CVPR, author = {Duan, Zixuan and Zhang, Zeyu and Lu, Fengyuan and Zhang, Shaofeng and Li, Wenbin and Fan, Qi and Gao, Yang}, title = {SAME: Sparse and Anchored Model Editing for Heterogeneous Incremental Learning under Limited Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {25055-25065} }
Context-Nav: Context-Driven Exploration and Viewpoint-Aware 3D Spatial Reasoning for Instance Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2026_CVPR, author = {Jang, Won Shik and Kim, Ue-Hwan}, title = {Context-Nav: Context-Driven Exploration and Viewpoint-Aware 3D Spatial Reasoning for Instance Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {9626-9636} }
QVGGT: Post-Training Quantized Visual Geometry Grounded Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Zhizhen and Wang, Hesong and Wang, Huan}, title = {QVGGT: Post-Training Quantized Visual Geometry Grounded Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7536-7545} }
OptiMVMap: Offline Vectorized Map Construction via Optimal Multi-vehicle Perspectives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dan_2026_CVPR, author = {Dan, Zedong and Wang, Zijie and Zhang, Wei and Lin, Xiangru and Zhang, Weiming and Tan, Xiao and Wang, Jingdong and Lin, Liang and Li, Guanbin}, title = {OptiMVMap: Offline Vectorized Map Construction via Optimal Multi-vehicle Perspectives}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {18140-18149} }
Learning Explicit Continuous Motion Representation for Dynamic Gaussian Splatting from Monocular Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xuankai and Xiao, Junjin and Huang, Shangwei and Zheng, Wei-shi and Zhang, Qing}, title = {Learning Explicit Continuous Motion Representation for Dynamic Gaussian Splatting from Monocular Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {33291-33300} }
Hierarchical Attacks for Multi-Modal Multi-Agent Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Hao and Wu, Tiru and Jiang, Yan and Zhou, Wanqi and Hu, Junxing and Han, Ai}, title = {Hierarchical Attacks for Multi-Modal Multi-Agent Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {42331-42340} }
MotionV2V: Editing Motion in a Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Burgert_2026_CVPR, author = {Burgert, Ryan and Herrmann, Charles and Cole, Forrester and Ryoo, Michael S. and Wadhwa, Neal and Voynov, Andrey and Ruiz, Nataniel}, title = {MotionV2V: Editing Motion in a Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {35988-35997} }
Human-Centric Multi-Exposure Fusion: Benchmark and Bi-level Cognition Distillation Framework-
[pdf]
[supp]
[bibtex]@InProceedings{Shang_2026_CVPR, author = {Shang, Jingjie and Ma, Tengyu and Zhang, Heng and Liu, Jinyuan and Liu, Risheng and Wang, Yuan and Bo, Xiaochen}, title = {Human-Centric Multi-Exposure Fusion: Benchmark and Bi-level Cognition Distillation Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {26572-26581} }
DC-Merge: Improving Model Merging with Directional Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Han-Chen and Zhou, Zi-Hao and Luo, Mao-Lin and Di, Shimin and Zhang, Min-Ling and Wei, Tong}, title = {DC-Merge: Improving Model Merging with Directional Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22248-22258} }
Random Wins All: Rethinking Grouping Strategies for Vision Tokens-
[pdf]
[arXiv]
[bibtex]@InProceedings{Fan_2026_CVPR, author = {Fan, Qihang and Ai, Yuang and Huang, Huaibo and He, Ran}, title = {Random Wins All: Rethinking Grouping Strategies for Vision Tokens}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {27357-27366} }
R-4B: Incentivizing General-Purpose Auto-Thinking in MLLMs via Bi-Mode Annealing and Reinforce Learning-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Qi and Ni, Bolin and Xiang, Shiming and Peng, Houwen}, title = {R-4B: Incentivizing General-Purpose Auto-Thinking in MLLMs via Bi-Mode Annealing and Reinforce Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {7891-7900} }
When Robots Obey the Patch: Universal Transferable Patch Attacks on Vision-Language-Action Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Hui and Yu, Yi and Yang, Yiming and Yi, Chenyu and Zhang, Qixin and Shen, Bingquan and Kot, Alex C. and Jiang, Xudong}, title = {When Robots Obey the Patch: Universal Transferable Patch Attacks on Vision-Language-Action Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {22867-22878} }
Life-IQA: Boosting Blind Image Quality Assessment through GCN-enhanced Layer Interaction and MoE-based Feature Decoupling-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Long and Duan, Huiyu and Zheng, Guoquan and Zhang, Jianbo and Hao, Jie and Yuan, Liang}, title = {Life-IQA: Boosting Blind Image Quality Assessment through GCN-enhanced Layer Interaction and MoE-based Feature Decoupling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {29866-29876} }
Seeing through boxes: Non-Line-of-Sight 3D Reconstruction from Radar Signals-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Jiachen and Shanbhag, Hailan and Al Hassanieh, Haitham}, title = {Seeing through boxes: Non-Line-of-Sight 3D Reconstruction from Radar Signals}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2026}, pages = {1221-1230} }
Efficient Training for Human Video Generation with Entropy-Guided Prioritized Progressive Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Changlin and Zhang, Jiawei and Liu, Shuhao and Lin, Sihao and Shi, Zeyi and Li, Zhihui and Chang, Xiaojun},
  title     = {Efficient Training for Human Video Generation with Entropy-Guided Prioritized Progressive Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {5967--5977}
}
CoT-Edit: Let CoT Guide Instruction Video Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Sen and Guan, Fengbin and Zhang, Youliang and Li, Xin and Chen, Zhibo},
  title     = {{CoT-Edit}: Let {CoT} Guide Instruction Video Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {37960--37970}
}
Sparse Spectral LoRA: Routed Experts for Medical VLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Nejatimanzari_2026_CVPR,
  author    = {Nejatimanzari, Omid and Asgariandehkordi, Hojat and Koleilat, Taha and Xiao, Yiming and Rivaz, Hassan},
  title     = {Sparse Spectral {LoRA}: Routed Experts for Medical {VLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35351--35362}
}
MFEN: Multi-Frequency Expert Network for Visible-Infrared Person Re-ID-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Xulin and Lu, Yan and Liu, Bin and Yang, Qinhong and Chu, Qi and Gong, Tao and Yu, Nenghai},
  title     = {{MFEN}: Multi-Frequency Expert Network for Visible-Infrared Person {Re-ID}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {18471--18480}
}
UniRain: Unified Image Deraining with RAG-based Dataset Distillation and Multi-objective Reweighted Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Qianfeng and Guan, Qiyuan and Chen, Xiang and Jin, Jiyu and Jin, Guiyue and Dong, Jiangxin},
  title     = {{UniRain}: Unified Image Deraining with {RAG}-based Dataset Distillation and Multi-objective Reweighted Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {12428--12437}
}
From Softmax to Dirichlet: Evidential Learning for Semi-supervised Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Mai_2026_CVPR,
  author    = {Mai, Huayu and Sun, Rui and Chen, Yujia and Li, Wangkai and Wang, Bingzhou and Li, Aibing and He, Zhangyu and Wang, Yuan},
  title     = {From Softmax to {Dirichlet}: Evidential Learning for Semi-supervised Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {27697--27707}
}
FlowDirector: Training-Free Flow Steering for Precise Text-to-Video Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Guangzhao and Yang, Yanming and Song, Chenxi and Liu, Xiaohong and Zhang, Chi},
  title     = {{FlowDirector}: Training-Free Flow Steering for Precise Text-to-Video Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {7805--7815}
}
ImageRAGTurbo: Towards One-step Text-to-Image Generation with Retrieval-Augmented Diffusion Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Peijie and Ramshankar, Hariharan and Ramisa, Arnau and C, Amit Kumar K and Vidal, Ren{\'e} and Salaka, Vamsi and Bhagat, Rahul},
  title     = {{ImageRAGTurbo}: Towards One-step Text-to-Image Generation with Retrieval-Augmented Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {529--539}
}
Principled Steering via Null-space Projection for Jailbreak Defense in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Xingyu and Zhu, Beier and Wang, Shuo and Fang, Junfeng and Zhao, Kesen and Zhang, Hanwang and He, Xiangnan},
  title     = {Principled Steering via Null-space Projection for Jailbreak Defense in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {22889--22899}
}
DiT-IC: Aligned Diffusion Transformer for Efficient Image Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Junqi and Lu, Ming and Li, Xingchen and Ke, Anle and Zhang, Ruiqi and Ma, Zhan},
  title     = {{DiT-IC}: Aligned Diffusion Transformer for Efficient Image Compression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25719--25729}
}
PointTPA: Dynamic Network Parameter Adaptation for 3D Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Siyuan and Zheng, Chaoqun and Zhou, Xin and Feng, Tianrui and Liang, Dingkang and Bai, Xiang},
  title     = {{PointTPA}: Dynamic Network Parameter Adaptation for {3D} Scene Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {36571--36581}
}
Can Natural Image Autoencoders Compactly Tokenize fMRI Volumes for Long-Range Dynamics Modeling?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Peter Yongho and Park, Juhyeon and Park, Jungwoo and Choi, Jubin and Seo, Jungwoo and Cha, Jiook and Moon, Taesup},
  title     = {Can Natural Image Autoencoders Compactly Tokenize {fMRI} Volumes for Long-Range Dynamics Modeling?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {35321--35330}
}
Dexterous World Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Byungjun and Kim, Taeksoo and Lee, Junyoung and Joo, Hanbyul},
  title     = {Dexterous World Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {29663--29673}
}
You Only Erase Once: Erasing Anything without Bringing Unexpected Content-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Yixing and Zhang, Qing and Xu, Wenju and Zheng, Wei-Shi},
  title     = {You Only Erase Once: Erasing Anything without Bringing Unexpected Content},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {43197--43207}
}
TokenGS: Decoupling 3D Gaussian Prediction from Pixels with Learnable Tokens-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Jiawei and Tyszkiewicz, Michal Jan and Huang, Jiahui and Gojcic, Zan},
  title     = {{TokenGS}: Decoupling {3D} {Gaussian} Prediction from Pixels with Learnable Tokens},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {15365--15375}
}
EditMGT: Unleashing Potentials of Masked Generative Transformers in Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chow_2026_CVPR,
  author    = {Chow, Wei and Li, Linfeng and Kong, Lingdong and Li, Zefeng and Xu, Qi and Song, Hang and Ye, Tian and Wang, Xian and Bai, Jinbin and Xu, Shilin and Li, Xiangtai and Pan, Junting and Liu, Shaoteng and Zhou, Ran and Yang, Tianshu and Liu, Songhua},
  title     = {{EditMGT}: Unleashing Potentials of Masked Generative Transformers in Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {38038--38048}
}
World in a Frame: Understanding Culture Mixing as a New Challenge for Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Eunsu and Park, Junyeong and An, Na Min and Kim, Junseong and Patel, Hitesh Laxmichand and Jin, Jiho and Kruk, Julia and Agarwal, Amit and Panda, Srikant and Ilasariya, Fenal Ashokbhai and Shim, Hyunjung and Oh, Alice},
  title     = {World in a Frame: Understanding Culture Mixing as a New Challenge for Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {2477--2489}
}
NuWa: Deriving Lightweight Class-Specific Vision Transformers for Edge Devices-
[pdf]
[supp]
[bibtex]@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Ziteng and He, Qiang and Li, Bing and Chen, Feifei and Jin, Hai and Yang, Yun},
  title     = {{NuWa}: Deriving Lightweight Class-Specific Vision Transformers for Edge Devices},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {156--166}
}
SCAPO: Self-Supervised Category-Level Articulated Pose Estimation from a Single 3D Observation-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Can and Lee, Gim Hee},
  title     = {{SCAPO}: Self-Supervised Category-Level Articulated Pose Estimation from a Single {3D} Observation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {13802--13811}
}
CGHair: Compact Gaussian Hair Reconstruction with Card Clustering-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Haimin and Sarkar, Srinjay and Mosella-Montoro, Albert and Carrasco, Francisco Vicente and De la Torre, Fernando},
  title     = {{CGHair}: Compact {Gaussian} Hair Reconstruction with Card Clustering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {25225--25235}
}
Frequency-domain Manipulation for Face Obfuscation-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Jintae and Ko, Keunsoo and Kim, Chang-Su},
  title     = {Frequency-domain Manipulation for Face Obfuscation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10241--10250}
}
E3AD: An Emotion-Aware Vision-Language-Action Model for Human-Centric End-to-End Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Yihong and Liao, Haicheng and Nie, Tong and He, Junlin and Qu, Ao and Chen, Kehua and Ma, Wei and Li, Zhenning and Sun, Lijun and Xu, Chengzhong},
  title     = {{E3AD}: An Emotion-Aware Vision-Language-Action Model for Human-Centric End-to-End Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2026},
  pages     = {10610--10620}
}
Back

