Findings
- Back
Revisiting Real-Time Detection Transformer with Efficient Encoder Design-
[pdf]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Jiannan and Kane, Aditya and Zhou, Fengzhe and Wei, Yunchao and Shi, Humphrey},
  title     = {Revisiting Real-Time Detection Transformer with Efficient Encoder Design},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6859--6868}
}
Assessing the Reliability of Image Quality Metrics and Mitigating Quality Bias in Generative Models-
[pdf]
[supp]
[bibtex]@InProceedings{Jung_2026_CVPR,
  author    = {Jung, Hoin and Lu, Shenyu and Wang, De and Wang, Xiaoqian},
  title     = {Assessing the Reliability of Image Quality Metrics and Mitigating Quality Bias in Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7956--7967}
}
Now You See It, Now You Don't: Instant Concept Erasure for Safe Text-to-Image and Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Das_Biswas_2026_CVPR,
  author    = {Das Biswas, Shristi and Roy, Arani and Roy, Kaushik},
  title     = {Now You See It, Now You Don't: Instant Concept Erasure for Safe Text-to-Image and Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7903--7913}
}
Towards Generalization of Scene Text Tampering Localization via Causal Invariance-
[pdf]
[supp]
[bibtex]@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Huiru and Dong, Bin and Huang, Kaizhu and Huang, Xiaowei and Wang, Qiufeng},
  title     = {Towards Generalization of Scene Text Tampering Localization via Causal Invariance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7262--7271}
}
TriGuard-FL: A User-Centric Trust Triad in Federated Learning via Auditable Data, Verifiable Contributions, and Antidote-Driven Mitigation-
[pdf]
[supp]
[bibtex]@InProceedings{Kumar_2026_CVPR,
  author    = {Kumar, K Naveen and Guizani, Mohsen},
  title     = {{TriGuard-FL}: A User-Centric Trust Triad in Federated Learning via Auditable Data, Verifiable Contributions, and Antidote-Driven Mitigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7945--7955}
}
CodePlot-CoT: Mathematical Visual Reasoning by Thinking with Code-Driven Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Duan_2026_CVPR,
  author    = {Duan, Chengqi and Sun, Kaiyue and Fang, Rongyao and Zhang, Manyuan and Feng, Yan and Luo, Ying and Liu, Yufang and Wang, Ke and Pei, Peng and Cai, Xunliang and Li, Hongsheng and Ma, Yi and Liu, Xihui},
  title     = {{CodePlot-CoT}: Mathematical Visual Reasoning by Thinking with Code-Driven Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9586--9596}
}
Continual Adaptation of Vision Foundational Models for Semantic Segmentation in Adverse Weather-
[pdf]
[supp]
[bibtex]@InProceedings{Jangamreddy_2026_CVPR,
  author    = {Jangamreddy, Nikhil Kumar and Baktashmotlagh, Mahsa and Arora, Chetan},
  title     = {Continual Adaptation of Vision Foundational Models for Semantic Segmentation in Adverse Weather},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7686--7696}
}
Reversing the Flow: Generation-to-Understanding Synergy in Large Multimodal Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tong_2026_CVPR,
  author    = {Tong, Yujun and Chang, Dongliang and Yin, Zijin and Liu, Xintong and Fang, Yuanchen and Ma, Zhanyu},
  title     = {Reversing the Flow: Generation-to-Understanding Synergy in Large Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6976--6986}
}
VirPro: Visual-Referred Probabilistic Prompt Learning for Weakly-Supervised Monocular 3D Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Chupeng and Rao, Jiyong and Sun, Shangquan and Zhao, Runkai and Cai, Weidong},
  title     = {{VirPro}: Visual-Referred Probabilistic Prompt Learning for Weakly-Supervised Monocular {3D} Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7530--7540}
}
Learning When to Look: A Disentangled Curriculum for Strategic Perception in Multimodal Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Siqi and Gao, Zilve and Qiu, Haibo and Liu, Fanfan and Shi, Peng and Zeng, Zhixiong and Liao, Qingmin and Ma, Lin},
  title     = {Learning When to Look: A Disentangled Curriculum for Strategic Perception in Multimodal Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9738--9748}
}
QuPAINT: Physics-Aware Instruction Tuning Approach to Quantum Material Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Xuan Bac and Nguyen, Hoang-Quan and Pandey, Sankalp and Faltermeier, Tim and Borys, Nicholas and Churchill, Hugh and Luu, Khoa},
  title     = {{QuPAINT}: Physics-Aware Instruction Tuning Approach to Quantum Material Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8684--8694}
}
DocSLM: A Small Vision-Language Model for Long Multimodal Document Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hannan_2026_CVPR,
  author    = {Hannan, Tanveer and Mallios, Dimitrios and Pathak, Parth and Sardari, Faegheh and Seidl, Thomas and Bertasius, Gedas and Fayyaz, Mohsen and Sengupta, Sunando},
  title     = {{DocSLM}: A Small Vision-Language Model for Long Multimodal Document Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9337--9346}
}
Count What Repeats: Period-Adaptive Multi-Scale Consistency for Self-Supervised Repetitive Action Counting-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Shizhao and Li, Jun and Li, Qiming},
  title     = {Count What Repeats: Period-Adaptive Multi-Scale Consistency for Self-Supervised Repetitive Action Counting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8143--8152}
}
Adaptive Reinforcement for Open-ended Medical Reasoning via Semantic-Guided Reward Collapse Mitigation-
[pdf]
[supp]
[arXiv]
% Key suffixed with the title's first word: an earlier entry in this file already uses "Liu_2026_CVPR".
[bibtex]@InProceedings{Liu_2026_CVPR_Adaptive,
  author    = {Liu, Yizhou and Yang, Dingkang and Chen, Zizhi and Han, Minghao and Zhang, Xukun and Liu, Keliang and Wei, Jingwei and Zhang, Lihua},
  title     = {Adaptive Reinforcement for Open-ended Medical Reasoning via Semantic-Guided Reward Collapse Mitigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8651--8660}
}
Proto-SaGa: Prototype-based 3D Scene Segmentation with Semantic-aware Gaussian Grouping-
[pdf]
[supp]
[bibtex]@InProceedings{Oh_2026_CVPR,
  author    = {Oh, Youngmin and Oh, Changjae and Ham, Bumsub},
  title     = {{Proto-SaGa}: Prototype-based {3D} Scene Segmentation with Semantic-aware {Gaussian} Grouping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7493--7502}
}
Attention-Space Contrastive Guidance for Efficient Hallucination Mitigation in LVLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jo_2026_CVPR,
  author    = {Jo, Yujin and Bae, Sangyoon and Kim, Taesup},
  title     = {Attention-Space Contrastive Guidance for Efficient Hallucination Mitigation in {LVLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9706--9715}
}
It's Time to Get It Right: Improving Analog Clock Reading and Clock-Hand Spatial Reasoning in Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Jaeha and Lee, Jin Won and You, Siwoo and Lee, Jangho},
  title     = {It's Time to Get It Right: Improving Analog Clock Reading and Clock-Hand Spatial Reasoning in Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9500--9509}
}
STS-Mixer: Spatio-Temporal-Spectral Mixer for 4D Point Cloud Video Understanding-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Wenhao and Jiang, Xueying and Zhang, Gongjie and Zhang, Xiaoqin and Shao, Ling and Lu, Shijian},
  title     = {{STS-Mixer}: Spatio-Temporal-Spectral Mixer for {4D} Point Cloud Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8185--8194}
}
VACoT: Rethinking Visual Data Augmentation with VLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zhengzhuo and Sun, Chong and Du, SiNan and Li, Chen and Lyu, Jing and Yuan, Chun},
  title     = {{VACoT}: Rethinking Visual Data Augmentation with {VLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9780--9790}
}
Active Video Perception: Iterative Evidence Seeking for Agentic Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ziyang and Zhou, Honglu and Wang, Shijie and Li, Junnan and Xiong, Caiming and Savarese, Silvio and Bansal, Mohit and Ryoo, Michael S. and Niebles, Juan Carlos},
  title     = {Active Video Perception: Iterative Evidence Seeking for Agentic Long Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9088--9099}
}
BrandFusion: A Multi-Agent Framework for Seamless Brand Integration in Text-to-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Zihao and Wang, Ruotong and Lyu, Siwei and Zhang, Min and Wu, Baoyuan},
  title     = {{BrandFusion}: A Multi-Agent Framework for Seamless Brand Integration in Text-to-Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8661--8671}
}
Learning through Creation: A Hash-Free Framework for On-the-Fly Category Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Bohan and Tang, Weidong and Chi, Zhixiang and Jin, Yi and Li, Zhenbo and Wang, Yang and Wu, Yanan},
  title     = {Learning through Creation: A Hash-Free Framework for On-the-Fly Category Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7830--7840}
}
Attention Never Lie: Visual Attention Defocus Reveals and Rectifies Hallucinations in MLLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Chenxi and Zhou, Yan and Yang, Jufeng},
  title     = {Attention Never Lie: Visual Attention Defocus Reveals and Rectifies Hallucinations in {MLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8897--8906}
}
Leveraging Arbitrary Data Sources for AI-Generated Image Detection Without Sacrificing Generalization-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR,
  author    = {He, Qinghui and Zhang, Haifeng and Bi, Xiuli and Liu, Bo and Pun, Chi-Man and Xiao, Bin},
  title     = {Leveraging Arbitrary Data Sources for {AI}-Generated Image Detection Without Sacrificing Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6748--6757}
}
ODOV: Benchmark the Open-Domain Open-Vocabulary Object Detection-
[pdf]
[supp]
% Key suffixed with the title's first word: an earlier entry in this file already uses "Zhang_2026_CVPR".
[bibtex]@InProceedings{Zhang_2026_CVPR_ODOV,
  author    = {Zhang, Yupeng and Han, Ruize and Zhou, Fangnan and Feng, Wei and Wan, Liang},
  title     = {{ODOV}: Benchmark the Open-Domain Open-Vocabulary Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6643--6654}
}
NRFP: A Noise-Robust Feature Plugin for Source-Free Domain Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Zou_2026_CVPR,
  author    = {Zou, Huanxin and Wu, Zhize and Jiang, Yue and Zhou, Jijian and Xu, Zhiwei and Li, Teng and Shu, Jianhua and Cheng, Fan},
  title     = {{NRFP}: A Noise-Robust Feature Plugin for Source-Free Domain Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7562--7572}
}
From Horizontal to Rotated: Cross-View Object Geo-Localization with Orientation Awareness-
[pdf]
[bibtex]@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Chenlin and Gong, Ao and Ling, Xingtao and Zhu, Yingying},
  title     = {From Horizontal to Rotated: Cross-View Object Geo-Localization with Orientation Awareness},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7101--7110}
}
ReFoCUS: Reinforcement-guided Frame Optimization for Contextual Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Hosu and Kim, Junho and Kim, Hyunjun and Ro, Yong Man},
  title     = {{ReFoCUS}: Reinforcement-guided Frame Optimization for Contextual Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8291--8302}
}
VSAS-Bench: Real-Time Evaluation of Visual Streaming Assistant Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vasu_2026_CVPR,
  author    = {Vasu, Pavan Kumar Anasosalu and Koc, Cem and Faghri, Fartash and Li, Chun-Liang and Feng, Bo and Lai, Zhengfeng and Cao, Meng and Tuzel, Oncel and Pouransari, Hadi},
  title     = {{VSAS-Bench}: Real-Time Evaluation of Visual Streaming Assistant Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9654--9663}
}
Switch-KD: Visual-Switch Knowledge Distillation for Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Haoyi and Wang, Xiaoxiao and Mao, Ning and Wang, Qian and Mu, Lifu and Zheng, Wen and Wei, Tao and Chen, Wei},
  title     = {{Switch-KD}: Visual-Switch Knowledge Distillation for Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9316--9326}
}
DeepSketcher: Internalizing Visual Manipulation for Multimodal Reasoning-
[pdf]
[arXiv]
% Key suffixed with the title's first word: an earlier entry in this file already uses "Zhang_2026_CVPR".
[bibtex]@InProceedings{Zhang_2026_CVPR_DeepSketcher,
  author    = {Zhang, Chi and Qiu, Haibo and Zhang, Qiming and Zeng, Zhixiong and Ma, Lin and Zhang, Jing},
  title     = {{DeepSketcher}: Internalizing Visual Manipulation for Multimodal Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9389--9400}
}
Exploring Hierarchical Consistency and Unbiased Objectness for Open-Vocabulary Object Detection-
[pdf]
[supp]
[arXiv]
% Key suffixed with the title's first word: an earlier entry in this file already uses "Lee_2026_CVPR".
[bibtex]@InProceedings{Lee_2026_CVPR_Exploring,
  author    = {Lee, Sanghoon and Lee, Geon and Park, Hyekang and Ham, Bumsub},
  title     = {Exploring Hierarchical Consistency and Unbiased Objectness for Open-Vocabulary Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6819--6828}
}
Revisiting Image Manipulation Localization under Realistic Manipulation Scenarios-
[pdf]
[supp]
[arXiv]
% Key suffixed with the title's first word: an earlier entry in this file already uses "Zhu_2026_CVPR".
[bibtex]@InProceedings{Zhu_2026_CVPR_Revisiting,
  author    = {Zhu, Xuekang and Zhou, Ji-Zhe and Feng, Kaiwen and Qu, Chenfan and Wang, Xiwen and Wang, Yunfei and Zhou, Liting and Liu, Jian},
  title     = {Revisiting Image Manipulation Localization under Realistic Manipulation Scenarios},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7198--7207}
}
MIRA: Multimodal Iterative Reasoning Agent for Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Ziyun and Hua, Hang and Luo, Jiebo},
  title     = {{MIRA}: Multimodal Iterative Reasoning Agent for Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9563--9573}
}
SAGE: Shape-Adapting Gated Experts for Adaptive Histopathology Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Thai_2026_CVPR,
  author    = {Thai, Gia Huy and Vu, Hoang-Nguyen and Phan, Anh-Minh and Ly, Quang-Thinh and Nguyen, Thi-Ngoc-Truc and Ho, Nhat},
  title     = {{SAGE}: Shape-Adapting Gated Experts for Adaptive Histopathology Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7337--7346}
}
HyperFM: A Efficient Hyperspectral Foundation Model with Spectral Grouping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tushar_2026_CVPR,
  author    = {Tushar, Zahid Hassan and Purushotham, Sanjay},
  title     = {{HyperFM}: A Efficient Hyperspectral Foundation Model with Spectral Grouping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6955--6965}
}
Mitigating Object Hallucinations in LVLMs via Attention Imbalance Rectification-
[pdf]
[supp]
[arXiv]
% Key suffixed with the title's first word: an earlier entry in this file already uses "Sun_2026_CVPR".
[bibtex]@InProceedings{Sun_2026_CVPR_Mitigating,
  author    = {Sun, Han and Li, Qin and Wang, Peixin and Zhang, Min},
  title     = {Mitigating Object Hallucinations in {LVLMs} via Attention Imbalance Rectification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8930--8940}
}
Hold-One-Shot-Out (HOSO) for Validation-Free Few-Shot CLIP Adapters-
[pdf]
[supp]
[bibtex]@InProceedings{Vorster_2026_CVPR,
  author    = {Vorster, Chris and Maniparambil, Mayug and O'Connor, Noel and Murphy, Noel and Molloy, Derek},
  title     = {Hold-One-Shot-Out ({HOSO}) for Validation-Free Few-Shot {CLIP} Adapters},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7820--7829}
}
Euclid's Gift: Enhancing Spatial Perception and Reasoning in Vision-Language Models via Geometric Surrogate Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Lian_2026_CVPR,
  author    = {Lian, Shijie and Wu, Changti and Yang, Laurence Tianruo and Yuan, Hang and Yu, Bin and Zhang, Lei and Chen, Kai},
  title     = {Euclid's Gift: Enhancing Spatial Perception and Reasoning in Vision-Language Models via Geometric Surrogate Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9824--9835}
}
PASR: Pose-Aware 3D Shape Retrieval from Occluded Single Views-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Jiaxin and Zhang, Guofeng and Ma, Wufei and Liang, Naifu and Kortylewski, Adam and Yuille, Alan},
  title     = {{PASR}: Pose-Aware {3D} Shape Retrieval from Occluded Single Views},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6869--6879}
}
MHMamba: Multi-Head Mamba for 3D Brain Tumor Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Tao_2026_CVPR,
  author    = {Tao, Hanjun and Wang, Hua and Zhang, Fan},
  title     = {{MHMamba}: Multi-Head {Mamba} for {3D} Brain Tumor Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7388--7397}
}
OV-Stitcher: A Global Context-Aware Framework for Training-Free Open Vocabulary Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Moon_2026_CVPR,
  author    = {Moon, Seungjae and Oh, Seunghyun and Ro, Youngmin},
  title     = {{OV-Stitcher}: A Global Context-Aware Framework for Training-Free Open Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7357--7367}
}
TP^2-DETR: Unlocking Deformable DETR for Zero-Shot Temporal Action Proposal Generation with Temporal Feature Pyramids-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Ya-Yun and Tippayamontri, Kan and Yang, Chih-Yuan and Hsu, Jane Yung-jen},
  title     = {{TP{\textasciicircum}2-DETR}: Unlocking Deformable {DETR} for Zero-Shot Temporal Action Proposal Generation with Temporal Feature Pyramids},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8237--8246}
}
ForestPrune: High-ratio Visual Token Compression for Video Multimodal Large Language Models Via Spatial-Temporal Forest Modeling-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ju_2026_CVPR,
  author    = {Ju, Shaobo and Song, Baiyang and Chen, Tao and Zhang, Jiapeng and Wu, Qiong and Chang, Chao and Wang, Huaixi and Zhou, Yiyi and Ji, Rongrong},
  title     = {{ForestPrune}: High-ratio Visual Token Compression for Video Multimodal Large Language Models Via Spatial-Temporal Forest Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8326--8336}
}
CarePilot: A Multi-Agent Framework for Long-Horizon Computer Task Automation in Healthcare-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ghosh_2026_CVPR,
  author    = {Ghosh, Akash and Ashraf, Tajamul and Singh, Rishu Kumar and Saeed, Numan and Saha, Sriparna and Chen, Xiuying and Khan, Salman},
  title     = {{CarePilot}: A Multi-Agent Framework for Long-Horizon Computer Task Automation in Healthcare},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9695--9705}
}
CurrMix: Curriculum-Enhanced MixUp for Long-Tailed Visual Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Jian_2026_CVPR,
  author    = {Jian, Zhongquan and Chen, Yanhao and Hu, Bingbing and Lv, Wenhan and Wang, Shaopan and Wu, Jipeng and Yao, Junfeng and Lu, Yang and Wu, Qingqiang},
  title     = {{CurrMix}: Curriculum-Enhanced {MixUp} for Long-Tailed Visual Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7706--7716}
}
Object-Centric Vision Token Pruning for Vision Language Models-
[pdf]
[arXiv]
% Key suffixed with the title's first word: an earlier entry in this file already uses "Li_2026_CVPR".
[bibtex]@InProceedings{Li_2026_CVPR_ObjectCentric,
  author    = {Li, Guangyuan and Zhao, Rongzhen and Deng, Jinhong and Wang, Yanbo and Pajarinen, Joni},
  title     = {Object-Centric Vision Token Pruning for Vision Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7040--7049}
}
SpHOR: A Representation Learning Perspective on Open-set Recognition for Identifying Unknown Classes in Deep Neural Networks-
[pdf]
[supp]
[bibtex]@InProceedings{Bahavan_2026_CVPR,
  author    = {Bahavan, Thiru Thillai Nadarasar and Seneviratne, Sachith and Halgamuge, Saman},
  title     = {{SpHOR}: A Representation Learning Perspective on Open-set Recognition for Identifying Unknown Classes in Deep Neural Networks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6901--6910}
}
coDrawAgents: A Multi-Agent Dialogue Framework for Compositional Image Generation-
[pdf]
[supp]
[arXiv]
% Key suffixed with the title's first word: an earlier entry in this file already uses "Li_2026_CVPR".
[bibtex]@InProceedings{Li_2026_CVPR_coDrawAgents,
  author    = {Li, Chunhan and Wu, Qifeng and Pan, Jia-Hui and Hui, Ka-Hei and Hu, Jingyu and Jiang, Yuming and Sheng, Bin and Liu, Xihui and Gong, Wenjuan and Liu, Zhengzhe},
  title     = {{coDrawAgents}: A Multi-Agent Dialogue Framework for Compositional Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9802--9812}
}
Memorization In Stable Diffusion Is Unexpectedly Driven by CLIP Embeddings-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Bumjun and No, Albert},
  title     = {Memorization In {Stable Diffusion} Is Unexpectedly Driven by {CLIP} Embeddings},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7988--7997}
}
Recursive Think-Answer Process for LLMs and VLMs-
[pdf]
[supp]
[arXiv]
% Key suffixed with the title's first word: an earlier entry in this file already uses "Lee_2026_CVPR".
[bibtex]@InProceedings{Lee_2026_CVPR_Recursive,
  author    = {Lee, Byung-Kwan and Chee, Youngchae and Ro, Yong Man},
  title     = {Recursive Think-Answer Process for {LLMs} and {VLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9608--9621}
}
FedOrtho: Efficient Federated Unlearning Via Orthogonal Convolution and Adaptive Soft Pruning-
[pdf]
[supp]
[bibtex]@InProceedings{Gong_2026_CVPR,
  author    = {Gong, Qinghui and Yang, Xue and Chen, Xunlei and Lai, Jinshan and Meng, Hua and Tang, Xiaohu},
  title     = {{FedOrtho}: Efficient Federated Unlearning Via Orthogonal Convolution and Adaptive Soft Pruning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8009--8018}
}
VoQA: Visual-only Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{An_2026_CVPR,
  author    = {An, Jianing and Jiang, Luyang and Luo, Jie and Wu, Wenjun and Huang, Lei},
  title     = {{VoQA}: Visual-only Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9100--9109}
}
IEA: Amateur-Friendly Conversational Image Editing Agent via Three Stages of Multitask Alignment-
[pdf]
[supp]
% Key suffixed with the title's first word: an earlier entry in this file already uses "Zhu_2026_CVPR".
[bibtex]@InProceedings{Zhu_2026_CVPR_IEA,
  author    = {Zhu, Zichen and Sun, Yuheng and Zhu, Mingxuan and Ma, Wenjie and Zhang, Situo and Wang, Zhexiang and Yang, Ziyue and Zhang, Danyang and Lan, Kunyao and Zhao, Zihan and Liu, Dingye and Xiang, Siqi and Chen, Lu and Yu, Kai},
  title     = {{IEA}: Amateur-Friendly Conversational Image Editing Agent via Three Stages of Multitask Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8672--8683}
}
SEM: Sparse Embedding Modulation for Post-Hoc Debiasing of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guimard_2026_CVPR,
  author    = {Guimard, Quentin and Bartsch, Federico and Caldarella, Simone and Aljundi, Rahaf and Ricci, Elisa and Mancini, Massimiliano},
  title     = {{SEM}: Sparse Embedding Modulation for Post-Hoc Debiasing of Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8101--8110}
}
Indexing Multimodal Language Models for Large-scale Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Tharwat_2026_CVPR,
  author    = {Tharwat, Bahey and Kordopatis-Zilos, Giorgos and Suma, Pavel and Reid, Ian and Tolias, Giorgos},
  title     = {Indexing Multimodal Language Models for Large-scale Image Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6737--6747}
}
MASS: Motion-Aware Spatial-temporal Grounding for Physics Reasoning and Comprehension in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Xiyang and Li, Zongxia and Jin, Jihui and KV, Gouthaman and Raj, Vishnu and Sinha, Nilotpal and Chen, Jingxi and Du, Fan and Manocha, Dinesh},
  title     = {{MASS}: Motion-Aware Spatial-temporal Grounding for Physics Reasoning and Comprehension in Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9433--9443}
}
MoonSeg3R: Monocular Online Zero-Shot Segment Anything in 3D with Reconstructive Foundation Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2026_CVPR,
  author    = {Du, Zhipeng and Danier, Duolikun and Lenssen, Jan Eric and Bilen, Hakan},
  title     = {{MoonSeg3R}: Monocular Online Zero-Shot Segment Anything in {3D} with Reconstructive Foundation Priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7304--7314}
}
VRSA: Jailbreaking Multimodal Large Language Models through Visual Reasoning Sequential Attack-
[pdf]
[supp]
[arXiv]
% Key suffixed with the title's first word: an earlier entry in this file already uses "Zhao_2026_CVPR".
[bibtex]@InProceedings{Zhao_2026_CVPR_VRSA,
  author    = {Zhao, Shiji and Xiong, Shukun and Huang, Yao and Yan, Jin and Wu, Zhenyu and Guan, Jiyang and Duan, Ranjie and Tao, Jialing and Xue, Hui and Wei, Xingxing},
  title     = {{VRSA}: Jailbreaking Multimodal Large Language Models through Visual Reasoning Sequential Attack},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9412--9421}
}
Taming Hallucinations: Boosting MLLMs' Video Understanding via Counterfactual Video Generation-
[pdf]
[supp]
[arXiv]
% Key suffixed with the title's first word: an earlier entry in this file already uses "Huang_2026_CVPR".
[bibtex]@InProceedings{Huang_2026_CVPR_Taming,
  author    = {Huang, Zhe and Wen, Hao and Hao, Aiming and Song, Bingze and Wu, Meiqi and Wu, Jiahong and Chu, Xiangxiang and Lu, Sheng and Wang, Haoqian},
  title     = {Taming Hallucinations: Boosting {MLLMs'} Video Understanding via Counterfactual Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8153--8163}
}
Evolutionary Multi-Agent Collaboration for Real-World Video Face Restoration-
[pdf]
[bibtex]@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Bowen and Wang, Tao and Zhang, Miao and Yu, Xin and Chen, Jinwei and Li, Bo and Zhang, Kaihao},
  title     = {Evolutionary Multi-Agent Collaboration for Real-World Video Face Restoration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8175--8184}
}
FedNPC: Stochastic Noise-driven Post-hoc Classifier Calibration Method for Federated Long-tailed Learning-
[pdf]
[supp]
% Key suffixed with the title's first word: an earlier entry in this file already uses "Gao_2026_CVPR".
[bibtex]@InProceedings{Gao_2026_CVPR_FedNPC,
  author    = {Gao, Jintong and Zhao, He and Yang, Yibo and Guo, Dandan},
  title     = {{FedNPC}: Stochastic Noise-driven Post-hoc Classifier Calibration Method for Federated Long-tailed Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7737--7746}
}
PrAda: Few-Shot Visual Adaptation for Text-Prompted Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rosi_2026_CVPR,
  author    = {Rosi, Gabriele and Cermelli, Fabio and Masone, Carlo and Caputo, Barbara},
  title     = {{PrAda}: Few-Shot Visual Adaptation for Text-Prompted Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7326--7336}
}
GDP: Graph-Based Dynamic Personalization for Multimodal Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ray_2026_CVPR, author = {Ray, Cong and Deng, Xiangwen and Huang, Feice and Wu, ZhengXian and Jiang, Shen'ao and Jiao, Peng and Liu, Zhifang and Wang, Haoqian}, title = {{GDP}: Graph-Based Dynamic Personalization for Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9195--9204} }
Human-Intervention Segmentation via Federated Intent Embedding and Multi-Mask Recommendation-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Yeongsu and Choi, Seo-Yeon and Lee, Kyungsu}, title = {Human-Intervention Segmentation via Federated Intent Embedding and Multi-Mask Recommendation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8857--8867} }
Ninja Codes: Neurally Generated Fiducial Markers for Stealthy 6-DoF Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Takeuchi_2026_CVPR, author = {Takeuchi, Yuichiro and Imoto, Yusuke and Kato, Shunya}, title = {{Ninja Codes}: Neurally Generated Fiducial Markers for Stealthy {6-DoF} Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6880--6889} }
Can Textual Reasoning Improve the Performance of MLLMs on Fine-Grained Visual Classification?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Jie and Su, Yiyang and Liu, Xiaoming}, title = {Can Textual Reasoning Improve the Performance of {MLLMs} on Fine-Grained Visual Classification?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9401--9411} }
Alleviating Hallucinations in Large Vision-Language Models via Decoding-Time Perturbation Adaptation-
[pdf]
[bibtex]@InProceedings{Bai_2026_CVPR, author = {Bai, Jiaqi and Guo, Hongcheng and Liu, Jiaheng and Zhou, Zhibo and Yang, Jian and Huang, Feiran}, title = {Alleviating Hallucinations in Large Vision-Language Models via Decoding-Time Perturbation Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9867--9877} }
Trajectory-Diversity-Driven Robust Vision-and-Language Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jiangyang and Wan, Cong and Dong, SongLin and Ding, Chenhao and Wang, Qiang and Ma, Zhiheng and Gong, Yihong}, title = {Trajectory-Diversity-Driven Robust Vision-and-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9143--9154} }
Efficient3D: A Unified Framework for Adaptive and Debiased Token Reduction in 3D MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Yuhui and Yu, Siyue and Yang, Yuxing and Cheng, Guangliang and Xiao, Jimin}, title = {{Efficient3D}: A Unified Framework for Adaptive and Debiased Token Reduction in {3D} {MLLMs}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8941--8951} }
Learning Multi-Modal Prototypes for Cross-Domain Few-Shot Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Wanqi and Guo, Jingcai and Cai, Yuxiang and Chen, Zhi}, title = {Learning Multi-Modal Prototypes for Cross-Domain Few-Shot Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7747--7757} }
Class-Aware Drift Compensation for Non-Uniform Semantic Shift in Continual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Fankang and Jin, Lu and Sun, Yanpeng and Xuan, Shiyu and Li, Zechao}, title = {Class-Aware Drift Compensation for Non-Uniform Semantic Shift in Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7717--7727} }
NCSTR: Node-Centric Decoupled Spatio-Temporal Reasoning for Video-based Human Pose Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huynh_2026_CVPR, author = {Huynh, Quang Dang and Yin, Xuefei and Busch, Andrew and Espinosa, Hugo G. and Liew, Alan Wee-Chung and Worsey, Matthew T. O. and Zhu, Yanming}, title = {{NCSTR}: Node-Centric Decoupled Spatio-Temporal Reasoning for Video-based Human Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8368--8377} }
OKGraph: Online Knowledge Graph Probing for Open-vocabulary Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Junhui and Cai, Zhizhen and Wang, Puze and Ke, Guanzhou and Yang, Jianhua and Zhang, Man and Zhang, Qiang and He, Shengfeng}, title = {{OKGraph}: Online Knowledge Graph Probing for Open-vocabulary Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6717--6726} }
RISE: Enhancing VLM Image Annotation with Self-Supervised Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Suhang and Hu, Wei and Su, Yuhang and Zhang, Fan}, title = {{RISE}: Enhancing {VLM} Image Annotation with Self-Supervised Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9878--9887} }
Revisiting Prototype Rehearsal for Exemplar-Free Continual Learning: Manifold-Aware Boundary Sampling with Adaptive Class-Balanced Loss-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Hongye and Krawczyk, Bartosz}, title = {Revisiting Prototype Rehearsal for Exemplar-Free Continual Learning: Manifold-Aware Boundary Sampling with Adaptive Class-Balanced Loss}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7852--7861} }
DARTS: Distance-Aware Robust Training for Selective Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Sayyed_2026_CVPR, author = {Sayyed, A. Q. M. Sazzad and Bastian, Nathaniel D. and Restuccia, Francesco}, title = {{DARTS}: Distance-Aware Robust Training for Selective Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8806--8815} }
Do All Individual Layers Help? An Empirical Study of Task-Interfering Layers in Vision-Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Zhiming and Wei, Yujie and Feng, Lei and Su, Xiu and Xia, Xiaobo and Guan, Weili and Xie, Zeke and Yang, Shuo}, title = {Do All Individual Layers Help? {An} Empirical Study of Task-Interfering Layers in Vision-Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9597--9607} }
D^2-STX: Decoupling Spatial-Temporal Cross-Attention for Dual-branch Repetitive Action Counting-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xiaoai and Wang, Hang and Liu, Yan and Hu, Huan and Yu, Bruce X. B.}, title = {{D$^2$-STX}: Decoupling Spatial-Temporal Cross-Attention for Dual-branch Repetitive Action Counting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8205--8214} }
Efficient Unlearning through Maximizing Relearning Convergence Delay-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tran_2026_CVPR, author = {Tran, Khoa and Woo, Simon S.}, title = {Efficient Unlearning through Maximizing Relearning Convergence Delay}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7968--7977} }
Video Inspector: An Agentic-RL Framework and Benchmark for Human-Aligned Generative Video Evaluation-
[pdf]
[bibtex]@InProceedings{Somers_2026_CVPR, author = {Somers, Jacey and Zale, Harrison and Mason, Janine and Walker, Tina and Quinn, Eddie and Lewis, Felix and Wright, Gavin and Young, Yvonne and Sullivan, Charles and Carter, Wayne and Foster, Julian}, title = {{Video Inspector}: An {Agentic-RL} Framework and Benchmark for Human-Aligned Generative Video Evaluation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8534--8543} }
Distilling Counterfactual Reasoning from Language to Vision: Causal Graph-Guided Post-Training for Video Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yuefei and Liu, Jiang and Lin, Xiaodong and Tang, Ruixiang}, title = {Distilling Counterfactual Reasoning from Language to Vision: Causal Graph-Guided Post-Training for Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9165--9174} }
Seeing Helps Reasoning in Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Gan_2026_CVPR, author = {Gan, Yulu and Zhao, Kaiya Ivy and Poggio, Tomaso and Isola, Phillip}, title = {Seeing Helps Reasoning in Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7080--7090} }
TAPNext++: What's Next for Tracking Any Point (TAP)?-
[pdf]
[supp]
[bibtex]@InProceedings{Jung_2026_CVPR, author = {Jung, Sebastian and Zholus, Artem and Sundermeyer, Martin and Doersch, Carl and Goroshin, Ross and Tan, David Joseph and Chandar, Sarath and Triebel, Rudolph and Tombari, Federico}, title = {{TAPNext++}: What's Next for Tracking Any Point ({TAP})?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8429--8438} }
From Pixels to Nucleotides: End-to-End Token-Based Video Compression for DNA Storage-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ruan_2026_CVPR, author = {Ruan, Cihan and Zhou, Lebin and Zhao, Bingqing and Han, Rongduo and Yuan, Qiming and Zhu, Chenchen and Han, Linyi and Yang, Liang and Wang, Wei and Jiang, Wei and Ling, Nam}, title = {From Pixels to Nucleotides: End-to-End Token-Based Video Compression for {DNA} Storage}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8544--8553} }
A Unified Privacy-Utility Framework for Collaborative Inference via Randomized Smoothing-
[pdf]
[bibtex]@InProceedings{Ding_2026_CVPR, author = {Ding, Shiwei and Zhang, Lan and Wang, Zhenlin and Yuan, Xiaoyong}, title = {A Unified Privacy-Utility Framework for Collaborative Inference via Randomized Smoothing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8039--8048} }
ConInfer: Context-Aware Inference for Training-Free Open-Vocabulary Remote Sensing Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Wenyang and Hu, Zhanxuan and Zhang, Yaping and Ning, Hailong and Tai, Yonghang}, title = {{ConInfer}: Context-Aware Inference for Training-Free Open-Vocabulary Remote Sensing Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7408--7418} }
Video-R4: Reinforcing Text-Rich Video Reasoning with Visual Rumination-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Yolo Yunlong and Shimada, Daiki and Hua, Hang and Huang, Chao and Bi, Jing and Feris, Rogerio and Xu, Chenliang}, title = {{Video-R4}: Reinforcing Text-Rich Video Reasoning with Visual Rumination}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8314--8325} }
Learning to Wander: Improving the Global Image Geolocation Ability of LMMs via Actionable Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Yushuo and Duan, Huiyu and Zhang, Zicheng and Liu, Xiaohong and Min, Xiongkuo}, title = {Learning to Wander: Improving the Global Image Geolocation Ability of {LMMs} via Actionable Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7208--7219} }
Mitigating Visual Context Degradation in Large Multimodal Models: A Training-Free Decoupled Agentic Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jia_2026_CVPR, author = {Jia, Hongrui and Jiang, Chaoya and Zhang, Shikun and Ye, Wei}, title = {Mitigating Visual Context Degradation in Large Multimodal Models: A Training-Free Decoupled Agentic Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9574--9585} }
MuSCM: Mutual Spatial Correlation Mapping for Class Incremental Detection Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Zhong_2026_CVPR, author = {Zhong, Jian and Jiao, Yifan and Shao, Xi and Bao, Bing-Kun}, title = {{MuSCM}: Mutual Spatial Correlation Mapping for Class Incremental Detection Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7758--7767} }
Learning to Select Visual In-Context Demonstrations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Eugene and Lin, Yu-Chi and Diao, Jiajie}, title = {Learning to Select Visual In-Context Demonstrations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9455--9465} }
MPerS: Dynamic MLLM MixExperts Perception-Guided Remote Sensing Scene Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Ziyi and Ma, Xianping and Wang, Ziyao and Zhang, Hongyang and Pun, Man On}, title = {{MPerS}: Dynamic {MLLM} {MixExperts} Perception-Guided Remote Sensing Scene Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7251--7261} }
Distilling Out-of-Distribution Knowledge from Large Language Models for CLIP Generalization-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Qiji and Yang, Chuanguang and An, Zhulin and Huang, Libo and Zhao, Erhu and Li, Yuqi and Xu, Yongjun}, title = {Distilling Out-of-Distribution Knowledge from Large Language Models for {CLIP} Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9531--9541} }
PTAD: Pose and Texture Agnostic Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhuo_2026_CVPR, author = {Zhuo, Wei and Xiang, Jianen and Liu, Miaomiao and Lu, Huajun}, title = {{PTAD}: Pose and Texture Agnostic Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6779--6788} }
Dual-Stage Parameter-Efficient Fine-Tuning for Consistent Spatial and Temporal Representation-
[pdf]
[bibtex]@InProceedings{Xia_2026_CVPR, author = {Xia, Junhao and Zhang, Chaoyang and Zhang, Yecheng and Zhou, Chengyang and Wang, Zhichang and Liu, Bochun and Yin, Dongshuo}, title = {Dual-Stage Parameter-Efficient Fine-Tuning for Consistent Spatial and Temporal Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8607--8617} }
GreenPlanner: Practical Floorplan Layout Generation via an Energy-Aware and Function-Feasible Generative Framework-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR, author = {Zeng, Pengyu and Dai, Yuqin and Yin, Jun and Zhong, Jing and Han, Ziyang and Shi, Chaoyang and Jin, ZhanXiang and Jiang, Maowei and Han, Yuxing and Lu, Shuai}, title = {{GreenPlanner}: Practical Floorplan Layout Generation via an Energy-Aware and Function-Feasible Generative Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8596--8606} }
Entropy-Constrained Information Optimal Transport for Multi-View Geo-Localization-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Xiaoxi and Sun, Bo and An, Yisheng and Liu, Ganchao}, title = {Entropy-Constrained Information Optimal Transport for Multi-View Geo-Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7188--7197} }
Label-Agnostic Category Discovery-
[pdf]
[supp]
[bibtex]@InProceedings{Bian_2026_CVPR, author = {Bian, Yuwei and Wang, Shidong and Li, Chunming and Zhang, Haofeng}, title = {Label-Agnostic Category Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7573--7582} }
Autoregressive Universal Video Segmentation Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Heo_2026_CVPR, author = {Heo, Miran and Hwang, Sukjun and Chen, Min-Hung and Wang, Yu-Chiang Frank and Gu, Albert and Kim, Seon Joo and Hachiuma, Ryo}, title = {Autoregressive Universal Video Segmentation Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7429--7438} }
Equivariant Unsupervised Object Detection with Learnable Riesz Transform and Composite Spatial Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Chaki_2026_CVPR, author = {Chaki, Sayan Kumar and Fournel, Thierry and Emonet, R{\'e}mi}, title = {Equivariant Unsupervised Object Detection with Learnable {Riesz} Transform and Composite Spatial Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7008--7017} }
Modality-Aware Bit Allocation for Mixed-Precision Quantization of Vision-Language Models-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xi and Zhu, Hanwei and Wang, Jiamang and Wu, Xiaolin and Lin, Weisi}, title = {Modality-Aware Bit Allocation for Mixed-Precision Quantization of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9305--9315} }
Counterfactual Segmentation Reasoning: Diagnosing and Mitigating Pixel-Grounding Hallucination-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xinzhuo and Juvekar, Adheesh and Zhang, Jiaxun and Liu, Xingyou and Wahed, Muntasir and Nguyen, Kiet A. and Shen, Yifan and Yu, Tianjiao and Lourentzou, Ismini}, title = {Counterfactual Segmentation Reasoning: Diagnosing and Mitigating Pixel-Grounding Hallucination}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7450--7460} }
MMR1: Enhancing Multimodal Reasoning with Variance-Aware Sampling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Leng_2026_CVPR, author = {Leng, Sicong and Wang, Jing and Li, Jiaxi and Zhang, Hao and Hu, Zhiqiang and Zhang, Boqiang and Jiang, Yuming and Zhang, Hang and Li, Xin and Zhao, Deli and Lu, Wei and Rong, Yu and Sun, Aixin and Lu, Shijian}, title = {{MMR1}: Enhancing Multimodal Reasoning with Variance-Aware Sampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9075--9087} }
Seeing Through Fog: Towards Fog-Invariant Action Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Enqi and Pan, Liyuan and Gao, Zhi and Li, Lingzhi and Li, Qing}, title = {Seeing Through Fog: Towards Fog-Invariant Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6966--6975} }
Extending Segment Anything Model 2 to Multi-Object Tracking by Optimizing Hierarchical Trajectory Memory-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Cheng-Yen and Huang, Hsiang-Wei and Chen, Kuang-Ming and Li, Kunjun and Hwang, Jenq-Neng}, title = {Extending {Segment Anything Model 2} to Multi-Object Tracking by Optimizing Hierarchical Trajectory Memory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8358--8367} }
From Alignment to Reason: Multi-Agent Debate for Tactical Badminton Video Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yi-Xiang and Wang, Yu-Shuen}, title = {From Alignment to Reason: Multi-Agent Debate for Tactical Badminton Video Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9520--9530} }
ConfDiff: Confidence-Guided Representation Diffusion for Video Moment Retrieval-
[pdf]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Haiming and Wang, Tai}, title = {{ConfDiff}: Confidence-Guided Representation Diffusion for Video Moment Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8164--8174} }
Continual Alignment for SAM: Rethinking Foundation Models for Medical Image Segmentation in Continual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Jiayi and Dai, Wei and Wang, Haoyu and Yang, Sihan and Bi, Haixia and Sun, Jian}, title = {Continual Alignment for {SAM}: Rethinking Foundation Models for Medical Image Segmentation in Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7520--7529} }
GATE: Gaussian-Attentive Transformer for Uncertainty-Aware Age Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Chaewon and Heo, JunHyeok and Kim, Chang-Su}, title = {{GATE}: {Gaussian}-Attentive Transformer for Uncertainty-Aware Age Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8736--8745} }
Generative Digital Twins: Vision-Language Simulation Models for Executable Industrial Systems-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hsu_2026_CVPR, author = {Hsu, YuChe and Wang, AnJui and Ni, TsaiChing and Yang, YuanFu}, title = {Generative Digital Twins: Vision-Language Simulation Models for Executable Industrial Systems}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8705--8714} }
MARS-RL: Enhancing Multi-Agent RAG Systems for Multi-Modal Documents via Strategic Reasoning with Reinforcement Learning-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zhongyu and Liu, Pengbo}, title = {{MARS-RL}: Enhancing Multi-Agent {RAG} Systems for Multi-Modal Documents via Strategic Reasoning with Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9674--9683} }
SCOPE: Scene-Contextualized Incremental Few-Shot 3D Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Thengane_2026_CVPR, author = {Thengane, Vishal and An, Zhaochong and Huang, Tianjin and Phung, Son Lam and Bouzerdoum, Abdesselam and Yin, Lu and Zhao, Na and Zhu, Xiatian}, title = {{SCOPE}: Scene-Contextualized Incremental Few-Shot {3D} Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7368--7377} }
From Fewer Samples to Fewer Bits: Reframing Dataset Distillation as Joint Optimization of Precision and Compactness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dinh_2026_CVPR, author = {Dinh, My H. and Sant, Aditya and Malhotra, Akshay and Patani, Keya and Hamidi-Rad, Shahab}, title = {From Fewer Samples to Fewer Bits: Reframing Dataset Distillation as Joint Optimization of Precision and Compactness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7070--7079} }
UI-AGILE: Advancing GUI Agents with Effective Reinforcement Learning and Precise Inference-Time Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lian_2026_CVPR, author = {Lian, Shuquan and Wu, Yuhang and Ma, Jia and Ding, Yifan and Song, Zihan and Chen, Bingqi and Zheng, Xiawu and Li, Hui and Ji, Rongrong}, title = {{UI-AGILE}: Advancing {GUI} Agents with Effective Reinforcement Learning and Precise Inference-Time Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8575--8584} }
SCP: Spatial Causal Prediction in Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Yanguang and Yang, Jie and Wu, Shengqiong and Hu, Shutong and Qiu, Hongbo and Wang, Yu and Zhang, Guijia and Ze, Tan Kai and Fei, Hao and Lin, Chia-Wen and Lee, Mong-Li and Hsu, Wynne}, title = {{SCP}: Spatial Causal Prediction in Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7165--7175} }
CineMatte: Background Matting for Virtual Production and Beyond-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Yuanjian and Zhang, Chen and Chen, Fasheng and Cao, Jiangbo}, title = {{CineMatte}: Background Matting for Virtual Production and Beyond}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8725--8735} }
DeepDP-TGMM: Amortized Non-Parametric Clustering for Hyperspherical Self-Supervised Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Tepakbong_2026_CVPR, author = {Tepakbong, Cyril Kana and Bouchard, K{\'e}vin and Maitre, Julien}, title = {{DeepDP-TGMM}: Amortized Non-Parametric Clustering for Hyperspherical Self-Supervised Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7483--7492} }
Complexity of Linear Regions in Self-supervised Deep ReLU Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Muthivhi_2026_CVPR, author = {Muthivhi, Mufhumudzi and van Zyl, Terence L.}, title = {Complexity of Linear Regions in Self-supervised Deep {ReLU} Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6911--6920} }
Towards Universal Open-Set Visual Font Recognition Via Augmented Synthetic Similarity-
[pdf]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Peicheng and Fang, Shancheng and Jin, Chenhui and Pu, Bowei and Xie, Hongtao}, title = {Towards Universal Open-Set Visual Font Recognition Via Augmented Synthetic Similarity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6799--6808} }
DARN: Dynamic Adaptive Regularization Networks for Efficient and Robust Foundation Model Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yadav_2026_CVPR, author = {Yadav, Dhenenjay and Sawai, Rohan}, title = {{DARN}: Dynamic Adaptive Regularization Networks for Efficient and Robust Foundation Model Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7625--7633} }
Dynamic Pseudo-Label Assignment and Consistent Prototypical Learning for Few-Shot Class-Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Zhilong and Zhang, Hang and Li, Yanmin and Liu, Lihua and Wu, Jibing and Wang, Mao}, title = {Dynamic Pseudo-Label Assignment and Consistent Prototypical Learning for Few-Shot Class-Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7810--7819} }
Region-Aware Hierarchical Sub-Feature Alignment for Robust EEG-Based Visual Decoding-
[pdf]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Yanan and Xiang, Ziwei and Wu, Jiamin and Guo, Jinyang and Zhang, Hongyuan and Song, Chunfeng and Fang, Hongjian and Guo, Yufei and Liu, Xianglong}, title = {Region-Aware Hierarchical Sub-Feature Alignment for Robust {EEG}-Based Visual Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6667--6676} }
STORM: End-to-End Referring Multi-Object Tracking in Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Zijia and Yi, Jingru and Wang, Jue and Chen, Yuxiao and Chen, Junwen and Li, Xinyu and Modolo, Davide}, title = {{STORM}: End-to-End Referring Multi-Object Tracking in Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8347--8357} }
Onboarding Without Forgetting: Hypernetwork Personalization with Data-Free Replay for Personalized Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Thinh and Khiem, Le Huy and Tran, Van-Tuan and Doan, Khoa D. and Chawla, Nitesh V. and Wong, Kok-Seng}, title = {Onboarding Without Forgetting: Hypernetwork Personalization with Data-Free Replay for Personalized Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7728--7736} }
Towards Universal and Lightweight Coverless Image Steganography with Multimodal Large Language Models Assistance-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jia and Li, Zhankai and Yu, Yongqiang and Yan, Xuehu and Lu, Yuliang}, title = {Towards Universal and Lightweight Coverless Image Steganography with Multimodal Large Language Models Assistance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7935--7944} }
UnrealSpace: Analyzing Spatial Understanding and Reasoning in Controllable Simulation-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Wufei and Cen, Sky and Shen, Jianzhi and Lee, Rex and Begiristain, Le{\'o}n and Zhuang, Yan and Peng, Jiawei and Yu, Zhifei and Song, Tianao and Qi, Xinyuan and Shu, Tianmin and Kortylewski, Adam and Yuille, Alan}, title = {{UnrealSpace}: Analyzing Spatial Understanding and Reasoning in Controllable Simulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9716--9725} }
CoVCR: Bridging Visual Narrative Gaps via Context Generation for Robust Commonsense Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xinyu and Sun, Shiliang}, title = {CoVCR: Bridging Visual Narrative Gaps via Context Generation for Robust Commonsense Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9065-9074} }
GEODE: Geometry-Guided Discrete Diffusion for Open-Vocabulary 3D Scene Graph Generation-
[pdf]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Changqun and Yin, Wangxiandi and Hu, Xin and Zhao, Lei and Zhang, Dongyang and He, Tao}, title = {GEODE: Geometry-Guided Discrete Diffusion for Open-Vocabulary 3D Scene Graph Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7143-7153} }
IRDINO: Adapting DINOv3 with Second-Order Motion Awareness for Moving Infrared Small Target Detection-
[pdf]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Qian and Fan, Shuaipeng and Gao, Fei and Zhang, Mingjin}, title = {IRDINO: Adapting DINOv3 with Second-Order Motion Awareness for Moving Infrared Small Target Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8408-8418} }
Multimodal Reasoning with Explicit Reasoning Patterns and Rewards-
[pdf]
[supp]
[bibtex]@InProceedings{Qiu_2026_CVPR, author = {Qiu, Han and Jin, Sheng and Zuo, Zhongrong and Wang, Ziyue and She, Qi and Shao, Ling and Lu, Shijian}, title = {Multimodal Reasoning with Explicit Reasoning Patterns and Rewards}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9542-9551} }
Beyond Static Artifacts: A Forensic Benchmark for Video Deepfake Reasoning in Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2026_CVPR, author = {Gu, Zheyuan and Zhao, Qingsong and Wang, Yusong and Huang, Zhaohong and Li, Xinqi and Yuan, Chen and Shao, Jiawei and Zhang, Chi and Li, Xuelong}, title = {Beyond Static Artifacts: A Forensic Benchmark for Video Deepfake Reasoning in Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8585-8595} }
Harmonized Multi-Layer Text-to-Image Generation with Generative Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dalva_2026_CVPR, author = {Dalva, Yusuf and Li, Yijun and Liu, Qing and Zhao, Nanxuan and Zhang, Jianming and Lin, Zhe and Yanardag, Pinar}, title = {Harmonized Multi-Layer Text-to-Image Generation with Generative Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8471-8480} }
100Editor: 100+ Views per Batch and Minute-Scale View-Consistent 3D Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Cunqi and Zhou, Peng and Qin, Jie and Tian, Qi}, title = {100Editor: 100+ Views per Batch and Minute-Scale View-Consistent 3D Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8449-8460} }
Predicting Gene Expression in Spatially Resolved Transcriptomics Across Samples Through Probabilistic Fusion of Hierarchical Histology and Spatial Information-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yinbo and Wu, Qi and Ye, Keyang and He, Xiao and Tian, Tian}, title = {Predicting Gene Expression in Spatially Resolved Transcriptomics Across Samples Through Probabilistic Fusion of Hierarchical Histology and Spatial Information}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8492-8503} }
Face Time Traveller : Travel Through Ages Without Losing Identity-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kar_2026_CVPR, author = {Kar, Purbayan and Ghadiya, Ayush and Chudasama, Vishal and Wasnik, Pankaj and Jawahar, C. V.}, title = {Face Time Traveller : Travel Through Ages Without Losing Identity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8756-8765} }
CADRNet: Cognitively-Inspired Active Vision for 3D Reasoning Segmentation via Differentiable Rendering-
[pdf]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Zai Yang and Wang, Changshuo and Shi, Yuan and Sun, Linjun and Wei, Shu and Wang, Tingran and Wu, Wangyu and Li, Yanjie and Li, Weijun}, title = {CADRNet: Cognitively-Inspired Active Vision for 3D Reasoning Segmentation via Differentiable Rendering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7220-7230} }
Prompt-driven Small Object Instance Segmentation in Earth Observation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Chenhao and Ji, Yingrui and Meng, Yu and Zhang, Yunjian and Zhu, Yao}, title = {Prompt-driven Small Object Instance Segmentation in Earth Observation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7347-7356} }
Leveraging Unlabeled Data from Unknown Sources via Dual-Path Guidance for Deepfake Face Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zhiqiang and Tao, Renshuai and Zhang, Chunjie and Yang, Guodong and Zheng, Xiaolong and Zhao, Yao}, title = {Leveraging Unlabeled Data from Unknown Sources via Dual-Path Guidance for Deepfake Face Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8090-8100} }
Weaver: End-to-End Agentic System Training for Video Interleaved Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Yudi and Di, Shangzhe and Chen, Qirui and Wang, Qinian and Cai, Jiayin and Jiang, Xiaolong and Hu, Yao and Xie, Weidi}, title = {Weaver: End-to-End Agentic System Training for Video Interleaved Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9248-9258} }
Towards Efficient Multimodal Unified Reasoning Model via Model Merging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Qixiang and Yao, Huanjin and Chen, Jianghao and Huang, Jiaxing and Zhao, Zhicheng and Su, Fei}, title = {Towards Efficient Multimodal Unified Reasoning Model via Model Merging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9378-9388} }
LED: LLM Enhanced Open-Vocabulary Object Detection without Human Curated Data Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Yang and Zhao, Shiyu and Chen, Yuxiao and Wang, Zhenting and Jin, Can and Zhao, Mingyu and Metaxas, Dimitris N.}, title = {LED: LLM Enhanced Open-Vocabulary Object Detection without Human Curated Data Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9643-9653} }
Hierarchical Textual Knowledge for Enhanced Image Clustering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2026_CVPR, author = {Zhong, Yijie and Gao, Yunfan and Jiang, Weipeng and Wang, Haofen}, title = {Hierarchical Textual Knowledge for Enhanced Image Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9749-9758} }
Erased, But Not Forgotten: Erased Rectified Flow Transformers Still Remain Unsafe Under Concept Attack-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Nanxiang and Fan, Zhaoxin and Kang, Enhan and Gao, Daiheng and Zhou, Yun and Chang, Yanxia and Zhu, Zheng and Jin, Yeying and Wu, Wenjun}, title = {Erased, But Not Forgotten: Erased Rectified Flow Transformers Still Remain Unsafe Under Concept Attack}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8080-8089} }
Analyzing and Enhancing Visual Learning in LLM-based Radiology Report Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Zailong and Gao, Peng and Barthelemy, Johan and Zhou, Luping and Wang, Lei}, title = {Analyzing and Enhancing Visual Learning in LLM-based Radiology Report Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9327-9336} }
Ramen: Robust Test-Time Adaptation of Vision-Language Models with Active Sample Selection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bao_2026_CVPR, author = {Bao, Wenxuan and Zhao, Yanjun and Yang, Xiyuan and He, Jingrui}, title = {Ramen: Robust Test-Time Adaptation of Vision-Language Models with Active Sample Selection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9632-9642} }
ECOC-IL: Robust and Efficient Label LDP for Imbalanced Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Mengyang and Wu, Ou}, title = {ECOC-IL: Robust and Efficient Label LDP for Imbalanced Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7914-7923} }
Open World Image Aesthetic Assessment-
[pdf]
[supp]
[bibtex]@InProceedings{Liao_2026_CVPR, author = {Liao, Mingxiang and Ma, Tianren and Zhang, Xijin}, title = {Open World Image Aesthetic Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9791-9801} }
Bootstrap Your Own Classifier: Your Pretrained Vision Models are Secretly Strong Continual Learners-
[pdf]
[supp]
[bibtex]@InProceedings{Gong_2026_CVPR, author = {Gong, Yizheng and Wang, Xiaoyang and Yu, Siyue and Al-Nuaimy, Waleed and Xiao, Jimin}, title = {Bootstrap Your Own Classifier: Your Pretrained Vision Models are Secretly Strong Continual Learners}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7665-7674} }
Asymmetric Collaborative Distillation for Asymmetric Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Yi and Zhang, Huaidong and Luo, Xuandi and Zhou, Yan and He, Shengfeng}, title = {Asymmetric Collaborative Distillation for Asymmetric Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6706-6716} }
Thinking with Blueprints: Assisting Vision-Language Models in Spatial Reasoning via Structured Object Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Weijian and Sun, Shizhao and Yu, Tianyu and Wang, Ruiyu and Chua, Tat-Seng and Bian, Jiang}, title = {Thinking with Blueprints: Assisting Vision-Language Models in Spatial Reasoning via Structured Object Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8919-8929} }
Overthinking Causes Hallucination: Tracing Confounder Propagation in Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shoby_2026_CVPR, author = {Shoby, Abin and Huy, Ta Duc and Nguyen, Tuan Dung and Ho, Minh Khoi and Chen, Qi and van den Hengel, Anton and Le Nguyen, Phi and Verjans, Johan W. and Phan, Vu Minh Hieu}, title = {Overthinking Causes Hallucination: Tracing Confounder Propagation in Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9185-9194} }
Large Multimodal Models as General In-Context Classifiers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Garosi_2026_CVPR, author = {Garosi, Marco and Farina, Matteo and Conti, Alessandro and Mancini, Massimiliano and Ricci, Elisa}, title = {Large Multimodal Models as General In-Context Classifiers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6727-6736} }
Visual Reasoning Through Tool-Supervised Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2026_CVPR, author = {Dong, Qihua and Sahin, Gozde and Wang, Pei and Cai, Zhaowei and Shrestha, Robik and Yang, Hao and Modolo, Davide}, title = {Visual Reasoning Through Tool-Supervised Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8993-9002} }
DM^3T: Harmonizing Modalities via Diffusion for Multi-Object Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Weiran and Liu, Yeqiang and Wei, Yijie and Han, Mina and Guo, Qiannan and Li, Zhenbo}, title = {{DM$^3$T}: Harmonizing Modalities via Diffusion for Multi-Object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8398-8407} }
Memory-efficient Continual Learning with Prototypical Exemplar Condensation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, M.-Duong and Dao, Thien-Thanh and Nguyen, Le-Tuan and Le, Dung D. and Wong, Kok-Seng}, title = {Memory-efficient Continual Learning with Prototypical Exemplar Condensation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7675-7685} }
Fine-Grained Visual Prompt and Region Self-Distillation for Retrieval-Augmented VQA-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yujie and Zhang, Hu and Liang, Jiye and Wang, Zhiqiang and Tan, Hongye and Li, Ru}, title = {Fine-Grained Visual Prompt and Region Self-Distillation for Retrieval-Augmented VQA}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9282-9293} }
CogNet: Multi-Agent Collaborative Reasoning and Verification for Salient Object Ranking-
[pdf]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Zhenyu and Shi, Tengfei and Wang, Xuehao and Li, Ming and Chen, Chenglizhao and Song, Wenfeng and Hao, Aimin}, title = {CogNet: Multi-Agent Collaborative Reasoning and Verification for Salient Object Ranking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7241-7250} }
Direct Language Embedding Enables Gaussian Splatting for Large Scenes-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Zhida and Zhu, Jianqiao and Huang, Hejin and Qin, Yipeng and Yang, Sibei and Li, Guanbin}, title = {Direct Language Embedding Enables Gaussian Splatting for Large Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7231-7240} }
AFCL: Achieving Spatio-Temporal Invariance to Data Heterogeneity in Federated Continual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Jianheng and He, Jingyu and Fan, Kejia and He, Run and Wang, Jingchao and Liu, Anfeng and Song, Houbing Herbert and Wang, Leye and Zhu, Zhanxing and Zhuang, Huiping and Liu, Yunhuai}, title = {AFCL: Achieving Spatio-Temporal Invariance to Data Heterogeneity in Federated Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7768-7778} }
DIAMOND-SSS: Diffusion-Augmented Multi-View Optimization for Data-efficient SubSurface Scattering-
[pdf]
[supp]
[bibtex]@InProceedings{Araneda_2026_CVPR, author = {Araneda, Guillermo Figueroa and Jimenez, Iris Dania and Hofherr, Florian and Ko, Manny and Andrade-Loarca, Hector and Cremers, Daniel}, title = {DIAMOND-SSS: Diffusion-Augmented Multi-View Optimization for Data-efficient SubSurface Scattering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8461-8470} }
SpatialDreamer: Incentivizing Spatial Reasoning via Active Mental Imagery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Meng and Li, Xingyu and Liu, Xue and Reid, Ian and Liang, Xiaodan}, title = {SpatialDreamer: Incentivizing Spatial Reasoning via Active Mental Imagery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7176-7187} }
One Identity, Many Roles: Multimodal Entity Coreference for Enhanced Video Situation Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Darur_2026_CVPR, author = {Darur, Balaji and Garg, Amanmeet and Tapaswi, Makarand}, title = {One Identity, Many Roles: Multimodal Entity Coreference for Enhanced Video Situation Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8268-8279} }
DetRefiner: Model-Agnostic Detection Refinement with Feature Fusion Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Okazaki_2026_CVPR, author = {Okazaki, Soichiro and Sasaki, Tatsuya and Ohashi, Hiroki}, title = {DetRefiner: Model-Agnostic Detection Refinement with Feature Fusion Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6890-6900} }
DEED: Dual-Channel Enhanced Ensemble Distillation for Uncertainty-Aware Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yang and Xu, Kai and Hou, Junyao and Zhang, Miao and Li, Xiang and Chen, Zhenghua and Gao, Yingxue and Wu, Min}, title = {DEED: Dual-Channel Enhanced Ensemble Distillation for Uncertainty-Aware Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7789-7798} }
SemanticMoments: Training-Free Motion Similarity via Third Moment Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huberman_2026_CVPR, author = {Huberman, Saar and Goldberg, Kfir and Patashnik, Or and Benaim, Sagie and Mokady, Ron}, title = {SemanticMoments: Training-Free Motion Similarity via Third Moment Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8419-8428} }
Pre-trained Models Can Count (Almost): Exploring Quantitative Structure in Visual Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Aota_2026_CVPR, author = {Aota, Toshimichi and Hashimoto, Akinori and Sekizuka, Naoto and Okatani, Takayuki}, title = {Pre-trained Models Can Count (Almost): Exploring Quantitative Structure in Visual Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6932-6942} }
SoREL: Soft-Label Refurbishment with Ensemble Learning for Noisy Long-Tailed Classification-
[pdf]
[bibtex]@InProceedings{Hsieh_2026_CVPR, author = {Hsieh, Jun Wei and Wu, Ying-Hsuan and Hsieh, Yi-Kuan and Li, Xin and Peng, Kuan-Chuan and Chang, Ming-Ching}, title = {SoREL: Soft-Label Refurbishment with Ensemble Learning for Noisy Long-Tailed Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6839-6848} }
PSIM: Perceptual Similarity Index Measure-
[pdf]
[supp]
[bibtex]@InProceedings{Eimon_2026_CVPR, author = {Eimon, Md Eimran Hossain and Kalva, Hari}, title = {PSIM: Perceptual Similarity Index Measure}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8564-8574} }
StreamEQA: Towards Streaming Video Understanding for Embodied Scenarios-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yifei and Li, Zhenkai and Qian, Tianwen and Zheng, Huanran and Wang, Zheng and Fu, Yuqian and Wang, Xiaoling}, title = {StreamEQA: Towards Streaming Video Understanding for Embodied Scenarios}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9422-9432} }
KGGAT: Knowledge-Guided Graph Attention Network for Multi-Label Image Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Dewi_2026_CVPR, author = {Dewi, Christine and Thiruvady, Dhananjay R and Zaidi, Nayyar}, title = {KGGAT: Knowledge-Guided Graph Attention Network for Multi-Label Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8766-8775} }
When Agents Steer Human Perception: How AI-Selected Images Can Covertly Alter Disagreements-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Chi and Gao, Yulang and Zou, Jiachen and Wei, Chen and Liu, Quanying}, title = {When Agents Steer Human Perception: How AI-Selected Images Can Covertly Alter Disagreements}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8111-8120} }
StabiGS: Video Stabilization through Rendering-Aware Trajectory Optimization in 3DGS-Reconstructed Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Ben_Mabrouk_2026_CVPR, author = {Ben Mabrouk, Souheib and Deschaud, Jean-Emmanuel and Coupet{\'e}, Eva and Derbanne, Thomas and Rahmouni, Nicolas}, title = {StabiGS: Video Stabilization through Rendering-Aware Trajectory Optimization in 3DGS-Reconstructed Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8481-8491} }
Mitigating Vision-Text Order Bias in Vision-Language Model-
[pdf]
[bibtex]@InProceedings{Gan_2026_CVPR, author = {Gan, Weilin and Song, Yifan and Yu, Zhuocheng and Li, Sujian}, title = {Mitigating Vision-Text Order Bias in Vision-Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9664-9673} }
Decoupled Sub-Feature Uncertainty Modeling for Robust Multimodal Representation Learning-
[pdf]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Aoqiang and Hu, Min and Xing, Yan and Tang, Yiming}, title = {Decoupled Sub-Feature Uncertainty Modeling for Robust Multimodal Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6921-6931} }
ReConText3D: Replay-based Continual Text-to-3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Khan_2026_CVPR, author = {Khan, Muhammad Ahmed Ullah and Bin Amir, Muhammad Haris and Stricker, Didier and Afzal, Muhammad Zeshan}, title = {ReConText3D: Replay-based Continual Text-to-3D Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7893-7902} }
TALENT: Target-Aware Efficient Tuning for Referring Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2026_CVPR, author = {Jin, Shuo and Yu, Siyue and Zhang, Bingfeng and Yao, Chao and Liu, Meiqin and Xiao, Jimin}, title = {TALENT: Target-Aware Efficient Tuning for Referring Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7472-7482} }
Flash-Unified: A Training-Free and Task-Aware Acceleration Framework for Native Unified Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ke_2026_CVPR, author = {Ke, Junlong and Wen, Zichen and Yang, Boxue and Yang, Yantai and Liu, Xuyang and Liao, Chenfei and Chen, Zhaorun and Wang, Shaobo and Zhang, Linfeng}, title = {Flash-Unified: A Training-Free and Task-Aware Acceleration Framework for Native Unified Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9131-9142} }
Super Sparse DETR: YOLO-Competitive Convergence and Acceleration-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Hebao}, title = {Super Sparse DETR: YOLO-Competitive Convergence and Acceleration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6677-6684} }
Dual-Modality Anchor-Guided Filtering for Test-Time Prompt Tuning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, Jungwon and Kim, Eunwoo}, title = {Dual-Modality Anchor-Guided Filtering for Test-Time Prompt Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9368-9377} }
HARP: Hierarchical Adaptive Ranking with Probabilistic Modeling for Skill Determination-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Hui and Ke, Xiao and Zeng, Zhihong and Xu, Huangbiao and Wu, Huanqi}, title = {HARP: Hierarchical Adaptive Ranking with Probabilistic Modeling for Skill Determination}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8337-8346} }
Online Interpretable Matrix Decomposition for Large-Scale Streaming Data-
[pdf]
[supp]
[bibtex]@InProceedings{Abdelgawad_2026_CVPR, author = {Abdelgawad, Muhammad A. A. and Eldaly, Abdelrahman B. M. and Xinmin, Meng and Jing, Peng and Sanka, Abdurrashid Ibrahim and Cheung, Ray C. C. and Yan, Hong}, title = {Online Interpretable Matrix Decomposition for Large-Scale Streaming Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7030-7039} }
ROSE: Retrieval-Oriented Segmentation Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Song and Jie, Guangquan and Ding, Henghui and Jiang, Yu-Gang}, title = {ROSE: Retrieval-Oriented Segmentation Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7398-7407} }
MAGIC: Few-Shot Mask-Guided Anomaly Inpainting with Prompt Perturbation, Spatially Adaptive Guidance, and Context Awareness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, JaeHyuck and Kim, Minjun and Hong, Je Hyeong}, title = {MAGIC: Few-Shot Mask-Guided Anomaly Inpainting with Prompt Perturbation, Spatially Adaptive Guidance, and Context Awareness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8524-8533} }
GRAFT: Graph-Based Affordance Transfer via Part Correspondence-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Mengying and Mishra, Utkarsh and Mandlekar, Ajay and Xu, Danfei}, title = {GRAFT: Graph-Based Affordance Transfer via Part Correspondence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8746-8755} }
POMA-3D: The Point Map Way to 3D Scene Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Ye and Luo, Weixun and Huang, Ranran and Jing, Junpeng and Mikolajczyk, Krystian}, title = {POMA-3D: The Point Map Way to 3D Scene Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7282-7292} }
BrainStack: Neuro-MoE with Functionally Guided Expert Routing for EEG-Based Language Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Ziyi and Zhou, Jinzhao and Jiang, Xiaowei and Cao, Beining and Ma, Wenhao and Shen, Yang and Li, Ren and Wang, Yu-Kai and Lin, Chin-teng}, title = {BrainStack: Neuro-MoE with Functionally Guided Expert Routing for EEG-Based Language Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7050-7059} }
Background-Compensated Audio-Visual Semantic Modulation Framework for Audio-Visual Event Localization-
[pdf]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Chao and Zhang, Junbo and Zhu, Chuanbo and Huang, Mingjun and Du, Bo}, title = {Background-Compensated Audio-Visual Semantic Modulation Framework for Audio-Visual Event Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7272-7281} }
Frequency-Modulated Visual Restoration for Matryoshka Large Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Qingtao and Dou, Zhihao and Li, Shuo}, title = {Frequency-Modulated Visual Restoration for Matryoshka Large Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9726-9737} }
Learning to Propose Pose for Category-Agnostic Objects via Joint Refinement with Co-Matching Supervision-
[pdf]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Junjie and Liu, Zezheng and Liu, Runxiang and Fang, Yuming and Zuo, Yifan and Yan, Jiebin}, title = {Learning to Propose Pose for Category-Agnostic Objects via Joint Refinement with Co-Matching Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7872-7882} }
ARGS: Auto-Regressive Gaussian Splatting via Parallel Progressive Next-Scale Prediction-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ruan_2026_CVPR, author = {Ruan, Quanyuan and Shi, Kewei and Lei, Jiabao and Gao, Xifeng and Han, Xiaoguang}, title = {ARGS: Auto-Regressive Gaussian Splatting via Parallel Progressive Next-Scale Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8439-8448} }
VSI: Visual-Subtitle Integration for Keyframe Selection to Enhance Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Jianxiang and Hong, Meisheng and Li, Jungang and Guo, Weiyu and Hu, Xuming and Xiong, Hui}, title = {VSI: Visual-Subtitle Integration for Keyframe Selection to Enhance Long Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9003-9012} }
MART: Mechanism-disentanglement Anchor-Routed Training for Learning with Open-World Noisy Data-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Changhui and Nagarajan, Bhalaji and Marques, Ricardo and Radeva, Petia}, title = {{MART}: Mechanism-disentanglement Anchor-Routed Training for Learning with Open-World Noisy Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7018--7029} }
Reasoning for Mobile User Experience with Multimodal LLMs: Task, Benchmark, and Approach-
[pdf]
[supp]
[bibtex]@InProceedings{Mao_2026_CVPR, author = {Mao, Ruichao and Fang, Zhou and Guo, Teng and Yang, Hao and Li, Yaping and Peng, Shaohua and Huang, Maji and Lin, Xiaoyu and Liu, Shuoyang and Li, Xuepeng and Zhang, Yuyu and Rao, Hai}, title = {Reasoning for Mobile User Experience with Multimodal {LLMs}: Task, Benchmark, and Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8983--8992} }
Grounding Hierarchical Vision-Language-Action Models Through Explicit Language-Action Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wulff_2026_CVPR, author = {Wulff, Theodor and Tavella, Federico and Maharjan, Rahul Singh and Adikari, Manith and Cangelosi, Angelo}, title = {Grounding Hierarchical Vision-Language-Action Models Through Explicit Language-Action Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9269--9281} }
AdaMeta: Adaptive Meta-Learning with Dynamic Task Relational Inference for Few-shot learning-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Xingyu and Ma, Yidan and Qu, Hanzhang and Cao, Jianfu}, title = {{AdaMeta}: Adaptive Meta-Learning with Dynamic Task Relational Inference for Few-shot learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7552--7561} }
Unsupervised Graph Partitioning Framework for Background Suppression in Multi-Query Vehicle Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Yichun and Hu, Zixuan and Duan, Ling-Yu}, title = {Unsupervised Graph Partitioning Framework for Background Suppression in Multi-Query Vehicle Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6849--6858} }
Advancing Open-Set Detection and Segmentation via Disentangled Representations-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Haokang and Guan, Yuchen and Cheng, Runxi and Yang, Yujiu}, title = {Advancing Open-Set Detection and Segmentation via Disentangled Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6622--6632} }
Are Video Models Ready as Zero-Shot Reasoners? An Empirical Study with the MME-CoF Benchmark-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Ziyu and Chen, Xinyan and Zhang, Renrui and An, Ruichuan and Qi, Yu and Jiang, Dongzhi and Li, Xiangtai and Zhang, Manyuan and Li, Hongsheng and Heng, Pheng-Ann}, title = {Are Video Models Ready as Zero-Shot Reasoners? {An} Empirical Study with the {MME-CoF} Benchmark}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9175--9184} }
FVLF: A Reinforcing Vision-Language Framework for Gloss-Free Sign Language Translation-
[pdf]
[bibtex]@InProceedings{Rao_2026_CVPR, author = {Rao, Zhi and Zhou, Yucheng and Zhou, Benjia and Huang, Yiqing and Escalera, Sergio and Wan, Jun}, title = {{FVLF}: A Reinforcing Vision-Language Framework for Gloss-Free Sign Language Translation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9237--9247} }
Revisiting Model Inversion Evaluation: From Misleading Standards to Reliable Privacy Assessment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ho_2026_CVPR, author = {Ho, Sy-Tuyen and Hao, Koh Jun and Nguyen, Ngoc-Bao and Binder, Alexander and Cheung, Ngai-Man}, title = {Revisiting Model Inversion Evaluation: From Misleading Standards to Reliable Privacy Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8070--8079} }
Layer Embedding Deep Fusion Graph Neural Network-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Taihua and Tian, Genhao and Fan, Jicong and Yang, Xibei and Zhang, Qinghua and Cui, Yun}, title = {Layer Embedding Deep Fusion Graph Neural Network}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7091--7100} }
Myopia Rectification: KV Cache Pruning for MLLMs Via Dynamic Attention Subsidy and Token Reclamation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhuang_2026_CVPR, author = {Zhuang, Jiedong and Lu, Lu and Dai, Ming and Chen, Jian and Liu, Qiang and Hu, Haoji}, title = {Myopia Rectification: {KV} Cache Pruning for {MLLMs} Via Dynamic Attention Subsidy and Token Reclamation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9023--9033} }
FraQAT: Quantization Aware Training with Fractional Bits-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Morreale_2026_CVPR, author = {Morreale, Luca and Gil C P Ramos, Alberto and Chadwick, Malcolm and Noroozi, Mehdi and Chavhan, Ruchika and Mehrotra, Abhinav}, title = {{FraQAT}: Quantization Aware Training with Fractional Bits}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8514--8523} }
A-SelecT: Automatic Timestep Selection for Diffusion Transformer Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Changyu and Liang, James Chenhao and Yang, Wenhao and Cui, Yiming and Yang, Jinghao and Wang, Tianyang and Wang, Qifan and Liu, Dongfang and Han, Cheng}, title = {{A-SelecT}: Automatic Timestep Selection for Diffusion Transformer Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6943--6954} }
VR-CLIP: Visual Refinement of CLIP for Zero-Shot Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Haitao and Li, Xu and Cao, Yuanyang and Zhang, Ying and Wang, Jianji}, title = {{VR-CLIP}: Visual Refinement of {CLIP} for Zero-Shot Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6809--6818} }
Verify Claimed Text-to-Image Models Via Boundary-Aware Prompt Optimization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Zidong and Huang, Yihao and Guo, Qing and Li, Tianlin and Li, Anran and Wang, Kailong and Dong, Jin Song and Pu, Geguang}, title = {Verify Claimed Text-to-Image Models Via Boundary-Aware Prompt Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8049--8058} }
EvoPrompt-ReID: A Bilevel Optimization Framework for Prompt-Encoder Co-evolution in Image Re-Identification-
[pdf]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Yuanlin and Wang, Zhenchuan and Chen, Jun and He, Yingying and Wang, Jiabao and Wang, Weiwen and Xu, Kun and Zhou, Zijin and Wang, Xiaoxiao and Chen, Mingju and Liu, Tingting and Pan, Zhisong}, title = {{EvoPrompt-ReID}: A Bilevel Optimization Framework for Prompt-Encoder Co-evolution in Image Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6758--6767} }
Beyond Syntax: Action Semantics Learning for App Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Bohan and Luo, Dezhao and Liu, Jianheng and Chen, Jingxuan and Gong, Shaogang and Hao, Jianye and Wang, Jun and Shao, Kun}, title = {Beyond Syntax: Action Semantics Learning for App Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9444--9454} }
Weakly-Supervised Referring Video Object Segmentation Through Text Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Miaojing and Huang, Jun and Yue, Zijie and Wang, Hanli}, title = {Weakly-Supervised Referring Video Object Segmentation Through Text Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7461--7471} }
Don't Let the Information Slip Away-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Taozhe and Wang, Guansu and Yu, Bo and Liu, Yiming and Sun, Wei}, title = {Don't Let the Information Slip Away}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8504--8513} }
Group-DINOmics: Incorporating People Dynamics into DINO for Self-supervised Group Activity Feature Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tezuka_2026_CVPR, author = {Tezuka, Ryuki and Nakatani, Chihiro and Ukita, Norimichi}, title = {{Group-DINOmics}: Incorporating People Dynamics into {DINO} for Self-supervised Group Activity Feature Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8215--8225} }
Less is More: Token-Efficient Video-QA via Adaptive Frame-Pruning and Semantic Graph Integration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Shaoguang and Guo, Weiyu and Chen, Ziyang and Xu, Yijie and Hu, Xuming and Xiong, Hui}, title = {Less is More: Token-Efficient {Video-QA} via Adaptive Frame-Pruning and Semantic Graph Integration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9856--9866} }
HiVid-Narrator: Hierarchical Video Narrative Generation with Scene-Primed ASR-anchored Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Haoxuan and Li, Mengyan and Zheng, Junjun}, title = {{HiVid-Narrator}: Hierarchical Video Narrative Generation with Scene-Primed {ASR}-anchored Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8195--8204} }
Leave No Stone Unturned: Uncovering Holistic Audio-Visual Intrinsic Coherence for Deepfake Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Jielun and Wang, Yabin and Li, Yaqi and Kong, Long and Hong, Xiaopeng}, title = {Leave No Stone Unturned: Uncovering Holistic Audio-Visual Intrinsic Coherence for Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6655--6666} }
RecycleLoRA: Rank-Revealing QR-Based Dual-LoRA Subspace Adaptation for Domain Generalized Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Cho_2026_CVPR, author = {Cho, Chanseul and Yun, Seokju and Jun, Jaesung and Moon, Seungjae and Ro, Youngmin}, title = {{RecycleLoRA}: Rank-Revealing {QR}-Based Dual-{LoRA} Subspace Adaptation for Domain Generalized Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7503--7513} }
Learning to Select, Learning to Judge: Active Preference Alignment for Mars Terrain Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, JunJie and Li, Miyu and Wang, Jiawei and Liu, Yu and Wang, Yumei}, title = {Learning to Select, Learning to Judge: Active Preference Alignment for {Mars} Terrain Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8887--8896} }
PosterGen: Aesthetic-Aware Multi-Modal Paper-to-Poster Generation Via Multi-Agent LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zhilin and Zhang, Xiang and Wei, Jiaqi and Xu, Yiwei and You, Chenyu}, title = {{PosterGen}: Aesthetic-Aware Multi-Modal Paper-to-Poster Generation Via Multi-Agent {LLMs}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9813--9823} }
Safe Codebook: Token-Level Moderation for Safer Visual Autoregressive Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Jiaxuan and Xu, Qianqian and Wen, Peisong and Dai, Siran and Liu, Yang and Huang, Qingming}, title = {Safe Codebook: Token-Level Moderation for Safer Visual Autoregressive Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7924--7934} }
Gazemo: Mimicking Human Saccades via Foveal-Peripheral Feature Modeling for Lightweight Semantic Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Abid_2026_CVPR, author = {Abid, Mian Muhammad Naeem and Timofte, Radu}, title = {Gazemo: Mimicking Human Saccades via Foveal-Peripheral Feature Modeling for Lightweight Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7293--7303} }
REBA: Residual Mixture-of-Experts and Bidirectional Video-Text Alignment for Better Fine-grained Weakly Supervised Video Anomaly Detection-
[pdf]
[bibtex]@InProceedings{Chu_2026_CVPR, author = {Chu, Chengxi and Japar, Nurul and Lim, Chee Kau}, title = {{REBA}: Residual Mixture-of-Experts and Bidirectional Video-Text Alignment for Better Fine-grained Weakly Supervised Video Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8280--8290} }
ZeroDiff++: Balancing Semantic Diffusion Dynamics for Robust Zero-Shot Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Qin and Li, Qi and Liu, Limei and Yang, Junfeng and Peng, Han}, title = {{ZeroDiff++}: Balancing Semantic Diffusion Dynamics for Robust Zero-Shot Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6997--7007} }
Towards Complete Activation: Foreground-Background Multi-Perspective Guided Cross-Support for Few-Shot Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yi and Jiao, Qiang and Shi, Mengrui and Zhang, Qiang}, title = {Towards Complete Activation: Foreground-Background Multi-Perspective Guided Cross-Support for Few-Shot Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7378--7387} }
Learning to Reason: Targeted Knowledge Discovery and Fuzzy Logic Update for Robust Image Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Srinivas_2026_CVPR, author = {Srinivas, Gurucharan and Niemeijer, Joshua and K{\"o}ster, Frank}, title = {Learning to Reason: Targeted Knowledge Discovery and Fuzzy Logic Update for Robust Image Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7122--7131} }
Semantic Guided Feature Disentanglement and Reconstruction for Domain Adaptive Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Xiaowei and Liu, Zhide and Ma, Yuqing and Liu, Xianglong}, title = {Semantic Guided Feature Disentanglement and Reconstruction for Domain Adaptive Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9357--9367} }
Exploiting the Source-Asymmetry Confidence Gap for Generalizable AI-Generated Image Detection-
[pdf]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Ziyang and Chen, Weiyan and Xiao, Yao and Cao, Zijie and Zhang, Dongyu and Wei, Pengxu}, title = {Exploiting the Source-Asymmetry Confidence Gap for Generalizable {AI}-Generated Image Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8715--8724} }
IntentEdit: Multi-Agent Reasoning for Intent-Driven Complex Image Editing-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yuxuan and Huang, Shijia and Wang, Liwei}, title = {{IntentEdit}: Multi-Agent Reasoning for Intent-Driven Complex Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8776--8785} }
GaussFiller: Unleashing VLM-Expert Guidance for 3D Scene Completion with 3D Gaussian Splatting-
[pdf]
[bibtex]@InProceedings{Ping_2026_CVPR, author = {Ping, Yuhan and Lin, Cheng and Liu, Yuan and Dou, Zhiyang and Pan, Jia and Wang, Wenping}, title = {{GaussFiller}: Unleashing {VLM}-Expert Guidance for {3D} Scene Completion with {3D} {Gaussian} Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7132--7142} }
DGD: Density Gradient-guided Diffusion for Long-Tailed Clustering-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Xulun and Deng, Yuanyuan and Zhou, Kun}, title = {{DGD}: Density Gradient-guided Diffusion for Long-Tailed Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7654--7664} }
UniShield: An Adaptive Multi-Agent Framework for Unified Forgery Image Detection and Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Qing and Xu, Zhipei and Zhang, Xuanyu and Yu, Xiangyu and Zhang, Jian}, title = {{UniShield}: An Adaptive Multi-Agent Framework for Unified Forgery Image Detection and Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8121--8132} }
Another BRIXEL in the Wall: Towards Cheaper Dense Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lappe_2026_CVPR, author = {Lappe, Alexander and Giese, Martin A.}, title = {Another {BRIXEL} in the Wall: Towards Cheaper Dense Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7605--7614} }
VIDEOP2R: Video Understanding from Perception to Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Yifan and Wang, Yueying and Zhao, Rui and Parag, Toufiq and Chen, Zhimin and Liao, Zhenyu and Unnikrishnan, Jayakrishnan}, title = {{VIDEOP2R}: Video Understanding from Perception to Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8303--8313} }
Plug-and-Play Dynamic In-context Learning with Stochastic Regularization for Screen Content Image Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yuexin and Wang, Xiaolei and Cheng, Guangliang and Bai, Huihui and Tillo, Tammam and Xiao, Jimin}, title = {Plug-and-Play Dynamic In-context Learning with Stochastic Regularization for Screen Content Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8836--8845} }
Improving Synthesized Image Detection by Disentangling Generator-Shared and Generator-Specific Image Artifacts-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yongqi and Li, Yuke and Huang, Heng and Li, Zhihui and Du, Bo and Wu, Yu}, title = {Improving Synthesized Image Detection by Disentangling Generator-Shared and Generator-Specific Image Artifacts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8019--8028} }
Once for All: An End-to-End Paradigm for VLM-Based Domain-Generalized Object Detection-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Peng and Yuan, Xiang and Li, Cong and Han, Junwei and Cheng, Gong}, title = {Once for All: An End-to-End Paradigm for {VLM}-Based Domain-Generalized Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6829--6838} }
Di3PO - Diptych Diffusion DPO for Targeted Improvements in Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Reddy_2026_CVPR, author = {Reddy, Sanjana and Malhi, Ishaan and Ma, Sally and Dutta, Praneet}, title = {{Di3PO} - Diptych Diffusion {DPO} for Targeted Improvements in Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8868--8876} }
VRAG-DFD: Verifiable Retrieval-Augmentation for MLLM-based Deepfake Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2026_CVPR, author = {Han, Hui and Wang, Shunli and Zhao, Yandan and Yao, Taiping and Ding, Shouhong}, title = {{VRAG-DFD}: Verifiable Retrieval-Augmentation for {MLLM}-based Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9552--9562} }
Video Parallel Scaling: Aggregating Diverse Frame Subsets for VideoLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chung_2026_CVPR, author = {Chung, Hyungjin and Nam, Hyelin and Kim, Jiyeon and Go, Hyojun and Park, Byeongjun and Kim, Junho and Lee, Joonseok and Ha, Seongsu and Kim, Byung-Hoon}, title = {Video Parallel Scaling: Aggregating Diverse Frame Subsets for {VideoLLMs}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8972--8982} }
NaiLIA: Multimodal Nail Design Retrieval Based on Dense Intent Descriptions and Palette Queries-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Amemiya_2026_CVPR, author = {Amemiya, Kanon and Yashima, Daichi and Katsumata, Kei and Komatsu, Takumi and Korekata, Ryosuke and Otsuki, Seitaro and Sugiura, Komei}, title = {{NaiLIA}: Multimodal Nail Design Retrieval Based on Dense Intent Descriptions and Palette Queries}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9034--9044} }
AITP: Traffic Accident Responsibility Allocation via Multimodal Large Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Zijin and Zhang, Songan}, title = {{AITP}: Traffic Accident Responsibility Allocation via Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9259--9268} }
CheXmix: Unified Generative Pretraining for Vision Language Models in Medical Imaging-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kumar_2026_CVPR, author = {Kumar, Ashwin and Holland, Robbie and Barrett, Corey and Kim, Jangwon and Varma, Maya and Chen, Zhihong and Gao, Yunhe and Zaharchuk, Greg and Taghavi, Tara and Kenthapadi, Krishnaram and Chaudhari, Akshay}, title = {{CheXmix}: Unified Generative Pretraining for Vision Language Models in Medical Imaging}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9466--9476} }
Entropy-Based Visual Re-perception Inference for Multimodal Models-
[pdf]
[supp]
[bibtex]@InProceedings{Liufu_2026_CVPR, author = {Liufu, Jia and Yan, Qiangyu and Kan, Zhehan and Yang, Wenming and Hu, Hailin and Chen, Xinghao and Jiang, Borui}, title = {Entropy-Based Visual Re-perception Inference for Multimodal Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9770--9779} }
Is Prompt Selection Necessary for Task-Free Online Continual Learning?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Seoyoung and Lee, Haemin and Lee, Hankook}, title = {Is Prompt Selection Necessary for Task-Free Online Continual Learning?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7883--7892} }
Logical Consistency Optimization for Few-Shot Weakly Supervised Video Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zheng_2026_CVPR, author = {Zheng, Hantao and Han, Ning and Zeng, Yawen and Zhu, Hegui and Chen, Hao}, title = {Logical Consistency Optimization for Few-Shot Weakly Supervised Video Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9045--9054} }
ReMem: A Dynamic Memory Evolution Detector for Zero-Shot Anomaly Detection-
[pdf]
[bibtex]@InProceedings{Yi_2026_CVPR, author = {Yi, Ling and Chen, Zhe and Wu, Gaochang and Ding, Jinliang and Wang, Xiaojie and Ning, Zhaolong}, title = {{ReMem}: A Dynamic Memory Evolution Detector for Zero-Shot Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7697--7705} }
CLIPtone-GO: Geometry-Aware, Gradient-Orthogonalized Text-Guided Color Tone Adjustment-
[pdf]
[supp]
[bibtex]@InProceedings{Merothiya_2026_CVPR, author = {Merothiya, Satyam and Kamra, Chanda Grover and Mastan, Indra Deep}, title = {{CLIPtone-GO}: Geometry-Aware, Gradient-Orthogonalized Text-Guided Color Tone Adjustment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8695--8704} }
FedAR: Attribute-Guided Representation Learning for Heterogeneous Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Mengjie and Yang, Liu and Shen, Qi}, title = {{FedAR}: Attribute-Guided Representation Learning for Heterogeneous Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6987--6996} }
Model Merging on Loss Landscapes: A Geometric Perspective-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Juanwu and Bhaskar, Anand and Axelrod, Brian and Tolstaya, Ekaterina and Emrich, Tristan}, title = {Model Merging on Loss Landscapes: A Geometric Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7644--7653} }
SAGA: Semantic Anchor-Guided Alignment for Multi-Source Domain Adaptive Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Yongchao and Huang, Ziyue and Zhang, Jinqing and Cai, Wenrui and Liu, Qingjie}, title = {{SAGA}: Semantic Anchor-Guided Alignment for Multi-Source Domain Adaptive Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7779--7788} }
VEGAS: Mitigating Hallucinations in Large Vision-Language Models via Vision-Encoder Attention Guided Adaptive Steering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zihu and Xu, Boxun and Xia, Yuxuan and Li, Peng}, title = {{VEGAS}: Mitigating Hallucinations in Large Vision-Language Models via Vision-Encoder Attention Guided Adaptive Steering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9055--9064} }
Training-Free Uncertainty-guided Logit Adjustment for Few-Shot Class-Incremental Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Woo_2026_CVPR, author = {Woo, Sungwon and Hwang, Dongjun and Kim, Shiwon and Choe, Junsuk and Nang, Jongho}, title = {Training-Free Uncertainty-guided Logit Adjustment for Few-Shot Class-Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7634--7643} }
FineGrade: A Rule-Consistent Scoring Framework for Fine-Grained Action Quality Assessment-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yicong and Leung, Howard}, title = {{FineGrade}: A Rule-Consistent Scoring Framework for Fine-Grained Action Quality Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8258--8267} }
WideEye: Achieving Wide Field-of-view Traffic Video Analytics With Dynamic Orientation Adaptation-
[pdf]
[supp]
[bibtex]@InProceedings{Kong_2026_CVPR, author = {Kong, Z. Jonny and Paul, Sibendu and Hu, Y. Charlie}, title = {{WideEye}: Achieving Wide Field-of-view Traffic Video Analytics With Dynamic Orientation Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8618--8628} }
Organizing Unstructured Image Collections using Natural Language-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Mingxuan and Zhong, Zhun and Li, Jun and Franchi, Gianni and Roy, Subhankar and Ricci, Elisa}, title = {Organizing Unstructured Image Collections using Natural Language}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {8907--8918} }
Uncertainty-Guided Graph Formulation via MWIS for Token Pruning in LVLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Jouwon and Kim, Sohyeon and Kong, Kyeongbo}, title = {Uncertainty-Guided Graph Formulation via {MWIS} for Token Pruning in {LVLMs}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {9510--9519} }
BiomedHELIX : HiErarchical-Local Interaction eXploration for Biomedical Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Ziheng and Guo, Yuncheng and Xu, Jie and Gu, Xiaodong}, title = {{BiomedHELIX} : {HiErarchical}-Local Interaction {eXploration} for Biomedical Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {7060--7069} }
Instruction-Focus-Prompt:Semantics-Driven Structural Prompts for Universal SAM Segmentation-
[pdf]
[bibtex]@InProceedings{Xia_2026_CVPR, author = {Xia, Shuqi and Shi, Guangze and Cao, Jiarui and Shi, Aoyuan and Liu, Meilin and Zhang, Xiaoyi and Wang, Yujie and Liu, Xueyu and Zhao, Cai and He, Ziyuan and Wu, Yongfei and Wei, Mingqiang}, title = {Instruction-Focus-Prompt:Semantics-Driven Structural Prompts for Universal SAM Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7514-7519} }
VADE: Variance-Aware Dynamic Sampling via Online Sample-Level Difficulty Estimation for Multimodal Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2026_CVPR, author = {Hu, Zengjie and Qiu, Jiantao and Bai, Tianyi and Yang, Haojin and Yuan, Binhang and Jing, Qi and He, Conghui and Zhang, Wentao}, title = {VADE: Variance-Aware Dynamic Sampling via Online Sample-Level Difficulty Estimation for Multimodal Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9846-9855} }
Robust Image Self-Recovery against Tampering using Watermark Generation with Pixel Shuffling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, Minyoung and Seo, Paul Hongsuck}, title = {Robust Image Self-Recovery against Tampering using Watermark Generation with Pixel Shuffling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8877-8886} }
GenSRL: Generative Spatiotemporal Representation Learning for Ophthalmic Prognosis Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Wanyu and Shi, Yanzhao and Zheng, Chengxin and Wang, Hua and Wang, Jianing and Zhang, Yue and Yu, Xiaobing and Zhang, Xiaodan}, title = {GenSRL: Generative Spatiotemporal Representation Learning for Ophthalmic Prognosis Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9622-9631} }
AnyExperts: On-Demand Expert Allocation for Multimodal Language Models with Mixture of Experts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Yuting and Wang, Lan and Zhao, Hengyuan and Huang, Linjiang and Liu, Si and Guo, Qingpei}, title = {AnyExperts: On-Demand Expert Allocation for Multimodal Language Models with Mixture of Experts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9205-9214} }
SA-Matching DETR: A Lightweight Transformer Detector with Enhanced Scale Adaptive Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Chengshan and Zhang, Pengnian and Zhao, Jinjing}, title = {SA-Matching DETR: A Lightweight Transformer Detector with Enhanced Scale Adaptive Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6695-6705} }
PestVL-Net: Enabling Multimodal Pest Learning Via Fine-grained Vision-Language Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xueheng and Hu, Tao and Cao, Ke and Qi, Runsheng and Zhang, Huixin and Li, Rui and Zhang, Jie and Xie, Chengjun}, title = {PestVL-Net: Enabling Multimodal Pest Learning Via Fine-grained Vision-Language Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8826-8835} }
RAZOR: Ratio-Aware Layer Editing for Targeted Unlearning in Vision Transformers and Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ranjan_2026_CVPR, author = {Ranjan, Ravi and Grover, Utkarsh and Lin, Xiaomin and Polyzou, Agoritsa}, title = {RAZOR: Ratio-Aware Layer Editing for Targeted Unlearning in Vision Transformers and Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7998-8008} }
Beyond Single Object: Learning 3D Relations with Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ide_2026_CVPR, author = {Ide, Kohsuke and Yamada, Ryousuke and Qiu, Yue and Ma, Xianzheng and Fukuhara, Yoshihiro and Kataoka, Hirokatsu and Satoh, Yutaka}, title = {Beyond Single Object: Learning 3D Relations with Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9684-9694} }
Task-Specific Knowledge Improves Generalization: A Logits-Based Framework for Continual Learning of Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Sijie and Zhu, Yingying}, title = {Task-Specific Knowledge Improves Generalization: A Logits-Based Framework for Continual Learning of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7615-7624} }
Learning from Label Proportion with Dual-Proportion Constraints-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ma_2026_CVPR, author = {Ma, Tianhao and Li, Ximing and Li, Changchun and Guan, Renchu}, title = {Learning from Label Proportion with Dual-Proportion Constraints}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7583-7592} }
Scaling Spatial Reasoning in MLLMs through Programmatic Data Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhi_2026_CVPR, author = {Zhi, Helu and Huang, Jingjing and Xu, Wang and Xu, Yangbin and Huang, Yibin and Zhang, Wanyue and Jiang, Baoyang and Deng, Shirui and Zhu, Liang and Li, FangFang and Zhao, Tiejun and Lin, Yankai and Yao, Yuan}, title = {Scaling Spatial Reasoning in MLLMs through Programmatic Data Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9215-9224} }
PLR-Gate: Real-Time Gradient Privacy Assessment and Gated Transmission for Secure Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Tao and Meng, Jiayang and Chen, Hong and Hou, Chen and Zheng, Guolong and Yang, Xu}, title = {PLR-Gate: Real-Time Gradient Privacy Assessment and Gated Transmission for Secure Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8029-8038} }
Gen-n-Val: Agentic Image Data Generation and Validation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Jing-En and Fang, I-Sheng and Huang, Tzuhsuan and Liu, Yu-Lun and Wang, Chih-Yu and Chen, Jun-Cheng}, title = {Gen-n-Val: Agentic Image Data Generation and Validation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8786-8795} }
QENN: A Quantum Entanglement-Inspired Neural Network for Interaction and Relationship Prediction in Story Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Zijun and Wu, Zhengqian and Zhang, Chunjie and Wang, Zhongyuan and Xiao, Chunxia and Liang, Chao}, title = {QENN: A Quantum Entanglement-Inspired Neural Network for Interaction and Relationship Prediction in Story Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8247-8257} }
AuralSAM2: Enabling SAM2 Hear Through Pyramid Audio-Visual Feature Prompting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yuyuan and Chen, Yuanhong and Wang, Chong and Han, Junlin and Wu, Junde and Peng, Can and Chen, Jingkun and Tian, Yu and Carneiro, Gustavo}, title = {AuralSAM2: Enabling SAM2 Hear Through Pyramid Audio-Visual Feature Prompting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7315-7325} }
Robust Continual Unlearning against Knowledge Erosion and Forgetting Reversal-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Eun-Ju and Shin, Youjin and Woo, Simon S.}, title = {Robust Continual Unlearning against Knowledge Erosion and Forgetting Reversal}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7978-7987} }
Mitigating the ID-OOD Tradeoff in Open-Set Test-Time Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Wenjie and Li, Jia and Dong, Xin and Tian, Yapeng and Xiang, Yu and Guo, Yunhui}, title = {Mitigating the ID-OOD Tradeoff in Open-Set Test-Time Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6789-6798} }
Towards Robust Content Watermarking Against Removal and Forgery Attacks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Yifan and Wang, Yihan and Gao, Xiao-Shan}, title = {Towards Robust Content Watermarking Against Removal and Forgery Attacks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8059-8069} }
ProGAL-VLA: Grounded Alignment through Prospective Reasoning in Vision-Language-Action Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Darabi_2026_CVPR, author = {Darabi, Nastaran and Trivedi, Amit Ranjan}, title = {ProGAL-VLA: Grounded Alignment through Prospective Reasoning in Vision-Language-Action Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9013-9022} }
Frequency-Guided Iterative Bi-directional Exchange Network for Cross-Domain Few-Shot Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Yadang and Liu, Qi and Zhang, Guoqing and Sun, Le and Zheng, Yuhui}, title = {Frequency-Guided Iterative Bi-directional Exchange Network for Cross-Domain Few-Shot Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7841-7851} }
LinkedOut: Linking World Knowledge Representation Out of Video LLM for Next-Generation Video Recommendation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Haichao and Lu, Yao and Wang, Lichen and Li, Yunzhe and Chen, Daiwei and Xu, Yunpeng and Fu, Yun}, title = {LinkedOut: Linking World Knowledge Representation Out of Video LLM for Next-Generation Video Recommendation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7111-7121} }
VideoThinker: Building Agentic VideoLLMs with LLM-Guided Tool Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Chenglin and Chen, Qianglong and Han, Feng and Wang, Yikun and Yin, Xingxi and Gong, Yan and Li, Ruilin and Zhang, Yin and Wang, Jiaqi}, title = {VideoThinker: Building Agentic VideoLLMs with LLM-Guided Tool Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8226-8236} }
EchoTrail-GUI: Building Actionable Memory for GUI Agents via Critic-Guided Self-Exploration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Runze and Zhai, Yuwen and Xu, Bo and Xu, Liwu and Shi, Nian and Zhang, Wei and Lin, Ran and Wang, Liang}, title = {EchoTrail-GUI: Building Actionable Memory for GUI Agents via Critic-Guided Self-Exploration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9347-9356} }
V-STaR: Benchmarking Video-LLMs on Video Spatio-Temporal Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2026_CVPR, author = {Cheng, Zixu and Hu, Jian and Liu, Ziquan and Si, Chenyang and Li, Wei and Gong, Shaogang}, title = {V-STaR: Benchmarking Video-LLMs on Video Spatio-Temporal Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9155-9164} }
Temporally Consistent Long-Term Memory for 3D Single Object Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yoo_2026_CVPR, author = {Yoo, Jaejoon and Lee, SuBeen and Jeon, Yerim and Lee, Miso and Heo, Jae-Pil}, title = {Temporally Consistent Long-Term Memory for 3D Single Object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8388-8397} }
Why MLLMs Struggle to Determine Object Orientations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gopinath_2026_CVPR, author = {Gopinath, Anju and Krishnaswamy, Nikhil and Draper, Bruce}, title = {Why MLLMs Struggle to Determine Object Orientations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9836-9845} }
Mull-Tokens: Modality-Agnostic Latent Thinking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ray_2026_CVPR, author = {Ray, Arijit and Abdelkader, Ahmed and Mao, Chengzhi and Plummer, Bryan A. and Saenko, Kate and Krishna, Ranjay and Guibas, Leonidas and Chu, Wen-Sheng}, title = {Mull-Tokens: Modality-Agnostic Latent Thinking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9477-9488} }
RADSeg: Unleashing Parameter and Compute Efficient Zero-Shot Open-Vocabulary Segmentation Using Agglomerative Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Alama_2026_CVPR, author = {Alama, Omar and Jariwala, Darshil and Bhattacharya, Avigyan and Kim, Seungchan and Wang, Wenshan and Scherer, Sebastian}, title = {RADSeg: Unleashing Parameter and Compute Efficient Zero-Shot Open-Vocabulary Segmentation Using Agglomerative Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9294-9304} }
SPHINX: A Synthetic Environment for Visual Perception and Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Alam_2026_CVPR, author = {Alam, Md Tanvirul and Aggarwal, Saksham and Chae, Justin Yang and Rastogi, Nidhi}, title = {SPHINX: A Synthetic Environment for Visual Perception and Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9489-9499} }
OmniGCD: Abstracting Generalized Category Discovery for Modality Agnosticism-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shipard_2026_CVPR, author = {Shipard, Jordan and Wiliem, Arnold and Thanh, Kien Nguyen and Xiang, Wei and Fookes, Clinton}, title = {OmniGCD: Abstracting Generalized Category Discovery for Modality Agnosticism}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6768-6778} }
Visual Funnel: Resolving Contextual Blindness in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jung_2026_CVPR, author = {Jung, Woojun and Go, Jaehoon and Jeon, Mingyu and Yoon, Sunjae and Kim, Junyeong}, title = {Visual Funnel: Resolving Contextual Blindness in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8962-8971} }
HiViS: Hiding Visual Tokens from the Drafter for Speculative Decoding in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Zhinan and Wang, Peisong and Qiu, Shuang and Cheng, Jian}, title = {HiViS: Hiding Visual Tokens from the Drafter for Speculative Decoding in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8952-8961} }
Pose-dIVE: Pose-Diversified Augmentation for Person Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2026_CVPR, author = {Kim, In\`es Hyeonsu and Jin, Woojeong and Son, Soowon and Seo, Junyoung and Cho, Seokju and Baek, JeongYeol and Lee, Byeongwon and Lee, JoungBin and Kim, Seungryong}, title = {Pose-dIVE: Pose-Diversified Augmentation for Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8640-8650} }
Wake the Sleeping Weights: Sparsely-Activated Continual Test-Time Adaptation for Medical Image Segmentation-
[pdf]
[bibtex]@InProceedings{Ji_2026_CVPR, author = {Ji, Jianhang and Cheng, Zhiming and Zhao, Jianxiang and Ma, Bingtao and Chen, Hao and Gao, Yuhan and Zhang, Lian and Ying, Zuobin and Wang, Shuai}, title = {Wake the Sleeping Weights: Sparsely-Activated Continual Test-Time Adaptation for Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7799-7809} }
Reasoning Within the Mind: Dynamic Multimodal Interleaving in Latent Space-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Chengzhi and Yang, Yuzhe and Fan, Yue and Wei, Qingyue and Liu, Sheng and Wang, Xin Eric}, title = {Reasoning Within the Mind: Dynamic Multimodal Interleaving in Latent Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9225-9236} }
SignReasoner: Compositional Reasoning for Complex Traffic Sign Understanding Via Functional Structure Units-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Ruibin and Lin, Zhenyu and Zhao, Xinhai}, title = {SignReasoner: Compositional Reasoning for Complex Traffic Sign Understanding Via Functional Structure Units}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8796-8805} }
Unify the Views: View-Consistent Prototype Learning for Few-Shot Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Hongli and Wang, Yu and Zhao, Shengjie}, title = {Unify the Views: View-Consistent Prototype Learning for Few-Shot Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7419-7428} }
A Single Pixel is All You Need: Weakly Supervised Medical Image Segmentation using Discrete Denoising Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Demirel_2026_CVPR, author = {Demirel, Mehmet and Kyrkou, Christos}, title = {A Single Pixel is All You Need: Weakly Supervised Medical Image Segmentation using Discrete Denoising Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7541-7551} }
Map2Thought: Explicit 3D Spatial Reasoning via Metric Cognitive Maps-
[pdf]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Xiangjun and Zhang, Zhensong and Chen, Dave Zhenyu and Xu, Songcen and Quan, Long and P\'erez-Pellitero, Eduardo and Jang, Youngkyoon}, title = {Map2Thought: Explicit 3D Spatial Reasoning via Metric Cognitive Maps}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7154-7164} }
On the Group Disparities Arising from Machine Unlearning-
[pdf]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Zijie and Ying, Zuobin and Wang, Yajie and Zhu, Liehuang and Zhou, Wanlei}, title = {On the Group Disparities Arising from Machine Unlearning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8133-8142} }
Language-Augmented Semantic Priors for B-Spline Surface Fitting-
[pdf]
[supp]
[bibtex]@InProceedings{Lou_2026_CVPR, author = {Lou, Yunzhong and Luo, Yusheng and Li, Jiahao and Song, Yu and Zhou, Xiangdong}, title = {Language-Augmented Semantic Priors for B-Spline Surface Fitting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9120-9130} }
FCL-COD: Weakly Supervised Camouflaged Object Detection with Frequency-aware and Contrastive Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ni_2026_CVPR, author = {Ni, Jingchen and Zhang, Quan and Jiang, Dan and Lv, Keyu and Zhang, Ke and Yuan, Chun}, title = {FCL-COD: Weakly Supervised Camouflaged Object Detection with Frequency-aware and Contrastive Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7439-7449} }
Modulate-and-Map: Crossmodal Feature Mapping with Cross-View Modulation for 3D Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Costanzino_2026_CVPR, author = {Costanzino, Alex and Ramirez, Pierluigi Zama and Lisanti, Giuseppe and Di Stefano, Luigi}, title = {Modulate-and-Map: Crossmodal Feature Mapping with Cross-View Modulation for 3D Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8816-8825} }
MOSSTrack : Modality-Specific Spatio-Temporal Context Learning for RGB-T Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yisong and Yao, He and Cheng, Junlong and Lu, Yujie and Bai, Junqi and Zhu, Min}, title = {MOSSTrack : Modality-Specific Spatio-Temporal Context Learning for RGB-T Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8378-8387} }
CoPS: Conditional Prompt Synthesis for Zero-Shot Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Qiyu and Qu, Zhen and Luo, Wei and Yao, Haiming and Cao, Yunkang and Jiang, Yuxin and Duan, Yinan and Luo, Huiyuan and Lv, Chengkan and Zhang, Zhengtao}, title = {CoPS: Conditional Prompt Synthesis for Zero-Shot Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8554-8563} }
Test-Time Distillation for Continual Model Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xiao and Huang, Jiazhen and Liu, Zhiming and Jiang, Qinting and Huang, Fanding and Jiang, Jingyan and Wang, Zhi}, title = {Test-Time Distillation for Continual Model Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7593-7604} }
Benchmarking Vision-Language Models under Contradictory Virtual Content Attacks in Augmented Reality-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xiu_2026_CVPR, author = {Xiu, Yanming and Jiang, Zhengyuan and Gong, Neil Zhenqiang and Gorlatova, Maria}, title = {Benchmarking Vision-Language Models under Contradictory Virtual Content Attacks in Augmented Reality}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9110-9119} }
Disrupting Positional Encoding for Effective Open Set Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yu and Xie, Jiabo and Zhou, Yucan and Mu, Junxian and Hu, Qinghua and Zhu, Pengfei}, title = {Disrupting Positional Encoding for Effective Open Set Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6633-6642} }
Restore-R1: Efficient Image Restoration Agents via Reinforcement Learning with Multimodal LLM Perceptual Feedback-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Jianglin and Wu, Yuanwei and Zhao, Ziyi and Wang, Hongcheng and Jimenez, Felix and Majeedi, Abrar and Fu, Yun}, title = {Restore-R1: Efficient Image Restoration Agents via Reinforcement Learning with Multimodal LLM Perceptual Feedback}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8629-8639} }
Bi-Level Optimization for Single Domain Generalization-
[pdf]
[arXiv]
[bibtex]@InProceedings{Heidari_2026_CVPR, author = {Heidari, Marzi and Zhang, Hanping and Yan, Hao and Guo, Yuhong}, title = {Bi-Level Optimization for Single Domain Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6685-6694} }
EscherNet++: A Scalable Multi-View Framework for Amodal Completion, Novel View Synthesis and Feed-Forward 3D Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xinan and Irshad, Muhammad Zubair and Yezzi, Anthony and Tsai, Yi-Chang and Kira, Zsolt}, title = {EscherNet++: A Scalable Multi-View Framework for Amodal Completion, Novel View Synthesis and Feed-Forward 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8846-8856} }
Are Multimodal Large Language Models Ready for Omnidirectional Spatial Reasoning?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dongfang_2026_CVPR, author = {Dongfang, Zihao and Zheng, Xu and Weng, Ziqiao and Lyu, Yuanhuiyi and Paudel, Danda Pani and Van Gool, Luc and Yang, Kailun and Hu, Xuming}, title = {Are Multimodal Large Language Models Ready for Omnidirectional Spatial Reasoning?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9759-9769} }
SCOPE: Spatially Ordered Continual Learning for 3D Segmentation-
[pdf]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Wenhao and Zhang, Huaidong and Zhang, Weipeng and Zhang, Qianle and He, Shengfeng}, title = {SCOPE: Spatially Ordered Continual Learning for 3D Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7862-7871} }
Back

