Findings

Back

Revisiting Real-Time Detection Transformer with Efficient Encoder Design
Jiannan Huang,
Aditya Kane,
Fengzhe Zhou,
Yunchao Wei,
Humphrey Shi
[pdf] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Jiannan and Kane, Aditya and Zhou, Fengzhe and Wei, Yunchao and Shi, Humphrey},
  title     = {Revisiting Real-Time Detection Transformer with Efficient Encoder Design},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6859--6868},
}

Assessing the Reliability of Image Quality Metrics and Mitigating Quality Bias in Generative Models
Hoin Jung,
Shenyu Lu,
De Wang,
Xiaoqian Wang
[pdf] [supp]
[bibtex]
@InProceedings{Jung_2026_CVPR,
  author    = {Jung, Hoin and Lu, Shenyu and Wang, De and Wang, Xiaoqian},
  title     = {Assessing the Reliability of Image Quality Metrics and Mitigating Quality Bias in Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7956--7967},
}

Now You See It, Now You Don't: Instant Concept Erasure for Safe Text-to-Image and Video Generation
Shristi Das Biswas,
Arani Roy,
Kaushik Roy
[pdf] [supp]
[bibtex]
@InProceedings{Das_Biswas_2026_CVPR,
  author    = {Das Biswas, Shristi and Roy, Arani and Roy, Kaushik},
  title     = {Now You See It, Now You Don't: Instant Concept Erasure for Safe Text-to-Image and Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7903--7913},
}

Towards Generalization of Scene Text Tampering Localization via Causal Invariance
Huiru Shao,
Bin Dong,
Kaizhu Huang,
Xiaowei Huang,
Qiufeng Wang
[pdf] [supp]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Huiru and Dong, Bin and Huang, Kaizhu and Huang, Xiaowei and Wang, Qiufeng},
  title     = {Towards Generalization of Scene Text Tampering Localization via Causal Invariance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7262--7271},
}

TriGuard-FL: A User-Centric Trust Triad in Federated Learning via Auditable Data, Verifiable Contributions, and Antidote-Driven Mitigation
K Naveen Kumar,
Mohsen Guizani
[pdf] [supp]
[bibtex]
@InProceedings{Kumar_2026_CVPR,
  author    = {Kumar, K Naveen and Guizani, Mohsen},
  title     = {{TriGuard-FL}: A User-Centric Trust Triad in Federated Learning via Auditable Data, Verifiable Contributions, and Antidote-Driven Mitigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7945--7955},
}

CodePlot-CoT: Mathematical Visual Reasoning by Thinking with Code-Driven Images
Chengqi Duan,
Kaiyue Sun,
Rongyao Fang,
Manyuan Zhang,
Yan Feng,
Ying Luo,
Yufang Liu,
Ke Wang,
Peng Pei,
Xunliang Cai,
Hongsheng Li,
Yi Ma,
Xihui Liu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Duan_2026_CVPR,
  author    = {Duan, Chengqi and Sun, Kaiyue and Fang, Rongyao and Zhang, Manyuan and Feng, Yan and Luo, Ying and Liu, Yufang and Wang, Ke and Pei, Peng and Cai, Xunliang and Li, Hongsheng and Ma, Yi and Liu, Xihui},
  title     = {{CodePlot-CoT}: Mathematical Visual Reasoning by Thinking with Code-Driven Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9586--9596},
}

Continual Adaptation of Vision Foundational Models for Semantic Segmentation in Adverse Weather
Nikhil Kumar Jangamreddy,
Mahsa Baktashmotlagh,
Chetan Arora
[pdf] [supp]
[bibtex]
@InProceedings{Jangamreddy_2026_CVPR,
  author    = {Jangamreddy, Nikhil Kumar and Baktashmotlagh, Mahsa and Arora, Chetan},
  title     = {Continual Adaptation of Vision Foundational Models for Semantic Segmentation in Adverse Weather},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7686--7696},
}

Reversing the Flow: Generation-to-Understanding Synergy in Large Multimodal Models
Yujun Tong,
Dongliang Chang,
Zijin Yin,
Xintong Liu,
Yuanchen Fang,
Zhanyu Ma
[pdf] [arXiv]
[bibtex]
@InProceedings{Tong_2026_CVPR,
  author    = {Tong, Yujun and Chang, Dongliang and Yin, Zijin and Liu, Xintong and Fang, Yuanchen and Ma, Zhanyu},
  title     = {Reversing the Flow: Generation-to-Understanding Synergy in Large Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6976--6986},
}

VirPro: Visual-Referred Probabilistic Prompt Learning for Weakly-Supervised Monocular 3D Detection
Chupeng Liu,
Jiyong Rao,
Shangquan Sun,
Runkai Zhao,
Weidong Cai
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Chupeng and Rao, Jiyong and Sun, Shangquan and Zhao, Runkai and Cai, Weidong},
  title     = {{VirPro}: Visual-Referred Probabilistic Prompt Learning for Weakly-Supervised Monocular {3D} Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7530--7540},
}

Learning When to Look: A Disentangled Curriculum for Strategic Perception in Multimodal Reasoning
Siqi Yang,
Zilve Gao,
Haibo Qiu,
Fanfan Liu,
Peng Shi,
Zhixiong Zeng,
Qingmin Liao,
Lin Ma
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Siqi and Gao, Zilve and Qiu, Haibo and Liu, Fanfan and Shi, Peng and Zeng, Zhixiong and Liao, Qingmin and Ma, Lin},
  title     = {Learning When to Look: A Disentangled Curriculum for Strategic Perception in Multimodal Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9738--9748},
}

QuPAINT: Physics-Aware Instruction Tuning Approach to Quantum Material Discovery
Xuan Bac Nguyen,
Hoang-Quan Nguyen,
Sankalp Pandey,
Tim Faltermeier,
Nicholas Borys,
Hugh Churchill,
Khoa Luu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Xuan Bac and Nguyen, Hoang-Quan and Pandey, Sankalp and Faltermeier, Tim and Borys, Nicholas and Churchill, Hugh and Luu, Khoa},
  title     = {{QuPAINT}: Physics-Aware Instruction Tuning Approach to Quantum Material Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8684--8694},
}

DocSLM: A Small Vision-Language Model for Long Multimodal Document Understanding
Tanveer Hannan,
Dimitrios Mallios,
Parth Pathak,
Faegheh Sardari,
Thomas Seidl,
Gedas Bertasius,
Mohsen Fayyaz,
Sunando Sengupta
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hannan_2026_CVPR,
  author    = {Hannan, Tanveer and Mallios, Dimitrios and Pathak, Parth and Sardari, Faegheh and Seidl, Thomas and Bertasius, Gedas and Fayyaz, Mohsen and Sengupta, Sunando},
  title     = {{DocSLM}: A Small Vision-Language Model for Long Multimodal Document Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9337--9346},
}

Count What Repeats: Period-Adaptive Multi-Scale Consistency for Self-Supervised Repetitive Action Counting
Shizhao Gao,
Jun Li,
Qiming Li
[pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Shizhao and Li, Jun and Li, Qiming},
  title     = {Count What Repeats: Period-Adaptive Multi-Scale Consistency for Self-Supervised Repetitive Action Counting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8143--8152},
}

Adaptive Reinforcement for Open-ended Medical Reasoning via Semantic-Guided Reward Collapse Mitigation
Yizhou Liu,
Dingkang Yang,
Zizhi Chen,
Minghao Han,
Xukun Zhang,
Keliang Liu,
Jingwei Wei,
Lihua Zhang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yizhou and Yang, Dingkang and Chen, Zizhi and Han, Minghao and Zhang, Xukun and Liu, Keliang and Wei, Jingwei and Zhang, Lihua},
  title     = {Adaptive Reinforcement for Open-ended Medical Reasoning via Semantic-Guided Reward Collapse Mitigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8651--8660},
}

Proto-SaGa: Prototype-based 3D Scene Segmentation with Semantic-aware Gaussian Grouping
Youngmin Oh,
Changjae Oh,
Bumsub Ham
[pdf] [supp]
[bibtex]
@InProceedings{Oh_2026_CVPR,
  author    = {Oh, Youngmin and Oh, Changjae and Ham, Bumsub},
  title     = {{Proto-SaGa}: Prototype-based {3D} Scene Segmentation with Semantic-aware {Gaussian} Grouping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7493--7502},
}

Attention-Space Contrastive Guidance for Efficient Hallucination Mitigation in LVLMs
Yujin Jo,
Sangyoon Bae,
Taesup Kim
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jo_2026_CVPR,
  author    = {Jo, Yujin and Bae, Sangyoon and Kim, Taesup},
  title     = {Attention-Space Contrastive Guidance for Efficient Hallucination Mitigation in {LVLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9706--9715},
}

It's Time to Get It Right: Improving Analog Clock Reading and Clock-Hand Spatial Reasoning in Vision-Language Models
Jaeha Choi,
Jin Won Lee,
Siwoo You,
Jangho Lee
[pdf] [supp]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Jaeha and Lee, Jin Won and You, Siwoo and Lee, Jangho},
  title     = {It's Time to Get It Right: Improving Analog Clock Reading and Clock-Hand Spatial Reasoning in Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9500--9509},
}

STS-Mixer: Spatio-Temporal-Spectral Mixer for 4D Point Cloud Video Understanding
Wenhao Li,
Xueying Jiang,
Gongjie Zhang,
Xiaoqin Zhang,
Ling Shao,
Shijian Lu
[pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Wenhao and Jiang, Xueying and Zhang, Gongjie and Zhang, Xiaoqin and Shao, Ling and Lu, Shijian},
  title     = {{STS-Mixer}: Spatio-Temporal-Spectral Mixer for {4D} Point Cloud Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8185--8194},
}

VACoT: Rethinking Visual Data Augmentation with VLMs
Zhengzhuo Xu,
Chong Sun,
SiNan Du,
Chen Li,
Jing Lyu,
Chun Yuan
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zhengzhuo and Sun, Chong and Du, SiNan and Li, Chen and Lyu, Jing and Yuan, Chun},
  title     = {{VACoT}: Rethinking Visual Data Augmentation with {VLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9780--9790},
}

Active Video Perception: Iterative Evidence Seeking for Agentic Long Video Understanding
Ziyang Wang,
Honglu Zhou,
Shijie Wang,
Junnan Li,
Caiming Xiong,
Silvio Savarese,
Mohit Bansal,
Michael S. Ryoo,
Juan Carlos Niebles
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ziyang and Zhou, Honglu and Wang, Shijie and Li, Junnan and Xiong, Caiming and Savarese, Silvio and Bansal, Mohit and Ryoo, Michael S. and Niebles, Juan Carlos},
  title     = {Active Video Perception: Iterative Evidence Seeking for Agentic Long Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9088--9099},
}

BrandFusion: A Multi-Agent Framework for Seamless Brand Integration in Text-to-Video Generation
Zihao Zhu,
Ruotong Wang,
Siwei Lyu,
Min Zhang,
Baoyuan Wu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Zihao and Wang, Ruotong and Lyu, Siwei and Zhang, Min and Wu, Baoyuan},
  title     = {{BrandFusion}: A Multi-Agent Framework for Seamless Brand Integration in Text-to-Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8661--8671},
}

Learning through Creation: A Hash-Free Framework for On-the-Fly Category Discovery
Bohan Zhang,
Weidong Tang,
Zhixiang Chi,
Yi Jin,
Zhenbo Li,
Yang Wang,
Yanan Wu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Bohan and Tang, Weidong and Chi, Zhixiang and Jin, Yi and Li, Zhenbo and Wang, Yang and Wu, Yanan},
  title     = {Learning through Creation: A Hash-Free Framework for On-the-Fly Category Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7830--7840},
}

Attention Never Lie: Visual Attention Defocus Reveals and Rectifies Hallucinations in MLLMs
Chenxi Zhao,
Yan Zhou,
Jufeng Yang
[pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Chenxi and Zhou, Yan and Yang, Jufeng},
  title     = {Attention Never Lie: Visual Attention Defocus Reveals and Rectifies Hallucinations in {MLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8897--8906},
}

Leveraging Arbitrary Data Sources for AI-Generated Image Detection Without Sacrificing Generalization
Qinghui He,
Haifeng Zhang,
Xiuli Bi,
Bo Liu,
Chi-Man Pun,
Bin Xiao
[pdf] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Qinghui and Zhang, Haifeng and Bi, Xiuli and Liu, Bo and Pun, Chi-Man and Xiao, Bin},
  title     = {Leveraging Arbitrary Data Sources for {AI-Generated} Image Detection Without Sacrificing Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6748--6757},
}

ODOV: Benchmark the Open-Domain Open-Vocabulary Object Detection
Yupeng Zhang,
Ruize Han,
Fangnan Zhou,
Wei Feng,
Liang Wan
[pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yupeng and Han, Ruize and Zhou, Fangnan and Feng, Wei and Wan, Liang},
  title     = {{ODOV}: Benchmark the Open-Domain Open-Vocabulary Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6643--6654},
}

NRFP: A Noise-Robust Feature Plugin for Source-Free Domain Adaptation
Huanxin Zou,
Zhize Wu,
Yue Jiang,
Jijian Zhou,
Zhiwei Xu,
Teng Li,
Jianhua Shu,
Fan Cheng
[pdf] [supp]
[bibtex]
@InProceedings{Zou_2026_CVPR,
  author    = {Zou, Huanxin and Wu, Zhize and Jiang, Yue and Zhou, Jijian and Xu, Zhiwei and Li, Teng and Shu, Jianhua and Cheng, Fan},
  title     = {{NRFP}: A Noise-Robust Feature Plugin for Source-Free Domain Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7562--7572},
}

From Horizontal to Rotated: Cross-View Object Geo-Localization with Orientation Awareness
Chenlin Fu,
Ao Gong,
Xingtao Ling,
Yingying Zhu
[pdf]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Chenlin and Gong, Ao and Ling, Xingtao and Zhu, Yingying},
  title     = {From Horizontal to Rotated: Cross-View Object Geo-Localization with Orientation Awareness},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7101--7110},
}

ReFoCUS: Reinforcement-guided Frame Optimization for Contextual Understanding
Hosu Lee,
Junho Kim,
Hyunjun Kim,
Yong Man Ro
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Hosu and Kim, Junho and Kim, Hyunjun and Ro, Yong Man},
  title     = {{ReFoCUS}: Reinforcement-guided Frame Optimization for Contextual Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8291--8302},
}

VSAS-Bench: Real-Time Evaluation of Visual Streaming Assistant Models
Pavan Kumar Anasosalu Vasu,
Cem Koc,
Fartash Faghri,
Chun-Liang Li,
Bo Feng,
Zhengfeng Lai,
Meng Cao,
Oncel Tuzel,
Hadi Pouransari
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vasu_2026_CVPR,
  author    = {Vasu, Pavan Kumar Anasosalu and Koc, Cem and Faghri, Fartash and Li, Chun-Liang and Feng, Bo and Lai, Zhengfeng and Cao, Meng and Tuzel, Oncel and Pouransari, Hadi},
  title     = {{VSAS-Bench}: Real-Time Evaluation of Visual Streaming Assistant Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9654--9663},
}

Switch-KD: Visual-Switch Knowledge Distillation for Vision-Language Models
Haoyi Sun,
Xiaoxiao Wang,
Ning Mao,
Qian Wang,
Lifu Mu,
Wen Zheng,
Tao Wei,
Wei Chen
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Haoyi and Wang, Xiaoxiao and Mao, Ning and Wang, Qian and Mu, Lifu and Zheng, Wen and Wei, Tao and Chen, Wei},
  title     = {{Switch-KD}: Visual-Switch Knowledge Distillation for Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9316--9326},
}

DeepSketcher: Internalizing Visual Manipulation for Multimodal Reasoning
Chi Zhang,
Haibo Qiu,
Qiming Zhang,
Zhixiong Zeng,
Lin Ma,
Jing Zhang
[pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chi and Qiu, Haibo and Zhang, Qiming and Zeng, Zhixiong and Ma, Lin and Zhang, Jing},
  title     = {{DeepSketcher}: Internalizing Visual Manipulation for Multimodal Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9389--9400},
}

Exploring Hierarchical Consistency and Unbiased Objectness for Open-Vocabulary Object Detection
Sanghoon Lee,
Geon Lee,
Hyekang Park,
Bumsub Ham
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Sanghoon and Lee, Geon and Park, Hyekang and Ham, Bumsub},
  title     = {Exploring Hierarchical Consistency and Unbiased Objectness for Open-Vocabulary Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6819--6828},
}

Revisiting Image Manipulation Localization under Realistic Manipulation Scenarios
Xuekang Zhu,
Ji-Zhe Zhou,
Kaiwen Feng,
Chenfan Qu,
Xiwen Wang,
Yunfei Wang,
Liting Zhou,
Jian Liu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Xuekang and Zhou, Ji-Zhe and Feng, Kaiwen and Qu, Chenfan and Wang, Xiwen and Wang, Yunfei and Zhou, Liting and Liu, Jian},
  title     = {Revisiting Image Manipulation Localization under Realistic Manipulation Scenarios},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7198--7207},
}

MIRA: Multimodal Iterative Reasoning Agent for Image Editing
Ziyun Zeng,
Hang Hua,
Jiebo Luo
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Ziyun and Hua, Hang and Luo, Jiebo},
  title     = {{MIRA}: Multimodal Iterative Reasoning Agent for Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9563--9573},
}

SAGE: Shape-Adapting Gated Experts for Adaptive Histopathology Image Segmentation
Gia Huy Thai,
Hoang-Nguyen Vu,
Anh-Minh Phan,
Quang-Thinh Ly,
Thi-Ngoc-Truc Nguyen,
Nhat Ho
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Thai_2026_CVPR,
  author    = {Thai, Gia Huy and Vu, Hoang-Nguyen and Phan, Anh-Minh and Ly, Quang-Thinh and Nguyen, Thi-Ngoc-Truc and Ho, Nhat},
  title     = {{SAGE}: Shape-Adapting Gated Experts for Adaptive Histopathology Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7337--7346},
}

HyperFM: A Efficient Hyperspectral Foundation Model with Spectral Grouping
Zahid Hassan Tushar,
Sanjay Purushotham
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tushar_2026_CVPR,
  author    = {Tushar, Zahid Hassan and Purushotham, Sanjay},
  title     = {{HyperFM}: A Efficient Hyperspectral Foundation Model with Spectral Grouping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6955--6965},
}

Mitigating Object Hallucinations in LVLMs via Attention Imbalance Rectification
Han Sun,
Qin Li,
Peixin Wang,
Min Zhang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Han and Li, Qin and Wang, Peixin and Zhang, Min},
  title     = {Mitigating Object Hallucinations in {LVLMs} via Attention Imbalance Rectification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8930--8940},
}

Hold-One-Shot-Out (HOSO) for Validation-Free Few-Shot CLIP Adapters
Chris Vorster,
Mayug Maniparambil,
Noel O'Connor,
Noel Murphy,
Derek Molloy
[pdf] [supp]
[bibtex]
@InProceedings{Vorster_2026_CVPR,
  author    = {Vorster, Chris and Maniparambil, Mayug and O'Connor, Noel and Murphy, Noel and Molloy, Derek},
  title     = {Hold-One-Shot-Out ({HOSO}) for Validation-Free Few-Shot {CLIP} Adapters},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7820--7829},
}

Euclid's Gift: Enhancing Spatial Perception and Reasoning in Vision-Language Models via Geometric Surrogate Tasks
Shijie Lian,
Changti Wu,
Laurence Tianruo Yang,
Hang Yuan,
Bin Yu,
Lei Zhang,
Kai Chen
[pdf] [supp]
[bibtex]
@InProceedings{Lian_2026_CVPR,
  author    = {Lian, Shijie and Wu, Changti and Yang, Laurence Tianruo and Yuan, Hang and Yu, Bin and Zhang, Lei and Chen, Kai},
  title     = {{Euclid's} Gift: Enhancing Spatial Perception and Reasoning in Vision-Language Models via Geometric Surrogate Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9824--9835},
}

PASR: Pose-Aware 3D Shape Retrieval from Occluded Single Views
Jiaxin Shi,
Guofeng Zhang,
Wufei Ma,
Naifu Liang,
Adam Kortylewski,
Alan Yuille
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Jiaxin and Zhang, Guofeng and Ma, Wufei and Liang, Naifu and Kortylewski, Adam and Yuille, Alan},
  title     = {{PASR}: Pose-Aware {3D} Shape Retrieval from Occluded Single Views},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6869--6879},
}

MHMamba: Multi-Head Mamba for 3D Brain Tumor Segmentation
Hanjun Tao,
Hua Wang,
Fan Zhang
[pdf] [arXiv]
[bibtex]
@InProceedings{Tao_2026_CVPR,
  author    = {Tao, Hanjun and Wang, Hua and Zhang, Fan},
  title     = {{MHMamba}: Multi-Head {Mamba} for {3D} Brain Tumor Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7388--7397},
}

OV-Stitcher: A Global Context-Aware Framework for Training-Free Open Vocabulary Semantic Segmentation
Seungjae Moon,
Seunghyun Oh,
Youngmin Ro
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Moon_2026_CVPR,
  author    = {Moon, Seungjae and Oh, Seunghyun and Ro, Youngmin},
  title     = {{OV-Stitcher}: A Global Context-Aware Framework for Training-Free Open Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7357--7367},
}

TP^2-DETR: Unlocking Deformable DETR for Zero-Shot Temporal Action Proposal Generation with Temporal Feature Pyramids
Ya-Yun Cheng,
Kan Tippayamontri,
Chih-Yuan Yang,
Jane Yung-jen Hsu
[pdf] [supp]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Ya-Yun and Tippayamontri, Kan and Yang, Chih-Yuan and Hsu, Jane Yung-jen},
  title     = {{TP$^2$-DETR}: Unlocking Deformable {DETR} for Zero-Shot Temporal Action Proposal Generation with Temporal Feature Pyramids},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8237--8246},
}

ForestPrune: High-ratio Visual Token Compression for Video Multimodal Large Language Models Via Spatial-Temporal Forest Modeling
Shaobo Ju,
Baiyang Song,
Tao Chen,
Jiapeng Zhang,
Qiong Wu,
Chao Chang,
Huaixi Wang,
Yiyi Zhou,
Rongrong Ji
[pdf] [arXiv]
[bibtex]
@InProceedings{Ju_2026_CVPR,
  author    = {Ju, Shaobo and Song, Baiyang and Chen, Tao and Zhang, Jiapeng and Wu, Qiong and Chang, Chao and Wang, Huaixi and Zhou, Yiyi and Ji, Rongrong},
  title     = {{ForestPrune}: High-ratio Visual Token Compression for Video Multimodal Large Language Models Via Spatial-Temporal Forest Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8326--8336},
}

CarePilot: A Multi-Agent Framework for Long-Horizon Computer Task Automation in Healthcare
Akash Ghosh,
Tajamul Ashraf,
Rishu Kumar Singh,
Numan Saeed,
Sriparna Saha,
Xiuying Chen,
Salman Khan
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ghosh_2026_CVPR,
  author    = {Ghosh, Akash and Ashraf, Tajamul and Singh, Rishu Kumar and Saeed, Numan and Saha, Sriparna and Chen, Xiuying and Khan, Salman},
  title     = {{CarePilot}: A Multi-Agent Framework for Long-Horizon Computer Task Automation in Healthcare},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9695--9705},
}

CurrMix: Curriculum-Enhanced MixUp for Long-Tailed Visual Recognition
Zhongquan Jian,
Yanhao Chen,
Bingbing Hu,
Wenhan Lv,
Shaopan Wang,
Jipeng Wu,
Junfeng Yao,
Yang Lu,
Qingqiang Wu
[pdf] [supp]
[bibtex]
@InProceedings{Jian_2026_CVPR,
  author    = {Jian, Zhongquan and Chen, Yanhao and Hu, Bingbing and Lv, Wenhan and Wang, Shaopan and Wu, Jipeng and Yao, Junfeng and Lu, Yang and Wu, Qingqiang},
  title     = {{CurrMix}: Curriculum-Enhanced {MixUp} for Long-Tailed Visual Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7706--7716},
}

Object-Centric Vision Token Pruning for Vision Language Models
Guangyuan Li,
Rongzhen Zhao,
Jinhong Deng,
Yanbo Wang,
Joni Pajarinen
[pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Guangyuan and Zhao, Rongzhen and Deng, Jinhong and Wang, Yanbo and Pajarinen, Joni},
  title     = {Object-Centric Vision Token Pruning for Vision Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7040--7049},
}

SpHOR: A Representation Learning Perspective on Open-set Recognition for Identifying Unknown Classes in Deep Neural Networks
Thiru Thillai Nadarasar Bahavan,
Sachith Seneviratne,
Saman Halgamuge
[pdf] [supp]
[bibtex]
@InProceedings{Bahavan_2026_CVPR,
  author    = {Bahavan, Thiru Thillai Nadarasar and Seneviratne, Sachith and Halgamuge, Saman},
  title     = {{SpHOR}: A Representation Learning Perspective on Open-set Recognition for Identifying Unknown Classes in Deep Neural Networks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6901--6910},
}

coDrawAgents: A Multi-Agent Dialogue Framework for Compositional Image Generation
Chunhan Li,
Qifeng Wu,
Jia-Hui Pan,
Ka-Hei Hui,
Jingyu Hu,
Yuming Jiang,
Bin Sheng,
Xihui Liu,
Wenjuan Gong,
Zhengzhe Liu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Chunhan and Wu, Qifeng and Pan, Jia-Hui and Hui, Ka-Hei and Hu, Jingyu and Jiang, Yuming and Sheng, Bin and Liu, Xihui and Gong, Wenjuan and Liu, Zhengzhe},
  title     = {{coDrawAgents}: A Multi-Agent Dialogue Framework for Compositional Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9802--9812},
}

Memorization In Stable Diffusion Is Unexpectedly Driven by CLIP Embeddings
Bumjun Kim,
Albert No
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Bumjun and No, Albert},
  title     = {Memorization In {Stable Diffusion} Is Unexpectedly Driven by {CLIP} Embeddings},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7988--7997},
}

Recursive Think-Answer Process for LLMs and VLMs
Byung-Kwan Lee,
Youngchae Chee,
Yong Man Ro
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Byung-Kwan and Chee, Youngchae and Ro, Yong Man},
  title     = {Recursive Think-Answer Process for {LLMs} and {VLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9608--9621},
}

FedOrtho: Efficient Federated Unlearning Via Orthogonal Convolution and Adaptive Soft Pruning
Qinghui Gong,
Xue Yang,
Xunlei Chen,
Jinshan Lai,
Hua Meng,
Xiaohu Tang
[pdf] [supp]
[bibtex]
@InProceedings{Gong_2026_CVPR,
  author    = {Gong, Qinghui and Yang, Xue and Chen, Xunlei and Lai, Jinshan and Meng, Hua and Tang, Xiaohu},
  title     = {{FedOrtho}: Efficient Federated Unlearning Via Orthogonal Convolution and Adaptive Soft Pruning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8009--8018},
}

VoQA: Visual-only Question Answering
Jianing An,
Luyang Jiang,
Jie Luo,
Wenjun Wu,
Lei Huang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{An_2026_CVPR,
  author    = {An, Jianing and Jiang, Luyang and Luo, Jie and Wu, Wenjun and Huang, Lei},
  title     = {{VoQA}: Visual-only Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9100--9109},
}

IEA: Amateur-Friendly Conversational Image Editing Agent via Three Stages of Multitask Alignment
Zichen Zhu,
Yuheng Sun,
Mingxuan Zhu,
Wenjie Ma,
Situo Zhang,
Zhexiang Wang,
Ziyue Yang,
Danyang Zhang,
Kunyao Lan,
Zihan Zhao,
Dingye Liu,
Siqi Xiang,
Lu Chen,
Kai Yu
[pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Zichen and Sun, Yuheng and Zhu, Mingxuan and Ma, Wenjie and Zhang, Situo and Wang, Zhexiang and Yang, Ziyue and Zhang, Danyang and Lan, Kunyao and Zhao, Zihan and Liu, Dingye and Xiang, Siqi and Chen, Lu and Yu, Kai},
  title     = {{IEA}: Amateur-Friendly Conversational Image Editing Agent via Three Stages of Multitask Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8672--8683},
}

SEM: Sparse Embedding Modulation for Post-Hoc Debiasing of Vision-Language Models
Quentin Guimard,
Federico Bartsch,
Simone Caldarella,
Rahaf Aljundi,
Elisa Ricci,
Massimiliano Mancini
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guimard_2026_CVPR,
  author    = {Guimard, Quentin and Bartsch, Federico and Caldarella, Simone and Aljundi, Rahaf and Ricci, Elisa and Mancini, Massimiliano},
  title     = {{SEM}: Sparse Embedding Modulation for Post-Hoc Debiasing of Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8101--8110},
}

Indexing Multimodal Language Models for Large-scale Image Retrieval
Bahey Tharwat,
Giorgos Kordopatis-Zilos,
Pavel Suma,
Ian Reid,
Giorgos Tolias
[pdf] [supp]
[bibtex]
@InProceedings{Tharwat_2026_CVPR,
  author    = {Tharwat, Bahey and Kordopatis-Zilos, Giorgos and Suma, Pavel and Reid, Ian and Tolias, Giorgos},
  title     = {Indexing Multimodal Language Models for Large-scale Image Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6737--6747},
}

MASS: Motion-Aware Spatial-temporal Grounding for Physics Reasoning and Comprehension in Vision-Language Models
Xiyang Wu,
Zongxia Li,
Jihui Jin,
Gouthaman KV,
Vishnu Raj,
Nilotpal Sinha,
Jingxi Chen,
Fan Du,
Dinesh Manocha
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Xiyang and Li, Zongxia and Jin, Jihui and KV, Gouthaman and Raj, Vishnu and Sinha, Nilotpal and Chen, Jingxi and Du, Fan and Manocha, Dinesh},
  title     = {{MASS}: Motion-Aware Spatial-temporal Grounding for Physics Reasoning and Comprehension in Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9433--9443},
}

MoonSeg3R: Monocular Online Zero-Shot Segment Anything in 3D with Reconstructive Foundation Priors
Zhipeng Du,
Duolikun Danier,
Jan Eric Lenssen,
Hakan Bilen
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2026_CVPR,
  author    = {Du, Zhipeng and Danier, Duolikun and Lenssen, Jan Eric and Bilen, Hakan},
  title     = {{MoonSeg3R}: Monocular Online Zero-Shot Segment Anything in {3D} with Reconstructive Foundation Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7304--7314},
}

VRSA: Jailbreaking Multimodal Large Language Models through Visual Reasoning Sequential Attack
Shiji Zhao,
Shukun Xiong,
Yao Huang,
Jin Yan,
Zhenyu Wu,
Jiyang Guan,
Ranjie Duan,
Jialing Tao,
Hui Xue,
Xingxing Wei
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Shiji and Xiong, Shukun and Huang, Yao and Yan, Jin and Wu, Zhenyu and Guan, Jiyang and Duan, Ranjie and Tao, Jialing and Xue, Hui and Wei, Xingxing},
  title     = {{VRSA}: Jailbreaking Multimodal Large Language Models through Visual Reasoning Sequential Attack},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9412--9421},
}

Taming Hallucinations: Boosting MLLMs' Video Understanding via Counterfactual Video Generation
Zhe Huang,
Hao Wen,
Aiming Hao,
Bingze Song,
Meiqi Wu,
Jiahong Wu,
Xiangxiang Chu,
Sheng Lu,
Haoqian Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zhe and Wen, Hao and Hao, Aiming and Song, Bingze and Wu, Meiqi and Wu, Jiahong and Chu, Xiangxiang and Lu, Sheng and Wang, Haoqian},
  title     = {Taming Hallucinations: Boosting {MLLMs}' Video Understanding via Counterfactual Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8153--8163},
}

Evolutionary Multi-Agent Collaboration for Real-World Video Face Restoration
Bowen Tang,
Tao Wang,
Miao Zhang,
Xin Yu,
Jinwei Chen,
Bo Li,
Kaihao Zhang
[pdf]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Bowen and Wang, Tao and Zhang, Miao and Yu, Xin and Chen, Jinwei and Li, Bo and Zhang, Kaihao},
  title     = {Evolutionary Multi-Agent Collaboration for Real-World Video Face Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8175--8184},
}

FedNPC: Stochastic Noise-driven Post-hoc Classifier Calibration Method for Federated Long-tailed Learning
Jintong Gao,
He Zhao,
Yibo Yang,
Dandan Guo
[pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Jintong and Zhao, He and Yang, Yibo and Guo, Dandan},
  title     = {{FedNPC}: Stochastic Noise-driven Post-hoc Classifier Calibration Method for Federated Long-tailed Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7737--7746},
}

PrAda: Few-Shot Visual Adaptation for Text-Prompted Segmentation
Gabriele Rosi,
Fabio Cermelli,
Carlo Masone,
Barbara Caputo
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rosi_2026_CVPR,
  author    = {Rosi, Gabriele and Cermelli, Fabio and Masone, Carlo and Caputo, Barbara},
  title     = {{PrAda}: Few-Shot Visual Adaptation for Text-Prompted Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7326--7336},
}

GDP: Graph-Based Dynamic Personalization for Multimodal Large Language Models
Cong Ray,
Xiangwen Deng,
Feice Huang,
ZhengXian Wu,
Shen'ao Jiang,
Peng Jiao,
Zhifang Liu,
Haoqian Wang
[pdf] [supp]
[bibtex]
@InProceedings{Ray_2026_CVPR,
  author    = {Ray, Cong and Deng, Xiangwen and Huang, Feice and Wu, ZhengXian and Jiang, Shen'ao and Jiao, Peng and Liu, Zhifang and Wang, Haoqian},
  title     = {{GDP}: Graph-Based Dynamic Personalization for Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9195--9204},
}

Human-Intervention Segmentation via Federated Intent Embedding and Multi-Mask Recommendation
Yeongsu Kim,
Seo-Yeon Choi,
Kyungsu Lee
[pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Yeongsu and Choi, Seo-Yeon and Lee, Kyungsu},
  title     = {Human-Intervention Segmentation via Federated Intent Embedding and Multi-Mask Recommendation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8857--8867},
}

Ninja Codes: Neurally Generated Fiducial Markers for Stealthy 6-DoF Tracking
Yuichiro Takeuchi,
Yusuke Imoto,
Shunya Kato
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Takeuchi_2026_CVPR,
  author    = {Takeuchi, Yuichiro and Imoto, Yusuke and Kato, Shunya},
  title     = {Ninja Codes: Neurally Generated Fiducial Markers for Stealthy {6-DoF} Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6880--6889},
}

Can Textual Reasoning Improve the Performance of MLLMs on Fine-Grained Visual Classification?
Jie Zhu,
Yiyang Su,
Xiaoming Liu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Jie and Su, Yiyang and Liu, Xiaoming},
  title     = {Can Textual Reasoning Improve the Performance of {MLLMs} on Fine-Grained Visual Classification?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9401--9411},
}

Alleviating Hallucinations in Large Vision-Language Models via Decoding-Time Perturbation Adaptation
Jiaqi Bai,
Hongcheng Guo,
Jiaheng Liu,
Zhibo Zhou,
Jian Yang,
Feiran Huang
[pdf]
[bibtex]
@InProceedings{Bai_2026_CVPR,
  author    = {Bai, Jiaqi and Guo, Hongcheng and Liu, Jiaheng and Zhou, Zhibo and Yang, Jian and Huang, Feiran},
  title     = {Alleviating Hallucinations in Large Vision-Language Models via Decoding-Time Perturbation Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9867--9877},
}

Trajectory-Diversity-Driven Robust Vision-and-Language Navigation
Jiangyang Li,
Cong Wan,
SongLin Dong,
Chenhao Ding,
Qiang Wang,
Zhiheng Ma,
Yihong Gong
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jiangyang and Wan, Cong and Dong, SongLin and Ding, Chenhao and Wang, Qiang and Ma, Zhiheng and Gong, Yihong},
  title     = {Trajectory-Diversity-Driven Robust Vision-and-Language Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9143--9154},
}

Efficient3D: A Unified Framework for Adaptive and Debiased Token Reduction in 3D MLLMs
Yuhui Lin,
Siyue Yu,
Yuxing Yang,
Guangliang Cheng,
Jimin Xiao
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Yuhui and Yu, Siyue and Yang, Yuxing and Cheng, Guangliang and Xiao, Jimin},
  title     = {{Efficient3D}: A Unified Framework for Adaptive and Debiased Token Reduction in {3D} {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8941--8951},
}

Learning Multi-Modal Prototypes for Cross-Domain Few-Shot Object Detection
Wanqi Wang,
Jingcai Guo,
Yuxiang Cai,
Zhi Chen
[pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Wanqi and Guo, Jingcai and Cai, Yuxiang and Chen, Zhi},
  title     = {Learning Multi-Modal Prototypes for Cross-Domain Few-Shot Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7747--7757},
}

Class-Aware Drift Compensation for Non-Uniform Semantic Shift in Continual Learning
Fankang Xu,
Lu Jin,
Yanpeng Sun,
Shiyu Xuan,
Zechao Li
[pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Fankang and Jin, Lu and Sun, Yanpeng and Xuan, Shiyu and Li, Zechao},
  title     = {Class-Aware Drift Compensation for Non-Uniform Semantic Shift in Continual Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7717--7727},
}

NCSTR: Node-Centric Decoupled Spatio-Temporal Reasoning for Video-based Human Pose Estimation
Quang Dang Huynh,
Xuefei Yin,
Andrew Busch,
Hugo G. Espinosa,
Alan Wee-Chung Liew,
Matthew T.O. Worsey,
Yanming Zhu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huynh_2026_CVPR,
  author    = {Huynh, Quang Dang and Yin, Xuefei and Busch, Andrew and Espinosa, Hugo G. and Liew, Alan Wee-Chung and Worsey, Matthew T. O. and Zhu, Yanming},
  title     = {{NCSTR}: Node-Centric Decoupled Spatio-Temporal Reasoning for Video-based Human Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8368--8377},
}

OKGraph: Online Knowledge Graph Probing for Open-vocabulary Recognition
Junhui Yin,
Zhizhen Cai,
Puze Wang,
Guanzhou Ke,
Jianhua Yang,
Man Zhang,
Qiang Zhang,
Shengfeng He
[pdf] [supp]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Junhui and Cai, Zhizhen and Wang, Puze and Ke, Guanzhou and Yang, Jianhua and Zhang, Man and Zhang, Qiang and He, Shengfeng},
  title     = {{OKGraph}: Online Knowledge Graph Probing for Open-vocabulary Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6717--6726},
}

RISE: Enhancing VLM Image Annotation with Self-Supervised Reasoning
Suhang Hu,
Wei Hu,
Yuhang Su,
Fan Zhang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Suhang and Hu, Wei and Su, Yuhang and Zhang, Fan},
  title     = {{RISE}: Enhancing {VLM} Image Annotation with Self-Supervised Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9878--9887},
}

Revisiting Prototype Rehearsal for Exemplar-Free Continual Learning: Manifold-Aware Boundary Sampling with Adaptive Class-Balanced Loss
Hongye Xu,
Bartosz Krawczyk
[pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Hongye and Krawczyk, Bartosz},
  title     = {Revisiting Prototype Rehearsal for Exemplar-Free Continual Learning: Manifold-Aware Boundary Sampling with Adaptive Class-Balanced Loss},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7852--7861},
}

DARTS: Distance-Aware Robust Training for Selective Classification
A. Q. M. Sazzad Sayyed,
Nathaniel D. Bastian,
Francesco Restuccia
[pdf] [supp]
[bibtex]
@InProceedings{Sayyed_2026_CVPR,
  author    = {Sayyed, A. Q. M. Sazzad and Bastian, Nathaniel D. and Restuccia, Francesco},
  title     = {{DARTS}: Distance-Aware Robust Training for Selective Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8806--8815},
}

Do All Individual Layers Help? An Empirical Study of Task-Interfering Layers in Vision-Language Model
Zhiming Liu,
Yujie Wei,
Lei Feng,
Xiu Su,
Xiaobo Xia,
Weili Guan,
Zeke Xie,
Shuo Yang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Zhiming and Wei, Yujie and Feng, Lei and Su, Xiu and Xia, Xiaobo and Guan, Weili and Xie, Zeke and Yang, Shuo},
  title     = {Do All Individual Layers Help? An Empirical Study of Task-Interfering Layers in Vision-Language Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9597--9607},
}

D^2-STX: Decoupling Spatial-Temporal Cross-Attention for Dual-branch Repetitive Action Counting
Xiaoai Wang,
Hang Wang,
Yan Liu,
Huan Hu,
Bruce X.B. Yu
[pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xiaoai and Wang, Hang and Liu, Yan and Hu, Huan and Yu, Bruce X. B.},
  title     = {{D$^2$-STX}: Decoupling Spatial-Temporal Cross-Attention for Dual-branch Repetitive Action Counting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8205--8214},
}

Efficient Unlearning through Maximizing Relearning Convergence Delay
Khoa Tran,
Simon S. Woo
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tran_2026_CVPR,
  author    = {Tran, Khoa and Woo, Simon S.},
  title     = {Efficient Unlearning through Maximizing Relearning Convergence Delay},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7968--7977},
}

Video Inspector: An Agentic-RL Framework and Benchmark for Human-Aligned Generative Video Evaluation
Jacey Somers,
Harrison Zale,
Janine Mason,
Tina Walker,
Eddie Quinn,
Felix Lewis,
Gavin Wright,
Yvonne Young,
Charles Sullivan,
Wayne Carter,
Julian Foster
[pdf]
[bibtex]
@InProceedings{Somers_2026_CVPR,
  author    = {Somers, Jacey and Zale, Harrison and Mason, Janine and Walker, Tina and Quinn, Eddie and Lewis, Felix and Wright, Gavin and Young, Yvonne and Sullivan, Charles and Carter, Wayne and Foster, Julian},
  title     = {Video Inspector: An {Agentic-RL} Framework and Benchmark for Human-Aligned Generative Video Evaluation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8534--8543},
}

Distilling Counterfactual Reasoning from Language to Vision: Causal Graph-Guided Post-Training for Video Understanding
Yuefei Chen,
Jiang Liu,
Xiaodong Lin,
Ruixiang Tang
[pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yuefei and Liu, Jiang and Lin, Xiaodong and Tang, Ruixiang},
  title     = {Distilling Counterfactual Reasoning from Language to Vision: Causal Graph-Guided Post-Training for Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9165--9174},
}

Seeing Helps Reasoning in Language Models
Yulu Gan,
Kaiya Ivy Zhao,
Tomaso Poggio,
Phillip Isola
[pdf] [supp]
[bibtex]
@InProceedings{Gan_2026_CVPR,
  author    = {Gan, Yulu and Zhao, Kaiya Ivy and Poggio, Tomaso and Isola, Phillip},
  title     = {Seeing Helps Reasoning in Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7080--7090},
}

TAPNext++: What's Next for Tracking Any Point (TAP)?
Sebastian Jung,
Artem Zholus,
Martin Sundermeyer,
Carl Doersch,
Ross Goroshin,
David Joseph Tan,
Sarath Chandar,
Rudolph Triebel,
Federico Tombari
[pdf] [supp]
[bibtex]
@InProceedings{Jung_2026_CVPR,
  author    = {Jung, Sebastian and Zholus, Artem and Sundermeyer, Martin and Doersch, Carl and Goroshin, Ross and Tan, David Joseph and Chandar, Sarath and Triebel, Rudolph and Tombari, Federico},
  title     = {{TAPNext++}: What's Next for Tracking Any Point ({TAP})?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8429--8438},
}

From Pixels to Nucleotides: End-to-End Token-Based Video Compression for DNA Storage
Cihan Ruan,
Lebin Zhou,
Bingqing Zhao,
Rongduo Han,
Qiming Yuan,
Chenchen Zhu,
Linyi Han,
Liang Yang,
Wei Wang,
Wei Jiang,
Nam Ling
[pdf] [arXiv]
[bibtex]
@InProceedings{Ruan_2026_CVPR,
  author    = {Ruan, Cihan and Zhou, Lebin and Zhao, Bingqing and Han, Rongduo and Yuan, Qiming and Zhu, Chenchen and Han, Linyi and Yang, Liang and Wang, Wei and Jiang, Wei and Ling, Nam},
  title     = {From Pixels to Nucleotides: End-to-End Token-Based Video Compression for {DNA} Storage},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8544--8553},
}

A Unified Privacy-Utility Framework for Collaborative Inference via Randomized Smoothing
Shiwei Ding,
Lan Zhang,
Zhenlin Wang,
Xiaoyong Yuan
[pdf]
[bibtex]
@InProceedings{Ding_2026_CVPR,
  author    = {Ding, Shiwei and Zhang, Lan and Wang, Zhenlin and Yuan, Xiaoyong},
  title     = {A Unified Privacy-Utility Framework for Collaborative Inference via Randomized Smoothing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8039--8048},
}

ConInfer: Context-Aware Inference for Training-Free Open-Vocabulary Remote Sensing Segmentation
Wenyang Chen,
Zhanxuan Hu,
Yaping Zhang,
Hailong Ning,
Yonghang Tai
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Wenyang and Hu, Zhanxuan and Zhang, Yaping and Ning, Hailong and Tai, Yonghang},
  title     = {{ConInfer}: Context-Aware Inference for Training-Free Open-Vocabulary Remote Sensing Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7408--7418},
}

Video-R4: Reinforcing Text-Rich Video Reasoning with Visual Rumination
Yolo Yunlong Tang,
Daiki Shimada,
Hang Hua,
Chao Huang,
Jing Bi,
Rogerio Feris,
Chenliang Xu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Yolo Yunlong and Shimada, Daiki and Hua, Hang and Huang, Chao and Bi, Jing and Feris, Rogerio and Xu, Chenliang},
  title     = {{Video-R4}: Reinforcing Text-Rich Video Reasoning with Visual Rumination},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8314--8325},
}

Learning to Wander: Improving the Global Image Geolocation Ability of LMMs via Actionable Reasoning
Yushuo Zheng,
Huiyu Duan,
Zicheng Zhang,
Xiaohong Liu,
Xiongkuo Min
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Yushuo and Duan, Huiyu and Zhang, Zicheng and Liu, Xiaohong and Min, Xiongkuo},
  title     = {Learning to Wander: Improving the Global Image Geolocation Ability of {LMMs} via Actionable Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7208--7219},
}

Mitigating Visual Context Degradation in Large Multimodal Models: A Training-Free Decoupled Agentic Framework
Hongrui Jia,
Chaoya Jiang,
Shikun Zhang,
Wei Ye
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Hongrui and Jiang, Chaoya and Zhang, Shikun and Ye, Wei},
  title     = {Mitigating Visual Context Degradation in Large Multimodal Models: A Training-Free Decoupled Agentic Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9574--9585},
}

MuSCM: Mutual Spatial Correlation Mapping for Class Incremental Detection Transformer
Jian Zhong,
Yifan Jiao,
Xi Shao,
Bing-Kun Bao
[pdf] [supp]
[bibtex]
@InProceedings{Zhong_2026_CVPR,
  author    = {Zhong, Jian and Jiao, Yifan and Shao, Xi and Bao, Bing-Kun},
  title     = {{MuSCM}: Mutual Spatial Correlation Mapping for Class Incremental Detection Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7758--7767},
}

Learning to Select Visual In-Context Demonstrations
Eugene Lee,
Yu-Chi Lin,
Jiajie Diao
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Eugene and Lin, Yu-Chi and Diao, Jiajie},
  title     = {Learning to Select Visual In-Context Demonstrations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9455--9465},
}

MPerS: Dynamic MLLM MixExperts Perception-Guided Remote Sensing Scene Segmentation
Ziyi Wang,
Xianping Ma,
Ziyao Wang,
Hongyang Zhang,
Man On Pun
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ziyi and Ma, Xianping and Wang, Ziyao and Zhang, Hongyang and Pun, Man On},
  title     = {{MPerS}: Dynamic {MLLM} {MixExperts} Perception-Guided Remote Sensing Scene Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7251--7261},
}

Distilling Out-of-Distribution Knowledge from Large Language Models for CLIP Generalization
Qiji Ma,
Chuanguang Yang,
Zhulin An,
Libo Huang,
Erhu Zhao,
Yuqi Li,
Yongjun Xu
[pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Qiji and Yang, Chuanguang and An, Zhulin and Huang, Libo and Zhao, Erhu and Li, Yuqi and Xu, Yongjun},
  title     = {Distilling Out-of-Distribution Knowledge from Large Language Models for {CLIP} Generalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9531--9541},
}

PTAD: Pose and Texture Agnostic Anomaly Detection
Wei Zhuo,
Jianen Xiang,
Miaomiao Liu,
Huajun Lu
[pdf] [supp]
[bibtex]
@InProceedings{Zhuo_2026_CVPR,
  author    = {Zhuo, Wei and Xiang, Jianen and Liu, Miaomiao and Lu, Huajun},
  title     = {{PTAD}: Pose and Texture Agnostic Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6779--6788},
}

Dual-Stage Parameter-Efficient Fine-Tuning for Consistent Spatial and Temporal Representation
Junhao Xia,
Chaoyang Zhang,
Yecheng Zhang,
Chengyang Zhou,
Zhichang Wang,
Bochun Liu,
Dongshuo Yin
[pdf]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Junhao and Zhang, Chaoyang and Zhang, Yecheng and Zhou, Chengyang and Wang, Zhichang and Liu, Bochun and Yin, Dongshuo},
  title     = {Dual-Stage Parameter-Efficient Fine-Tuning for Consistent Spatial and Temporal Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8607--8617},
}

GreenPlanner: Practical Floorplan Layout Generation via an Energy-Aware and Function-Feasible Generative Framework
Pengyu Zeng,
Yuqin Dai,
Jun Yin,
Jing Zhong,
Ziyang Han,
Chaoyang Shi,
ZhanXiang Jin,
Maowei Jiang,
Yuxing Han,
Shuai Lu
[pdf] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Pengyu and Dai, Yuqin and Yin, Jun and Zhong, Jing and Han, Ziyang and Shi, Chaoyang and Jin, ZhanXiang and Jiang, Maowei and Han, Yuxing and Lu, Shuai},
  title     = {{GreenPlanner}: Practical Floorplan Layout Generation via an Energy-Aware and Function-Feasible Generative Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8596--8606},
}

Entropy-Constrained Information Optimal Transport for Multi-View Geo-Localization
Xiaoxi Yang,
Bo Sun,
Yisheng An,
Ganchao Liu
[pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Xiaoxi and Sun, Bo and An, Yisheng and Liu, Ganchao},
  title     = {Entropy-Constrained Information Optimal Transport for Multi-View Geo-Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7188--7197},
}

Label-Agnostic Category Discovery
Yuwei Bian,
Shidong Wang,
Chunming Li,
Haofeng Zhang
[pdf] [supp]
[bibtex]
@InProceedings{Bian_2026_CVPR,
  author    = {Bian, Yuwei and Wang, Shidong and Li, Chunming and Zhang, Haofeng},
  title     = {Label-Agnostic Category Discovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7573--7582},
}

Autoregressive Universal Video Segmentation Model
Miran Heo,
Sukjun Hwang,
Min-Hung Chen,
Yu-Chiang Frank Wang,
Albert Gu,
Seon Joo Kim,
Ryo Hachiuma
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Heo_2026_CVPR,
  author    = {Heo, Miran and Hwang, Sukjun and Chen, Min-Hung and Wang, Yu-Chiang Frank and Gu, Albert and Kim, Seon Joo and Hachiuma, Ryo},
  title     = {Autoregressive Universal Video Segmentation Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7429--7438},
}

Equivariant Unsupervised Object Detection with Learnable Riesz Transform and Composite Spatial Transformers
Sayan Kumar Chaki,
Thierry Fournel,
Rémi Emonet
[pdf] [supp]
[bibtex]
@InProceedings{Chaki_2026_CVPR,
  author    = {Chaki, Sayan Kumar and Fournel, Thierry and Emonet, R{\'e}mi},
  title     = {Equivariant Unsupervised Object Detection with Learnable {Riesz} Transform and Composite Spatial Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7008--7017},
}

Modality-Aware Bit Allocation for Mixed-Precision Quantization of Vision-Language Models
Xi Zhang,
Hanwei Zhu,
Jiamang Wang,
Xiaolin Wu,
Weisi Lin
[pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xi and Zhu, Hanwei and Wang, Jiamang and Wu, Xiaolin and Lin, Weisi},
  title     = {Modality-Aware Bit Allocation for Mixed-Precision Quantization of Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9305--9315},
}

Counterfactual Segmentation Reasoning: Diagnosing and Mitigating Pixel-Grounding Hallucination
Xinzhuo Li,
Adheesh Juvekar,
Jiaxun Zhang,
Xingyou Liu,
Muntasir Wahed,
Kiet A. Nguyen,
Yifan Shen,
Tianjiao Yu,
Ismini Lourentzou
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xinzhuo and Juvekar, Adheesh and Zhang, Jiaxun and Liu, Xingyou and Wahed, Muntasir and Nguyen, Kiet A. and Shen, Yifan and Yu, Tianjiao and Lourentzou, Ismini},
  title     = {Counterfactual Segmentation Reasoning: Diagnosing and Mitigating Pixel-Grounding Hallucination},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7450--7460},
}

MMR1: Enhancing Multimodal Reasoning with Variance-Aware Sampling
Sicong Leng,
Jing Wang,
Jiaxi Li,
Hao Zhang,
Zhiqiang Hu,
Boqiang Zhang,
Yuming Jiang,
Hang Zhang,
Xin Li,
Deli Zhao,
Wei Lu,
Yu Rong,
Aixin Sun,
Shijian Lu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Leng_2026_CVPR,
  author    = {Leng, Sicong and Wang, Jing and Li, Jiaxi and Zhang, Hao and Hu, Zhiqiang and Zhang, Boqiang and Jiang, Yuming and Zhang, Hang and Li, Xin and Zhao, Deli and Lu, Wei and Rong, Yu and Sun, Aixin and Lu, Shijian},
  title     = {{MMR1}: Enhancing Multimodal Reasoning with Variance-Aware Sampling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9075--9087},
}

Seeing Through Fog: Towards Fog-Invariant Action Recognition
Enqi Liu,
Liyuan Pan,
Zhi Gao,
Lingzhi Li,
Qing Li
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Enqi and Pan, Liyuan and Gao, Zhi and Li, Lingzhi and Li, Qing},
  title     = {Seeing Through Fog: Towards Fog-Invariant Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6966--6975},
}

Extending Segment Anything Model 2 to Multi-Object Tracking by Optimizing Hierarchical Trajectory Memory
Cheng-Yen Yang,
Hsiang-Wei Huang,
Kuang-Ming Chen,
Kunjun Li,
Jenq-Neng Hwang
[pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Cheng-Yen and Huang, Hsiang-Wei and Chen, Kuang-Ming and Li, Kunjun and Hwang, Jenq-Neng},
  title     = {Extending {Segment Anything Model 2} to Multi-Object Tracking by Optimizing Hierarchical Trajectory Memory},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8358--8367},
}

From Alignment to Reason: Multi-Agent Debate for Tactical Badminton Video Retrieval
Yi-Xiang Zhang,
Yu-Shuen Wang
[pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yi-Xiang and Wang, Yu-Shuen},
  title     = {From Alignment to Reason: Multi-Agent Debate for Tactical Badminton Video Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9520--9530},
}

ConfDiff: Confidence-Guided Representation Diffusion for Video Moment Retrieval
Haiming Zhao,
Tai Wang
[pdf]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Haiming and Wang, Tai},
  title     = {{ConfDiff}: Confidence-Guided Representation Diffusion for Video Moment Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8164--8174},
}

Continual Alignment for SAM: Rethinking Foundation Models for Medical Image Segmentation in Continual Learning
Jiayi Wang,
Wei Dai,
Haoyu Wang,
Sihan Yang,
Haixia Bi,
Jian Sun
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jiayi and Dai, Wei and Wang, Haoyu and Yang, Sihan and Bi, Haixia and Sun, Jian},
  title     = {Continual Alignment for {SAM}: Rethinking Foundation Models for Medical Image Segmentation in Continual Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7520--7529},
}

GATE: Gaussian-Attentive Transformer for Uncertainty-Aware Age Estimation
Chaewon Lee,
JunHyeok Heo,
Chang-Su Kim
[pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Chaewon and Heo, JunHyeok and Kim, Chang-Su},
  title     = {{GATE}: {Gaussian}-Attentive Transformer for Uncertainty-Aware Age Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8736--8745},
}

Generative Digital Twins: Vision-Language Simulation Models for Executable Industrial Systems
YuChe Hsu,
AnJui Wang,
TsaiChing Ni,
YuanFu Yang
[pdf] [arXiv]
[bibtex]
@InProceedings{Hsu_2026_CVPR,
  author    = {Hsu, YuChe and Wang, AnJui and Ni, TsaiChing and Yang, YuanFu},
  title     = {Generative Digital Twins: Vision-Language Simulation Models for Executable Industrial Systems},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8705--8714},
}

MARS-RL: Enhancing Multi-Agent RAG Systems for Multi-Modal Documents via Strategic Reasoning with Reinforcement Learning
Zhongyu Wang,
Pengbo Liu
[pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zhongyu and Liu, Pengbo},
  title     = {{MARS-RL}: Enhancing Multi-Agent {RAG} Systems for Multi-Modal Documents via Strategic Reasoning with Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9674--9683},
}

SCOPE: Scene-Contextualized Incremental Few-Shot 3D Segmentation
Vishal Thengane,
Zhaochong An,
Tianjin Huang,
Son Lam Phung,
Abdesselam Bouzerdoum,
Lu Yin,
Na Zhao,
Xiatian Zhu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Thengane_2026_CVPR,
  author    = {Thengane, Vishal and An, Zhaochong and Huang, Tianjin and Phung, Son Lam and Bouzerdoum, Abdesselam and Yin, Lu and Zhao, Na and Zhu, Xiatian},
  title     = {{SCOPE}: Scene-Contextualized Incremental Few-Shot {3D} Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7368--7377},
}

From Fewer Samples to Fewer Bits: Reframing Dataset Distillation as Joint Optimization of Precision and Compactness
My H. Dinh,
Aditya Sant,
Akshay Malhotra,
Keya Patani,
Shahab Hamidi-Rad
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dinh_2026_CVPR,
  author    = {Dinh, My H. and Sant, Aditya and Malhotra, Akshay and Patani, Keya and Hamidi-Rad, Shahab},
  title     = {From Fewer Samples to Fewer Bits: Reframing Dataset Distillation as Joint Optimization of Precision and Compactness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7070--7079},
}

UI-AGILE: Advancing GUI Agents with Effective Reinforcement Learning and Precise Inference-Time Grounding
Shuquan Lian,
Yuhang Wu,
Jia Ma,
Yifan Ding,
Zihan Song,
Bingqi Chen,
Xiawu Zheng,
Hui Li,
Rongrong Ji
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lian_2026_CVPR,
  author    = {Lian, Shuquan and Wu, Yuhang and Ma, Jia and Ding, Yifan and Song, Zihan and Chen, Bingqi and Zheng, Xiawu and Li, Hui and Ji, Rongrong},
  title     = {{UI-AGILE}: Advancing {GUI} Agents with Effective Reinforcement Learning and Precise Inference-Time Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8575--8584},
}

SCP: Spatial Causal Prediction in Video
Yanguang Zhao,
Jie Yang,
Shengqiong Wu,
Shutong Hu,
Hongbo Qiu,
Yu Wang,
Guijia Zhang,
Tan Kai Ze,
Hao Fei,
Chia-Wen Lin,
Mong-Li Lee,
Wynne Hsu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yanguang and Yang, Jie and Wu, Shengqiong and Hu, Shutong and Qiu, Hongbo and Wang, Yu and Zhang, Guijia and Ze, Tan Kai and Fei, Hao and Lin, Chia-Wen and Lee, Mong-Li and Hsu, Wynne},
  title     = {{SCP}: Spatial Causal Prediction in Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7165--7175},
}

CineMatte: Background Matting for Virtual Production and Beyond
Yuanjian He,
Chen Zhang,
Fasheng Chen,
Jiangbo Cao
[pdf] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Yuanjian and Zhang, Chen and Chen, Fasheng and Cao, Jiangbo},
  title     = {{CineMatte}: Background Matting for Virtual Production and Beyond},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8725--8735},
}

DeepDP-TGMM: Amortized Non-Parametric Clustering for Hyperspherical Self-Supervised Representations
Cyril Kana Tepakbong,
Kévin Bouchard,
Julien Maitre
[pdf] [supp]
[bibtex]
@InProceedings{Tepakbong_2026_CVPR,
  author    = {Tepakbong, Cyril Kana and Bouchard, K{\'e}vin and Maitre, Julien},
  title     = {{DeepDP-TGMM}: Amortized Non-Parametric Clustering for Hyperspherical Self-Supervised Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7483--7492},
}

Complexity of Linear Regions in Self-supervised Deep ReLU Networks
Mufhumudzi Muthivhi,
Terence L. van Zyl
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Muthivhi_2026_CVPR,
  author    = {Muthivhi, Mufhumudzi and van Zyl, Terence L.},
  title     = {Complexity of Linear Regions in Self-supervised Deep {ReLU} Networks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6911--6920},
}

Towards Universal Open-Set Visual Font Recognition Via Augmented Synthetic Similarity
Peicheng Zhou,
Shancheng Fang,
Chenhui Jin,
Bowei Pu,
Hongtao Xie
[pdf]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Peicheng and Fang, Shancheng and Jin, Chenhui and Pu, Bowei and Xie, Hongtao},
  title     = {Towards Universal Open-Set Visual Font Recognition Via Augmented Synthetic Similarity},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6799--6808},
}

DARN: Dynamic Adaptive Regularization Networks for Efficient and Robust Foundation Model Adaptation
Dhenenjay Yadav,
Rohan Sawai
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yadav_2026_CVPR,
  author    = {Yadav, Dhenenjay and Sawai, Rohan},
  title     = {{DARN}: Dynamic Adaptive Regularization Networks for Efficient and Robust Foundation Model Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7625--7633},
}

Dynamic Pseudo-Label Assignment and Consistent Prototypical Learning for Few-Shot Class-Incremental Learning
Zhilong Mao,
Hang Zhang,
Yanmin Li,
Lihua Liu,
Jibing Wu,
Mao Wang
[pdf] [supp]
[bibtex]
@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Zhilong and Zhang, Hang and Li, Yanmin and Liu, Lihua and Wu, Jibing and Wang, Mao},
  title     = {Dynamic Pseudo-Label Assignment and Consistent Prototypical Learning for Few-Shot Class-Incremental Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7810--7819},
}

Region-Aware Hierarchical Sub-Feature Alignment for Robust EEG-Based Visual Decoding
Yanan Zhu,
Ziwei Xiang,
Jiamin Wu,
Jinyang Guo,
Hongyuan Zhang,
Chunfeng Song,
Hongjian Fang,
Yufei Guo,
Xianglong Liu
[pdf]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Yanan and Xiang, Ziwei and Wu, Jiamin and Guo, Jinyang and Zhang, Hongyuan and Song, Chunfeng and Fang, Hongjian and Guo, Yufei and Liu, Xianglong},
  title     = {Region-Aware Hierarchical Sub-Feature Alignment for Robust {EEG}-Based Visual Decoding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6667--6676},
}

STORM: End-to-End Referring Multi-Object Tracking in Videos
Zijia Lu,
Jingru Yi,
Jue Wang,
Yuxiao Chen,
Junwen Chen,
Xinyu Li,
Davide Modolo
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Zijia and Yi, Jingru and Wang, Jue and Chen, Yuxiao and Chen, Junwen and Li, Xinyu and Modolo, Davide},
  title     = {{STORM}: End-to-End Referring Multi-Object Tracking in Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8347--8357},
}

Onboarding Without Forgetting: Hypernetwork Personalization with Data-Free Replay for Personalized Federated Learning
Thinh Nguyen,
Le Huy Khiem,
Van-Tuan Tran,
Khoa D Doan,
Nitesh V. Chawla,
Kok-Seng Wong
[pdf] [supp]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Thinh and Khiem, Le Huy and Tran, Van-Tuan and Doan, Khoa D and Chawla, Nitesh V. and Wong, Kok-Seng},
  title     = {Onboarding Without Forgetting: Hypernetwork Personalization with Data-Free Replay for Personalized Federated Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7728--7736},
}

Towards Universal and Lightweight Coverless Image Steganography with Multimodal Large Language Models Assistance
Jia Li,
Zhankai Li,
Yongqiang Yu,
Xuehu Yan,
Yuliang Lu
[pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jia and Li, Zhankai and Yu, Yongqiang and Yan, Xuehu and Lu, Yuliang},
  title     = {Towards Universal and Lightweight Coverless Image Steganography with Multimodal Large Language Models Assistance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7935--7944},
}

UnrealSpace: Analyzing Spatial Understanding and Reasoning in Controllable Simulation
Wufei Ma,
Sky Cen,
Jianzhi Shen,
Rex Lee,
León Begiristain,
Yan Zhuang,
Jiawei Peng,
Zhifei Yu,
Tianao Song,
Xinyuan Qi,
Tianmin Shu,
Adam Kortylewski,
Alan Yuille
[pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Wufei and Cen, Sky and Shen, Jianzhi and Lee, Rex and Begiristain, Le{\'o}n and Zhuang, Yan and Peng, Jiawei and Yu, Zhifei and Song, Tianao and Qi, Xinyuan and Shu, Tianmin and Kortylewski, Adam and Yuille, Alan},
  title     = {{UnrealSpace}: Analyzing Spatial Understanding and Reasoning in Controllable Simulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9716--9725},
}

CoVCR: Bridging Visual Narrative Gaps via Context Generation for Robust Commonsense Reasoning
Xinyu Li,
Shiliang Sun
[pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xinyu and Sun, Shiliang},
  title     = {{CoVCR}: Bridging Visual Narrative Gaps via Context Generation for Robust Commonsense Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9065--9074},
}

GEODE: Geometry-Guided Discrete Diffusion for Open-Vocabulary 3D Scene Graph Generation
Changqun Feng,
Wangxiandi Yin,
Xin Hu,
Lei Zhao,
Dongyang Zhang,
Tao He
[pdf]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Changqun and Yin, Wangxiandi and Hu, Xin and Zhao, Lei and Zhang, Dongyang and He, Tao},
  title     = {{GEODE}: Geometry-Guided Discrete Diffusion for Open-Vocabulary {3D} Scene Graph Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7143--7153},
}

IRDINO: Adapting DINOv3 with Second-Order Motion Awareness for Moving Infrared Small Target Detection
Qian Xu,
Shuaipeng Fan,
Fei Gao,
Mingjin Zhang
[pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Qian and Fan, Shuaipeng and Gao, Fei and Zhang, Mingjin},
  title     = {{IRDINO}: Adapting {DINOv3} with Second-Order Motion Awareness for Moving Infrared Small Target Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8408--8418},
}

Multimodal Reasoning with Explicit Reasoning Patterns and Rewards
Han Qiu,
Sheng Jin,
Zhongrong Zuo,
Ziyue Wang,
Qi She,
Ling Shao,
Shijian Lu
[pdf] [supp]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Han and Jin, Sheng and Zuo, Zhongrong and Wang, Ziyue and She, Qi and Shao, Ling and Lu, Shijian},
  title     = {Multimodal Reasoning with Explicit Reasoning Patterns and Rewards},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9542--9551},
}

Beyond Static Artifacts: A Forensic Benchmark for Video Deepfake Reasoning in Vision Language Models
Zheyuan Gu,
Qingsong Zhao,
Yusong Wang,
Zhaohong Huang,
Xinqi Li,
Chen Yuan,
Jiawei Shao,
Chi Zhang,
Xuelong Li
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Zheyuan and Zhao, Qingsong and Wang, Yusong and Huang, Zhaohong and Li, Xinqi and Yuan, Chen and Shao, Jiawei and Zhang, Chi and Li, Xuelong},
  title     = {Beyond Static Artifacts: A Forensic Benchmark for Video Deepfake Reasoning in Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8585--8595},
}

Harmonized Multi-Layer Text-to-Image Generation with Generative Priors
Yusuf Dalva,
Yijun Li,
Qing Liu,
Nanxuan Zhao,
Jianming Zhang,
Zhe Lin,
Pinar Yanardag
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dalva_2026_CVPR,
  author    = {Dalva, Yusuf and Li, Yijun and Liu, Qing and Zhao, Nanxuan and Zhang, Jianming and Lin, Zhe and Yanardag, Pinar},
  title     = {Harmonized Multi-Layer Text-to-Image Generation with Generative Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8471--8480},
}

100Editor: 100+ Views per Batch and Minute-Scale View-Consistent 3D Editing
Cunqi Wu,
Peng Zhou,
Jie Qin,
Qi Tian
[pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Cunqi and Zhou, Peng and Qin, Jie and Tian, Qi},
  title     = {{100Editor}: 100+ Views per Batch and Minute-Scale View-Consistent {3D} Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8449--8460},
}

Predicting Gene Expression in Spatially Resolved Transcriptomics Across Samples Through Probabilistic Fusion of Hierarchical Histology and Spatial Information
Yinbo Liu,
Qi Wu,
Keyang Ye,
Xiao He,
Tian Tian
[pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yinbo and Wu, Qi and Ye, Keyang and He, Xiao and Tian, Tian},
  title     = {Predicting Gene Expression in Spatially Resolved Transcriptomics Across Samples Through Probabilistic Fusion of Hierarchical Histology and Spatial Information},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8492--8503},
}

Face Time Traveller: Travel Through Ages Without Losing Identity
Purbayan Kar,
Ayush Ghadiya,
Vishal Chudasama,
Pankaj Wasnik,
C.V. Jawahar
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kar_2026_CVPR,
  author    = {Kar, Purbayan and Ghadiya, Ayush and Chudasama, Vishal and Wasnik, Pankaj and Jawahar, C. V.},
  title     = {Face Time Traveller: Travel Through Ages Without Losing Identity},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8756--8765},
}

CADRNet: Cognitively-Inspired Active Vision for 3D Reasoning Segmentation via Differentiable Rendering
Zai Yang Yu,
Changshuo Wang,
Yuan Shi,
Linjun Sun,
Shu Wei,
Tingran Wang,
Wangyu Wu,
Yanjie Li,
Weijun Li
[pdf]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Zai Yang and Wang, Changshuo and Shi, Yuan and Sun, Linjun and Wei, Shu and Wang, Tingran and Wu, Wangyu and Li, Yanjie and Li, Weijun},
  title     = {{CADRNet}: Cognitively-Inspired Active Vision for {3D} Reasoning Segmentation via Differentiable Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7220--7230},
}

Prompt-driven Small Object Instance Segmentation in Earth Observation
Chenhao Wang,
Yingrui Ji,
Yu Meng,
Yunjian Zhang,
Yao Zhu
[pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Chenhao and Ji, Yingrui and Meng, Yu and Zhang, Yunjian and Zhu, Yao},
  title     = {Prompt-driven Small Object Instance Segmentation in {Earth} Observation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7347--7356},
}

Leveraging Unlabeled Data from Unknown Sources via Dual-Path Guidance for Deepfake Face Detection
Zhiqiang Yang,
Renshuai Tao,
Chunjie Zhang,
Guodong Yang,
Xiaolong Zheng,
Yao Zhao
[pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zhiqiang and Tao, Renshuai and Zhang, Chunjie and Yang, Guodong and Zheng, Xiaolong and Zhao, Yao},
  title     = {Leveraging Unlabeled Data from Unknown Sources via Dual-Path Guidance for Deepfake Face Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8090--8100},
}

Weaver: End-to-End Agentic System Training for Video Interleaved Reasoning
Yudi Shi,
Shangzhe Di,
Qirui Chen,
Qinian Wang,
Jiayin Cai,
Xiaolong Jiang,
Yao Hu,
Weidi Xie
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Yudi and Di, Shangzhe and Chen, Qirui and Wang, Qinian and Cai, Jiayin and Jiang, Xiaolong and Hu, Yao and Xie, Weidi},
  title     = {Weaver: End-to-End Agentic System Training for Video Interleaved Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9248--9258},
}

Towards Efficient Multimodal Unified Reasoning Model via Model Merging
Qixiang Yin,
Huanjin Yao,
Jianghao Chen,
Jiaxing Huang,
Zhicheng Zhao,
Fei Su
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Qixiang and Yao, Huanjin and Chen, Jianghao and Huang, Jiaxing and Zhao, Zhicheng and Su, Fei},
  title     = {Towards Efficient Multimodal Unified Reasoning Model via Model Merging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9378--9388},
}

LED: LLM Enhanced Open-Vocabulary Object Detection without Human Curated Data Generation
Yang Zhou,
Shiyu Zhao,
Yuxiao Chen,
Zhenting Wang,
Can Jin,
Mingyu Zhao,
Dimitris N. Metaxas
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Yang and Zhao, Shiyu and Chen, Yuxiao and Wang, Zhenting and Jin, Can and Zhao, Mingyu and Metaxas, Dimitris N.},
  title     = {{LED}: {LLM} Enhanced Open-Vocabulary Object Detection without Human Curated Data Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9643--9653},
}

Hierarchical Textual Knowledge for Enhanced Image Clustering
Yijie Zhong,
Yunfan Gao,
Weipeng Jiang,
Haofen Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2026_CVPR,
  author    = {Zhong, Yijie and Gao, Yunfan and Jiang, Weipeng and Wang, Haofen},
  title     = {Hierarchical Textual Knowledge for Enhanced Image Clustering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9749--9758},
}

Erased, But Not Forgotten: Erased Rectified Flow Transformers Still Remain Unsafe Under Concept Attack
Nanxiang Jiang,
Zhaoxin Fan,
Enhan Kang,
Daiheng Gao,
Yun Zhou,
Yanxia Chang,
Zheng Zhu,
Yeying Jin,
Wenjun Wu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Nanxiang and Fan, Zhaoxin and Kang, Enhan and Gao, Daiheng and Zhou, Yun and Chang, Yanxia and Zhu, Zheng and Jin, Yeying and Wu, Wenjun},
  title     = {Erased, But Not Forgotten: Erased Rectified Flow Transformers Still Remain Unsafe Under Concept Attack},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8080--8089},
}

Analyzing and Enhancing Visual Learning in LLM-based Radiology Report Generation
Zailong Chen,
Peng Gao,
Johan Barthelemy,
Luping Zhou,
Lei Wang
[pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zailong and Gao, Peng and Barthelemy, Johan and Zhou, Luping and Wang, Lei},
  title     = {Analyzing and Enhancing Visual Learning in {LLM}-based Radiology Report Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9327--9336},
}

Ramen: Robust Test-Time Adaptation of Vision-Language Models with Active Sample Selection
Wenxuan Bao,
Yanjun Zhao,
Xiyuan Yang,
Jingrui He
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bao_2026_CVPR,
  author    = {Bao, Wenxuan and Zhao, Yanjun and Yang, Xiyuan and He, Jingrui},
  title     = {Ramen: Robust Test-Time Adaptation of Vision-Language Models with Active Sample Selection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9632--9642},
}

ECOC-IL: Robust and Efficient Label LDP for Imbalanced Learning
Mengyang Li,
Ou Wu
[pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Mengyang and Wu, Ou},
  title     = {{ECOC-IL}: Robust and Efficient Label {LDP} for Imbalanced Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7914--7923},
}

Open World Image Aesthetic Assessment
Mingxiang Liao,
Tianren Ma,
Xijin Zhang
[pdf] [supp]
[bibtex]
@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Mingxiang and Ma, Tianren and Zhang, Xijin},
  title     = {Open World Image Aesthetic Assessment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9791--9801},
}

Bootstrap Your Own Classifier: Your Pretrained Vision Models are Secretly Strong Continual Learners
Yizheng Gong,
Xiaoyang Wang,
Siyue Yu,
Waleed Al-Nuaimy,
Jimin Xiao
[pdf] [supp]
[bibtex]
@InProceedings{Gong_2026_CVPR,
  author    = {Gong, Yizheng and Wang, Xiaoyang and Yu, Siyue and Al-Nuaimy, Waleed and Xiao, Jimin},
  title     = {Bootstrap Your Own Classifier: Your Pretrained Vision Models are Secretly Strong Continual Learners},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7665--7674},
}

Asymmetric Collaborative Distillation for Asymmetric Image Retrieval
Yi Xie,
Huaidong Zhang,
Xuandi Luo,
Yan Zhou,
Shengfeng He
[pdf] [supp]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Yi and Zhang, Huaidong and Luo, Xuandi and Zhou, Yan and He, Shengfeng},
  title     = {Asymmetric Collaborative Distillation for Asymmetric Image Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6706--6716},
}

Thinking with Blueprints: Assisting Vision-Language Models in Spatial Reasoning via Structured Object Representation
Weijian Ma,
Shizhao Sun,
Tianyu Yu,
Ruiyu Wang,
Tat-Seng Chua,
Jiang Bian
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Weijian and Sun, Shizhao and Yu, Tianyu and Wang, Ruiyu and Chua, Tat-Seng and Bian, Jiang},
  title     = {Thinking with Blueprints: Assisting Vision-Language Models in Spatial Reasoning via Structured Object Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8919--8929},
}

Overthinking Causes Hallucination: Tracing Confounder Propagation in Vision Language Models
Abin Shoby,
Ta Duc Huy,
Tuan Dung Nguyen,
Minh Khoi Ho,
Qi Chen,
Anton van den Hengel,
Phi Le Nguyen,
Johan W. Verjans,
Vu Minh Hieu Phan
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shoby_2026_CVPR,
  author    = {Shoby, Abin and Huy, Ta Duc and Nguyen, Tuan Dung and Ho, Minh Khoi and Chen, Qi and van den Hengel, Anton and Le Nguyen, Phi and Verjans, Johan W. and Phan, Vu Minh Hieu},
  title     = {Overthinking Causes Hallucination: Tracing Confounder Propagation in Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9185--9194},
}

Large Multimodal Models as General In-Context Classifiers
Marco Garosi,
Matteo Farina,
Alessandro Conti,
Massimiliano Mancini,
Elisa Ricci
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Garosi_2026_CVPR,
  author    = {Garosi, Marco and Farina, Matteo and Conti, Alessandro and Mancini, Massimiliano and Ricci, Elisa},
  title     = {Large Multimodal Models as General In-Context Classifiers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6727--6736},
}

Visual Reasoning Through Tool-Supervised Reinforcement Learning
Qihua Dong,
Gozde Sahin,
Pei Wang,
Zhaowei Cai,
Robik Shrestha,
Hao Yang,
Davide Modolo
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Qihua and Sahin, Gozde and Wang, Pei and Cai, Zhaowei and Shrestha, Robik and Yang, Hao and Modolo, Davide},
  title     = {Visual Reasoning Through Tool-Supervised Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8993--9002},
}

DM^3T: Harmonizing Modalities via Diffusion for Multi-Object Tracking
Weiran Li,
Yeqiang Liu,
Yijie Wei,
Mina Han,
Qiannan Guo,
Zhenbo Li
[pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Weiran and Liu, Yeqiang and Wei, Yijie and Han, Mina and Guo, Qiannan and Li, Zhenbo},
  title     = {{DM$^3$T}: Harmonizing Modalities via Diffusion for Multi-Object Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8398--8407},
}

Memory-efficient Continual Learning with Prototypical Exemplar Condensation
M.-Duong Nguyen,
Thien-Thanh Dao,
Le-Tuan Nguyen,
Dung D. Le,
Kok-Seng Wong
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, M.-Duong and Dao, Thien-Thanh and Nguyen, Le-Tuan and Le, Dung D. and Wong, Kok-Seng},
  title     = {Memory-efficient Continual Learning with Prototypical Exemplar Condensation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7675--7685},
}

Fine-Grained Visual Prompt and Region Self-Distillation for Retrieval-Augmented VQA
Yujie Wang,
Hu Zhang,
Jiye Liang,
Zhiqiang Wang,
Hongye Tan,
Ru Li
[pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yujie and Zhang, Hu and Liang, Jiye and Wang, Zhiqiang and Tan, Hongye and Li, Ru},
  title     = {Fine-Grained Visual Prompt and Region Self-Distillation for Retrieval-Augmented {VQA}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9282--9293},
}

CogNet: Multi-Agent Collaborative Reasoning and Verification for Salient Object Ranking
Zhenyu Wu,
Tengfei Shi,
Xuehao Wang,
Ming Li,
Chenglizhao Chen,
Wenfeng Song,
Aimin Hao
[pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Zhenyu and Shi, Tengfei and Wang, Xuehao and Li, Ming and Chen, Chenglizhao and Song, Wenfeng and Hao, Aimin},
  title     = {{CogNet}: Multi-Agent Collaborative Reasoning and Verification for Salient Object Ranking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7241--7250},
}

Direct Language Embedding Enables Gaussian Splatting for Large Scenes
Zhida Li,
Jianqiao Zhu,
Hejin Huang,
Yipeng Qin,
Sibei Yang,
Guanbin Li
[pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhida and Zhu, Jianqiao and Huang, Hejin and Qin, Yipeng and Yang, Sibei and Li, Guanbin},
  title     = {Direct Language Embedding Enables {Gaussian} Splatting for Large Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7231--7240},
}

AFCL: Achieving Spatio-Temporal Invariance to Data Heterogeneity in Federated Continual Learning
Jianheng Tang,
Jingyu He,
Kejia Fan,
Run He,
Jingchao Wang,
Anfeng Liu,
Houbing Herbert Song,
Leye Wang,
Zhanxing Zhu,
Huiping Zhuang,
Yunhuai Liu
[pdf] [supp]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Jianheng and He, Jingyu and Fan, Kejia and He, Run and Wang, Jingchao and Liu, Anfeng and Song, Houbing Herbert and Wang, Leye and Zhu, Zhanxing and Zhuang, Huiping and Liu, Yunhuai},
  title     = {{AFCL}: Achieving Spatio-Temporal Invariance to Data Heterogeneity in Federated Continual Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7768--7778},
}

DIAMOND-SSS: Diffusion-Augmented Multi-View Optimization for Data-efficient SubSurface Scattering
Guillermo Figueroa Araneda,
Iris Dania Jimenez,
Florian Hofherr,
Manny Ko,
Hector Andrade-Loarca,
Daniel Cremers
[pdf] [supp]
[bibtex]
@InProceedings{Araneda_2026_CVPR,
  author    = {Araneda, Guillermo Figueroa and Jimenez, Iris Dania and Hofherr, Florian and Ko, Manny and Andrade-Loarca, Hector and Cremers, Daniel},
  title     = {{DIAMOND-SSS}: Diffusion-Augmented Multi-View Optimization for Data-efficient {SubSurface} Scattering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8461--8470},
}

SpatialDreamer: Incentivizing Spatial Reasoning via Active Mental Imagery
Meng Cao,
Xingyu Li,
Xue Liu,
Ian Reid,
Xiaodan Liang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Meng and Li, Xingyu and Liu, Xue and Reid, Ian and Liang, Xiaodan},
  title     = {{SpatialDreamer}: Incentivizing Spatial Reasoning via Active Mental Imagery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7176--7187},
}

One Identity, Many Roles: Multimodal Entity Coreference for Enhanced Video Situation Recognition
Balaji Darur,
Amanmeet Garg,
Makarand Tapaswi
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Darur_2026_CVPR,
  author    = {Darur, Balaji and Garg, Amanmeet and Tapaswi, Makarand},
  title     = {One Identity, Many Roles: Multimodal Entity Coreference for Enhanced Video Situation Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8268--8279},
}

DetRefiner: Model-Agnostic Detection Refinement with Feature Fusion Transformer
Soichiro Okazaki,
Tatsuya Sasaki,
Hiroki Ohashi
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Okazaki_2026_CVPR,
  author    = {Okazaki, Soichiro and Sasaki, Tatsuya and Ohashi, Hiroki},
  title     = {{DetRefiner}: Model-Agnostic Detection Refinement with Feature Fusion Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6890--6900}
}

DEED: Dual-Channel Enhanced Ensemble Distillation for Uncertainty-Aware Recognition
Yang Yang,
Kai Xu,
Junyao Hou,
Miao Zhang,
Xiang Li,
Zhenghua Chen,
Yingxue Gao,
Min Wu
[pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yang and Xu, Kai and Hou, Junyao and Zhang, Miao and Li, Xiang and Chen, Zhenghua and Gao, Yingxue and Wu, Min},
  title     = {{DEED}: Dual-Channel Enhanced Ensemble Distillation for Uncertainty-Aware Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7789--7798}
}

SemanticMoments: Training-Free Motion Similarity via Third Moment Features
Saar Huberman,
Kfir Goldberg,
Or Patashnik,
Sagie Benaim,
Ron Mokady
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huberman_2026_CVPR,
  author    = {Huberman, Saar and Goldberg, Kfir and Patashnik, Or and Benaim, Sagie and Mokady, Ron},
  title     = {{SemanticMoments}: Training-Free Motion Similarity via Third Moment Features},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8419--8428}
}

Pre-trained Models Can Count (Almost): Exploring Quantitative Structure in Visual Representations
Toshimichi Aota,
Akinori Hashimoto,
Naoto Sekizuka,
Takayuki Okatani
[pdf] [supp]
[bibtex]
@InProceedings{Aota_2026_CVPR,
  author    = {Aota, Toshimichi and Hashimoto, Akinori and Sekizuka, Naoto and Okatani, Takayuki},
  title     = {Pre-trained Models Can Count (Almost): Exploring Quantitative Structure in Visual Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6932--6942}
}

SoREL: Soft-Label Refurbishment with Ensemble Learning for Noisy Long-Tailed Classification
Jun Wei Hsieh,
Ying-Hsuan Wu,
Yi-Kuan Hsieh,
Xin Li,
Kuan-Chuan Peng,
Ming-Ching Chang
[pdf]
[bibtex]
@InProceedings{Hsieh_2026_CVPR,
  author    = {Hsieh, Jun Wei and Wu, Ying-Hsuan and Hsieh, Yi-Kuan and Li, Xin and Peng, Kuan-Chuan and Chang, Ming-Ching},
  title     = {{SoREL}: Soft-Label Refurbishment with Ensemble Learning for Noisy Long-Tailed Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6839--6848}
}

PSIM: Perceptual Similarity Index Measure
Md Eimran Hossain Eimon,
Hari Kalva
[pdf] [supp]
[bibtex]
@InProceedings{Eimon_2026_CVPR,
  author    = {Eimon, Md Eimran Hossain and Kalva, Hari},
  title     = {{PSIM}: Perceptual Similarity Index Measure},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8564--8574}
}

StreamEQA: Towards Streaming Video Understanding for Embodied Scenarios
Yifei Wang,
Zhenkai Li,
Tianwen Qian,
Huanran Zheng,
Zheng Wang,
Yuqian Fu,
Xiaoling Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yifei and Li, Zhenkai and Qian, Tianwen and Zheng, Huanran and Wang, Zheng and Fu, Yuqian and Wang, Xiaoling},
  title     = {{StreamEQA}: Towards Streaming Video Understanding for Embodied Scenarios},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9422--9432}
}

KGGAT: Knowledge-Guided Graph Attention Network for Multi-Label Image Classification
Christine Dewi,
Dhananjay R Thiruvady,
Nayyar Zaidi
[pdf] [supp]
[bibtex]
@InProceedings{Dewi_2026_CVPR,
  author    = {Dewi, Christine and Thiruvady, Dhananjay R and Zaidi, Nayyar},
  title     = {{KGGAT}: Knowledge-Guided Graph Attention Network for Multi-Label Image Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8766--8775}
}

When Agents Steer Human Perception: How AI-Selected Images Can Convertly Alter Disagreements
Chi Zhang,
Yulang Gao,
Jiachen Zou,
Chen Wei,
Quanying Liu
[pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chi and Gao, Yulang and Zou, Jiachen and Wei, Chen and Liu, Quanying},
  title     = {When Agents Steer Human Perception: How {AI}-Selected Images Can Convertly Alter Disagreements},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8111--8120}
}

StabiGS: Video Stabilization through Rendering-Aware Trajectory Optimization in 3DGS-Reconstructed Scenes
Souheib Ben Mabrouk,
Jean-Emmanuel Deschaud,
Eva Coupeté,
Thomas Derbanne,
Nicolas Rahmouni
[pdf] [supp]
[bibtex]
@InProceedings{Ben_Mabrouk_2026_CVPR,
  author    = {Ben Mabrouk, Souheib and Deschaud, Jean-Emmanuel and Coupet{\'e}, Eva and Derbanne, Thomas and Rahmouni, Nicolas},
  title     = {{StabiGS}: Video Stabilization through Rendering-Aware Trajectory Optimization in {3DGS}-Reconstructed Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8481--8491}
}

Mitigating Vision-Text Order Bias in Vision-Language Model
Weilin Gan,
Yifan Song,
Zhuocheng Yu,
Sujian Li
[pdf]
[bibtex]
@InProceedings{Gan_2026_CVPR,
  author    = {Gan, Weilin and Song, Yifan and Yu, Zhuocheng and Li, Sujian},
  title     = {Mitigating Vision-Text Order Bias in Vision-Language Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9664--9673}
}

Decoupled Sub-Feature Uncertainty Modeling for Robust Multimodal Representation Learning
Aoqiang Zhu,
Min Hu,
Yan Xing,
Yiming Tang
[pdf]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Aoqiang and Hu, Min and Xing, Yan and Tang, Yiming},
  title     = {Decoupled Sub-Feature Uncertainty Modeling for Robust Multimodal Representation Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6921--6931}
}

ReConText3D: Replay-based Continual Text-to-3D Generation
Muhammad Ahmed Ullah Khan,
Muhammad Haris Bin Amir,
Didier Stricker,
Muhammad Zeshan Afzal
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Khan_2026_CVPR,
  author    = {Khan, Muhammad Ahmed Ullah and Bin Amir, Muhammad Haris and Stricker, Didier and Afzal, Muhammad Zeshan},
  title     = {{ReConText3D}: Replay-based Continual Text-to-{3D} Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7893--7902}
}

TALENT: Target-Aware Efficient Tuning for Referring Image Segmentation
Shuo Jin,
Siyue Yu,
Bingfeng Zhang,
Chao Yao,
Meiqin Liu,
Jimin Xiao
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2026_CVPR,
  author    = {Jin, Shuo and Yu, Siyue and Zhang, Bingfeng and Yao, Chao and Liu, Meiqin and Xiao, Jimin},
  title     = {{TALENT}: Target-Aware Efficient Tuning for Referring Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7472--7482}
}

Flash-Unified: A Training-Free and Task-Aware Acceleration Framework for Native Unified Models
Junlong Ke,
Zichen Wen,
Boxue Yang,
Yantai Yang,
Xuyang Liu,
Chenfei Liao,
Zhaorun Chen,
Shaobo Wang,
Linfeng Zhang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ke_2026_CVPR,
  author    = {Ke, Junlong and Wen, Zichen and Yang, Boxue and Yang, Yantai and Liu, Xuyang and Liao, Chenfei and Chen, Zhaorun and Wang, Shaobo and Zhang, Linfeng},
  title     = {{Flash-Unified}: A Training-Free and Task-Aware Acceleration Framework for Native Unified Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9131--9142}
}

Super Sparse DETR: YOLO-Competitive Convergence and Acceleration
Hebao Zhu
[pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Hebao},
  title     = {Super Sparse {DETR}: {YOLO}-Competitive Convergence and Acceleration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6677--6684}
}

Dual-Modality Anchor-Guided Filtering for Test-Time Prompt Tuning
Jungwon Choi,
Eunwoo Kim
[pdf] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Jungwon and Kim, Eunwoo},
  title     = {Dual-Modality Anchor-Guided Filtering for Test-Time Prompt Tuning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9368--9377}
}

HARP: Hierarchical Adaptive Ranking with Probabilistic Modeling for Skill Determination
Hui Yu,
Xiao Ke,
Zhihong Zeng,
Huangbiao Xu,
Huanqi Wu
[pdf] [supp]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Hui and Ke, Xiao and Zeng, Zhihong and Xu, Huangbiao and Wu, Huanqi},
  title     = {{HARP}: Hierarchical Adaptive Ranking with Probabilistic Modeling for Skill Determination},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8337--8346}
}

Online Interpretable Matrix Decomposition for Large-Scale Streaming Data
Muhammad A. A. Abdelgawad,
Abdelrahman B. M. Eldaly,
Meng Xinmin,
Peng Jing,
Abdurrashid Ibrahim Sanka,
Ray C.C. Cheung,
Hong Yan
[pdf] [supp]
[bibtex]
@InProceedings{Abdelgawad_2026_CVPR,
  author    = {Abdelgawad, Muhammad A. A. and Eldaly, Abdelrahman B. M. and Xinmin, Meng and Jing, Peng and Sanka, Abdurrashid Ibrahim and Cheung, Ray C. C. and Yan, Hong},
  title     = {Online Interpretable Matrix Decomposition for Large-Scale Streaming Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7030--7039}
}

ROSE: Retrieval-Oriented Segmentation Enhancement
Song Tang,
Guangquan Jie,
Henghui Ding,
Yu-Gang Jiang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Song and Jie, Guangquan and Ding, Henghui and Jiang, Yu-Gang},
  title     = {{ROSE}: Retrieval-Oriented Segmentation Enhancement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7398--7407}
}

MAGIC: Few-Shot Mask-Guided Anomaly Inpainting with Prompt Perturbation, Spatially Adaptive Guidance, and Context Awareness
JaeHyuck Choi,
Minjun Kim,
Je Hyeong Hong
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, JaeHyuck and Kim, Minjun and Hong, Je Hyeong},
  title     = {{MAGIC}: Few-Shot Mask-Guided Anomaly Inpainting with Prompt Perturbation, Spatially Adaptive Guidance, and Context Awareness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8524--8533}
}

GRAFT: Graph-Based Affordance Transfer via Part Correspondence
Mengying Lin,
Utkarsh Mishra,
Ajay Mandlekar,
Danfei Xu
[pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Mengying and Mishra, Utkarsh and Mandlekar, Ajay and Xu, Danfei},
  title     = {{GRAFT}: Graph-Based Affordance Transfer via Part Correspondence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8746--8755}
}

POMA-3D: The Point Map Way to 3D Scene Understanding
Ye Mao,
Weixun Luo,
Ranran Huang,
Junpeng Jing,
Krystian Mikolajczyk
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Ye and Luo, Weixun and Huang, Ranran and Jing, Junpeng and Mikolajczyk, Krystian},
  title     = {{POMA-3D}: The Point Map Way to {3D} Scene Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7282--7292}
}

BrainStack: Neuro-MoE with Functionally Guided Expert Routing for EEG-Based Language Decoding
Ziyi Zhao,
Jinzhao Zhou,
Xiaowei Jiang,
Beining Cao,
Wenhao Ma,
Yang Shen,
Ren Li,
Yu-Kai Wang,
Chin-teng Lin
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Ziyi and Zhou, Jinzhao and Jiang, Xiaowei and Cao, Beining and Ma, Wenhao and Shen, Yang and Li, Ren and Wang, Yu-Kai and Lin, Chin-teng},
  title     = {{BrainStack}: {Neuro-MoE} with Functionally Guided Expert Routing for {EEG}-Based Language Decoding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7050--7059}
}

Background-Compensated Audio-Visual Semantic Modulation Framework for Audio-Visual Event Localization
Chao Sun,
Junbo Zhang,
Chuanbo Zhu,
Mingjun Huang,
Bo Du
[pdf]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Chao and Zhang, Junbo and Zhu, Chuanbo and Huang, Mingjun and Du, Bo},
  title     = {Background-Compensated Audio-Visual Semantic Modulation Framework for Audio-Visual Event Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7272--7281}
}

Frequency-Modulated Visual Restoration for Matryoshka Large Multimodal Models
Qingtao Pan,
Zhihao Dou,
Shuo Li
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Qingtao and Dou, Zhihao and Li, Shuo},
  title     = {Frequency-Modulated Visual Restoration for {Matryoshka} Large Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9726--9737}
}

Learning to Propose Pose for Category-Agnostic Objects via Joint Refinement with Co-Matching Supervision
Junjie Chen,
Zezheng Liu,
Runxiang Liu,
Yuming Fang,
Yifan Zuo,
Jiebin Yan
[pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Junjie and Liu, Zezheng and Liu, Runxiang and Fang, Yuming and Zuo, Yifan and Yan, Jiebin},
  title     = {Learning to Propose Pose for Category-Agnostic Objects via Joint Refinement with Co-Matching Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7872--7882}
}

ARGS: Auto-Regressive Gaussian Splatting via Parallel Progressive Next-Scale Prediction
Quanyuan Ruan,
Kewei Shi,
Jiabao Lei,
Xifeng Gao,
Xiaoguang Han
[pdf] [arXiv]
[bibtex]
@InProceedings{Ruan_2026_CVPR,
  author    = {Ruan, Quanyuan and Shi, Kewei and Lei, Jiabao and Gao, Xifeng and Han, Xiaoguang},
  title     = {{ARGS}: Auto-Regressive {Gaussian} Splatting via Parallel Progressive Next-Scale Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8439--8448}
}

VSI: Visual-Subtitle Integration for Keyframe Selection to Enhance Long Video Understanding
Jianxiang He,
Meisheng Hong,
Jungang Li,
Weiyu Guo,
Xuming Hu,
Hui Xiong
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Jianxiang and Hong, Meisheng and Li, Jungang and Guo, Weiyu and Hu, Xuming and Xiong, Hui},
  title     = {{VSI}: Visual-Subtitle Integration for Keyframe Selection to Enhance Long Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9003--9012}
}

MART: Mechanism-disentanglement Anchor-Routed Training for Learning with Open-World Noisy Data
Changhui Hu,
Bhalaji Nagarajan,
Ricardo Marques,
Petia Radeva
[pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Changhui and Nagarajan, Bhalaji and Marques, Ricardo and Radeva, Petia},
  title     = {{MART}: Mechanism-disentanglement Anchor-Routed Training for Learning with Open-World Noisy Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7018--7029}
}

Reasoning for Mobile User Experience with Multimodal LLMs: Task, Benchmark, and Approach
Ruichao Mao,
Zhou Fang,
Teng Guo,
Hao Yang,
Yaping Li,
Shaohua Peng,
Maji Huang,
Xiaoyu Lin,
Shuoyang Liu,
Xuepeng Li,
Yuyu Zhang,
Hai Rao
[pdf] [supp]
[bibtex]
@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Ruichao and Fang, Zhou and Guo, Teng and Yang, Hao and Li, Yaping and Peng, Shaohua and Huang, Maji and Lin, Xiaoyu and Liu, Shuoyang and Li, Xuepeng and Zhang, Yuyu and Rao, Hai},
  title     = {Reasoning for Mobile User Experience with Multimodal {LLMs}: Task, Benchmark, and Approach},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8983--8992}
}

Grounding Hierarchical Vision-Language-Action Models Through Explicit Language-Action Alignment
Theodor Wulff,
Federico Tavella,
Rahul Singh Maharjan,
Manith Adikari,
Angelo Cangelosi
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wulff_2026_CVPR,
  author    = {Wulff, Theodor and Tavella, Federico and Maharjan, Rahul Singh and Adikari, Manith and Cangelosi, Angelo},
  title     = {Grounding Hierarchical Vision-Language-Action Models Through Explicit Language-Action Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9269--9281}
}

AdaMeta: Adaptive Meta-Learning with Dynamic Task Relational Inference for Few-shot learning
Xingyu Yang,
Yidan Ma,
Hanzhang Qu,
Jianfu Cao
[pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Xingyu and Ma, Yidan and Qu, Hanzhang and Cao, Jianfu},
  title     = {{AdaMeta}: Adaptive Meta-Learning with Dynamic Task Relational Inference for Few-shot learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7552--7561}
}

Unsupervised Graph Partitioning Framework for Background Suppression in Multi-Query Vehicle Re-Identification
Yichun Hu,
Zixuan Hu,
Ling-Yu Duan
[pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Yichun and Hu, Zixuan and Duan, Ling-Yu},
  title     = {Unsupervised Graph Partitioning Framework for Background Suppression in Multi-Query Vehicle Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6849--6858}
}

Advancing Open-Set Detection and Segmentation via Disentangled Representations
Haokang Zhang,
Yuchen Guan,
Runxi Cheng,
Yujiu Yang
[pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Haokang and Guan, Yuchen and Cheng, Runxi and Yang, Yujiu},
  title     = {Advancing Open-Set Detection and Segmentation via Disentangled Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6622--6632}
}

Are Video Models Ready as Zero-Shot Reasoners? An Empirical Study with the MME-CoF Benchmark
Ziyu Guo,
Xinyan Chen,
Renrui Zhang,
Ruichuan An,
Yu Qi,
Dongzhi Jiang,
Xiangtai Li,
Manyuan Zhang,
Hongsheng Li,
Pheng-Ann Heng
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Ziyu and Chen, Xinyan and Zhang, Renrui and An, Ruichuan and Qi, Yu and Jiang, Dongzhi and Li, Xiangtai and Zhang, Manyuan and Li, Hongsheng and Heng, Pheng-Ann},
  title     = {Are Video Models Ready as Zero-Shot Reasoners? An Empirical Study with the {MME-CoF} Benchmark},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9175--9184}
}

FVLF: A Reinforcing Vision-Language Framework for Gloss-Free Sign Language Translation
Zhi Rao,
Yucheng Zhou,
Benjia Zhou,
Yiqing Huang,
Sergio Escalera,
Jun Wan
[pdf]
[bibtex]
@InProceedings{Rao_2026_CVPR,
  author    = {Rao, Zhi and Zhou, Yucheng and Zhou, Benjia and Huang, Yiqing and Escalera, Sergio and Wan, Jun},
  title     = {{FVLF}: A Reinforcing Vision-Language Framework for Gloss-Free Sign Language Translation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9237--9247}
}

Revisiting Model Inversion Evaluation: From Misleading Standards to Reliable Privacy Assessment
Sy-Tuyen Ho,
Koh Jun Hao,
Ngoc-Bao Nguyen,
Alexander Binder,
Ngai-Man Cheung
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ho_2026_CVPR,
  author    = {Ho, Sy-Tuyen and Hao, Koh Jun and Nguyen, Ngoc-Bao and Binder, Alexander and Cheung, Ngai-Man},
  title     = {Revisiting Model Inversion Evaluation: From Misleading Standards to Reliable Privacy Assessment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8070--8079}
}

Layer Embedding Deep Fusion Graph Neural Network
Taihua Xu,
Genhao Tian,
Jicong Fan,
Xibei Yang,
Qinghua Zhang,
Yun Cui
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Taihua and Tian, Genhao and Fan, Jicong and Yang, Xibei and Zhang, Qinghua and Cui, Yun},
  title     = {Layer Embedding Deep Fusion Graph Neural Network},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7091--7100}
}

Myopia Rectification: KV Cache Pruning for MLLMs Via Dynamic Attention Subsidy and Token Reclamation
Jiedong Zhuang,
Lu Lu,
Ming Dai,
Jian Chen,
Qiang Liu,
Haoji Hu
[pdf] [supp]
[bibtex]
@InProceedings{Zhuang_2026_CVPR,
  author    = {Zhuang, Jiedong and Lu, Lu and Dai, Ming and Chen, Jian and Liu, Qiang and Hu, Haoji},
  title     = {Myopia Rectification: {KV} Cache Pruning for {MLLMs} Via Dynamic Attention Subsidy and Token Reclamation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9023--9033}
}

FraQAT: Quantization Aware Training with Fractional Bits
Luca Morreale,
Alberto Gil C P Ramos,
Malcolm Chadwick,
Mehdi Noroozi,
Ruchika Chavhan,
Abhinav Mehrotra
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Morreale_2026_CVPR,
  author    = {Morreale, Luca and Gil C P Ramos, Alberto and Chadwick, Malcolm and Noroozi, Mehdi and Chavhan, Ruchika and Mehrotra, Abhinav},
  title     = {{FraQAT}: Quantization Aware Training with Fractional Bits},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8514--8523}
}

A-SelecT: Automatic Timestep Selection for Diffusion Transformer Representation Learning
Changyu Liu,
James Chenhao Liang,
Wenhao Yang,
Yiming Cui,
Jinghao Yang,
Tianyang Wang,
Qifan Wang,
Dongfang Liu,
Cheng Han
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Changyu and Liang, James Chenhao and Yang, Wenhao and Cui, Yiming and Yang, Jinghao and Wang, Tianyang and Wang, Qifan and Liu, Dongfang and Han, Cheng},
  title     = {{A-SelecT}: Automatic Timestep Selection for Diffusion Transformer Representation Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6943--6954}
}

VR-CLIP: Visual Refinement of CLIP for Zero-Shot Semantic Segmentation
Haitao Jiang,
Xu Li,
Yuanyang Cao,
Ying Zhang,
Jianji Wang
[pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Haitao and Li, Xu and Cao, Yuanyang and Zhang, Ying and Wang, Jianji},
  title     = {{VR-CLIP}: Visual Refinement of {CLIP} for Zero-Shot Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6809--6818}
}

Verify Claimed Text-to-Image Models Via Boundary-Aware Prompt Optimization
Zidong Zhao,
Yihao Huang,
Qing Guo,
Tianlin Li,
Anran Li,
Kailong Wang,
Jin Song Dong,
Geguang Pu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Zidong and Huang, Yihao and Guo, Qing and Li, Tianlin and Li, Anran and Wang, Kailong and Dong, Jin Song and Pu, Geguang},
  title     = {Verify Claimed Text-to-Image Models Via Boundary-Aware Prompt Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8049--8058}
}

EvoPrompt-ReID: A Bilevel Optimization Framework for Prompt-Encoder Co-evolution in Image Re-Identification
Yuanlin He,
Zhenchuan Wang,
Jun Chen,
Yingying He,
Jiabao Wang,
Weiwen Wang,
Kun Xu,
Zijin Zhou,
Xiaoxiao Wang,
Mingju Chen,
Tingting Liu,
Zhisong Pan
[pdf]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Yuanlin and Wang, Zhenchuan and Chen, Jun and He, Yingying and Wang, Jiabao and Wang, Weiwen and Xu, Kun and Zhou, Zijin and Wang, Xiaoxiao and Chen, Mingju and Liu, Tingting and Pan, Zhisong},
  title     = {{EvoPrompt-ReID}: A Bilevel Optimization Framework for Prompt-Encoder Co-evolution in Image Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6758--6767}
}

Beyond Syntax: Action Semantics Learning for App Agents
Bohan Tang,
Dezhao Luo,
Jianheng Liu,
Jingxuan Chen,
Shaogang Gong,
Jianye Hao,
Jun Wang,
Kun Shao
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Bohan and Luo, Dezhao and Liu, Jianheng and Chen, Jingxuan and Gong, Shaogang and Hao, Jianye and Wang, Jun and Shao, Kun},
  title     = {Beyond Syntax: Action Semantics Learning for App Agents},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9444--9454}
}

Weakly-Supervised Referring Video Object Segmentation Through Text Supervision
Miaojing Shi,
Jun Huang,
Zijie Yue,
Hanli Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Miaojing and Huang, Jun and Yue, Zijie and Wang, Hanli},
  title     = {Weakly-Supervised Referring Video Object Segmentation Through Text Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7461--7471}
}

Don't Let the Information Slip Away
Taozhe Li,
Guansu Wang,
Bo Yu,
Yiming Liu,
Wei Sun
[pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Taozhe and Wang, Guansu and Yu, Bo and Liu, Yiming and Sun, Wei},
  title     = {Don't Let the Information Slip Away},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8504--8513}
}

Group-DINOmics: Incorporating People Dynamics into DINO for Self-supervised Group Activity Feature Learning
Ryuki Tezuka,
Chihiro Nakatani,
Norimichi Ukita
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tezuka_2026_CVPR,
  author    = {Tezuka, Ryuki and Nakatani, Chihiro and Ukita, Norimichi},
  title     = {{Group-DINOmics}: Incorporating People Dynamics into {DINO} for Self-supervised Group Activity Feature Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8215--8225}
}

Less is More: Token-Efficient Video-QA via Adaptive Frame-Pruning and Semantic Graph Integration
Shaoguang Wang,
Weiyu Guo,
Ziyang Chen,
Yijie Xu,
Xuming Hu,
Hui Xiong
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Shaoguang and Guo, Weiyu and Chen, Ziyang and Xu, Yijie and Hu, Xuming and Xiong, Hui},
  title     = {Less is More: Token-Efficient Video-{QA} via Adaptive Frame-Pruning and Semantic Graph Integration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9856--9866}
}

HiVid-Narrator: Hierarchical Video Narrative Generation with Scene-Primed ASR-anchored Compression
Haoxuan Li,
Mengyan Li,
Junjun Zheng
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Haoxuan and Li, Mengyan and Zheng, Junjun},
  title     = {{HiVid-Narrator}: Hierarchical Video Narrative Generation with Scene-Primed {ASR}-anchored Compression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8195--8204}
}

Leave No Stone Unturned: Uncovering Holistic Audio-Visual Intrinsic Coherence for Deepfake Detection
Jielun Peng,
Yabin Wang,
Yaqi Li,
Long Kong,
Xiaopeng Hong
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Jielun and Wang, Yabin and Li, Yaqi and Kong, Long and Hong, Xiaopeng},
  title     = {Leave No Stone Unturned: Uncovering Holistic Audio-Visual Intrinsic Coherence for Deepfake Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6655--6666}
}

RecycleLoRA: Rank-Revealing QR-Based Dual-LoRA Subspace Adaptation for Domain Generalized Semantic Segmentation
Chanseul Cho,
Seokju Yun,
Jaesung Jun,
Seungjae Moon,
Youngmin Ro
[pdf] [supp]
[bibtex]
@InProceedings{Cho_2026_CVPR,
  author    = {Cho, Chanseul and Yun, Seokju and Jun, Jaesung and Moon, Seungjae and Ro, Youngmin},
  title     = {{RecycleLoRA}: Rank-Revealing {QR}-Based Dual-{LoRA} Subspace Adaptation for Domain Generalized Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7503--7513}
}

Learning to Select, Learning to Judge: Active Preference Alignment for Mars Terrain Segmentation
JunJie Li,
Miyu Li,
Jiawei Wang,
Yu Liu,
Yumei Wang
[pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, JunJie and Li, Miyu and Wang, Jiawei and Liu, Yu and Wang, Yumei},
  title     = {Learning to Select, Learning to Judge: Active Preference Alignment for {Mars} Terrain Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8887--8896}
}

PosterGen: Aesthetic-Aware Multi-Modal Paper-to-Poster Generation Via Multi-Agent LLMs
Zhilin Zhang,
Xiang Zhang,
Jiaqi Wei,
Yiwei Xu,
Chenyu You
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zhilin and Zhang, Xiang and Wei, Jiaqi and Xu, Yiwei and You, Chenyu},
  title     = {{PosterGen}: Aesthetic-Aware Multi-Modal Paper-to-Poster Generation Via Multi-Agent {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9813--9823}
}

Safe Codebook: Token-Level Moderation for Safer Visual Autoregressive Generation
Jiaxuan Zhang,
Qianqian Xu,
Peisong Wen,
Siran Dai,
Yang Liu,
Qingming Huang
[pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jiaxuan and Xu, Qianqian and Wen, Peisong and Dai, Siran and Liu, Yang and Huang, Qingming},
  title     = {Safe Codebook: Token-Level Moderation for Safer Visual Autoregressive Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7924--7934},
}

Gazemo: Mimicking Human Saccades via Foveal-Peripheral Feature Modeling for Lightweight Semantic Segmentation
Mian Muhammad Naeem Abid,
Radu Timofte
[pdf] [supp]
[bibtex]
@InProceedings{Abid_2026_CVPR,
  author    = {Abid, Mian Muhammad Naeem and Timofte, Radu},
  title     = {Gazemo: Mimicking Human Saccades via Foveal-Peripheral Feature Modeling for Lightweight Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7293--7303},
}

REBA: Residual Mixture-of-Experts and Bidirectional Video-Text Alignment for Better Fine-grained Weakly Supervised Video Anomaly Detection
Chengxi Chu,
Nurul Japar,
Chee Kau Lim
[pdf]
[bibtex]
@InProceedings{Chu_2026_CVPR,
  author    = {Chu, Chengxi and Japar, Nurul and Lim, Chee Kau},
  title     = {{REBA}: Residual Mixture-of-Experts and Bidirectional Video-Text Alignment for Better Fine-grained Weakly Supervised Video Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8280--8290},
}

ZeroDiff++: Balancing Semantic Diffusion Dynamics for Robust Zero-Shot Learning
Qin Li,
Qi Li,
Limei Liu,
Junfeng Yang,
Han Peng
[pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Qin and Li, Qi and Liu, Limei and Yang, Junfeng and Peng, Han},
  title     = {{ZeroDiff++}: Balancing Semantic Diffusion Dynamics for Robust Zero-Shot Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6997--7007},
}

Towards Complete Activation: Foreground-Background Multi-Perspective Guided Cross-Support for Few-Shot Segmentation
Yi Yang,
Qiang Jiao,
Mengrui Shi,
Qiang Zhang
[pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yi and Jiao, Qiang and Shi, Mengrui and Zhang, Qiang},
  title     = {Towards Complete Activation: Foreground-Background Multi-Perspective Guided Cross-Support for Few-Shot Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7378--7387},
}

Learning to Reason: Targeted Knowledge Discovery and Fuzzy Logic Update for Robust Image Recognition
Gurucharan Srinivas,
Joshua Niemeijer,
Frank Köster
[pdf] [supp]
[bibtex]
@InProceedings{Srinivas_2026_CVPR,
  author    = {Srinivas, Gurucharan and Niemeijer, Joshua and K{\"o}ster, Frank},
  title     = {Learning to Reason: Targeted Knowledge Discovery and Fuzzy Logic Update for Robust Image Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7122--7131},
}

Semantic Guided Feature Disentanglement and Reconstruction for Domain Adaptive Object Detection
Xiaowei Zhao,
Zhide Liu,
Yuqing Ma,
Xianglong Liu
[pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Xiaowei and Liu, Zhide and Ma, Yuqing and Liu, Xianglong},
  title     = {Semantic Guided Feature Disentanglement and Reconstruction for Domain Adaptive Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9357--9367},
}

Exploiting the Source-Asymmetry Confidence Gap for Generalizable AI-Generated Image Detection
Ziyang Zheng,
Weiyan Chen,
Yao Xiao,
Zijie Cao,
Dongyu Zhang,
Pengxu Wei
[pdf]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Ziyang and Chen, Weiyan and Xiao, Yao and Cao, Zijie and Zhang, Dongyu and Wei, Pengxu},
  title     = {Exploiting the Source-Asymmetry Confidence Gap for Generalizable {AI}-Generated Image Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8715--8724},
}

IntentEdit: Multi-Agent Reasoning for Intent-Driven Complex Image Editing
Yuxuan Zhang,
Shijia Huang,
Liwei Wang
[pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yuxuan and Huang, Shijia and Wang, Liwei},
  title     = {{IntentEdit}: Multi-Agent Reasoning for Intent-Driven Complex Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8776--8785},
}

GaussFiller: Unleashing VLM-Expert Guidance for 3D Scene Completion with 3D Gaussian Splatting
Yuhan Ping,
Cheng Lin,
Yuan Liu,
Zhiyang Dou,
Jia Pan,
Wenping Wang
[pdf]
[bibtex]
@InProceedings{Ping_2026_CVPR,
  author    = {Ping, Yuhan and Lin, Cheng and Liu, Yuan and Dou, Zhiyang and Pan, Jia and Wang, Wenping},
  title     = {{GaussFiller}: Unleashing {VLM}-Expert Guidance for {3D} Scene Completion with {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7132--7142},
}

DGD: Density Gradient-guided Diffusion for Long-Tailed Clustering
Xulun Ye,
Yuanyuan Deng,
Kun Zhou
[pdf] [supp]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Xulun and Deng, Yuanyuan and Zhou, Kun},
  title     = {{DGD}: Density Gradient-guided Diffusion for Long-Tailed Clustering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7654--7664},
}

UniShield: An Adaptive Multi-Agent Framework for Unified Forgery Image Detection and Localization
Qing Huang,
Zhipei Xu,
Xuanyu Zhang,
Xiangyu Yu,
Jian Zhang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Qing and Xu, Zhipei and Zhang, Xuanyu and Yu, Xiangyu and Zhang, Jian},
  title     = {{UniShield}: An Adaptive Multi-Agent Framework for Unified Forgery Image Detection and Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8121--8132},
}

Another BRIXEL in the Wall: Towards Cheaper Dense Features
Alexander Lappe,
Martin A. Giese
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lappe_2026_CVPR,
  author    = {Lappe, Alexander and Giese, Martin A.},
  title     = {Another {BRIXEL} in the Wall: Towards Cheaper Dense Features},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7605--7614},
}

VIDEOP2R: Video Understanding from Perception to Reasoning
Yifan Jiang,
Yueying Wang,
Rui Zhao,
Toufiq Parag,
Zhimin Chen,
Zhenyu Liao,
Jayakrishnan Unnikrishnan
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Yifan and Wang, Yueying and Zhao, Rui and Parag, Toufiq and Chen, Zhimin and Liao, Zhenyu and Unnikrishnan, Jayakrishnan},
  title     = {{VIDEOP2R}: Video Understanding from Perception to Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8303--8313},
}

Plug-and-Play Dynamic In-context Learning with Stochastic Regularization for Screen Content Image Super-Resolution
Yuexin Wang,
Xiaolei Wang,
Guangliang Cheng,
Huihui Bai,
Tammam Tillo,
Jimin Xiao
[pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yuexin and Wang, Xiaolei and Cheng, Guangliang and Bai, Huihui and Tillo, Tammam and Xiao, Jimin},
  title     = {Plug-and-Play Dynamic In-context Learning with Stochastic Regularization for Screen Content Image Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8836--8845},
}

Improving Synthesized Image Detection by Disentangling Generator-Shared and Generator-Specific Image Artifacts
Yongqi Yang,
Yuke Li,
Heng Huang,
Zhihui Li,
Bo Du,
Yu Wu
[pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yongqi and Li, Yuke and Huang, Heng and Li, Zhihui and Du, Bo and Wu, Yu},
  title     = {Improving Synthesized Image Detection by Disentangling Generator-Shared and Generator-Specific Image Artifacts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8019--8028},
}

Once for All: An End-to-End Paradigm for VLM-Based Domain-Generalized Object Detection
Peng Zhang,
Xiang Yuan,
Cong Li,
Junwei Han,
Gong Cheng
[pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Peng and Yuan, Xiang and Li, Cong and Han, Junwei and Cheng, Gong},
  title     = {Once for All: An End-to-End Paradigm for {VLM}-Based Domain-Generalized Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6829--6838},
}

Di3PO - Diptych Diffusion DPO for Targeted Improvements in Image Generation
Sanjana Reddy,
Ishaan Malhi,
Sally Ma,
Praneet Dutta
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Reddy_2026_CVPR,
  author    = {Reddy, Sanjana and Malhi, Ishaan and Ma, Sally and Dutta, Praneet},
  title     = {{Di3PO} - Diptych Diffusion {DPO} for Targeted Improvements in Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8868--8876},
}

VRAG-DFD: Verifiable Retrieval-Augmentation for MLLM-based Deepfake Detection
Hui Han,
Shunli Wang,
Yandan Zhao,
Taiping Yao,
Shouhong Ding
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Hui and Wang, Shunli and Zhao, Yandan and Yao, Taiping and Ding, Shouhong},
  title     = {{VRAG-DFD}: Verifiable Retrieval-Augmentation for {MLLM}-based Deepfake Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9552--9562},
}

Video Parallel Scaling: Aggregating Diverse Frame Subsets for VideoLLMs
Hyungjin Chung,
Hyelin Nam,
Jiyeon Kim,
Hyojun Go,
Byeongjun Park,
Junho Kim,
Joonseok Lee,
Seongsu Ha,
Byung-Hoon Kim
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chung_2026_CVPR,
  author    = {Chung, Hyungjin and Nam, Hyelin and Kim, Jiyeon and Go, Hyojun and Park, Byeongjun and Kim, Junho and Lee, Joonseok and Ha, Seongsu and Kim, Byung-Hoon},
  title     = {Video Parallel Scaling: Aggregating Diverse Frame Subsets for {VideoLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8972--8982},
}

NaiLIA: Multimodal Nail Design Retrieval Based on Dense Intent Descriptions and Palette Queries
Kanon Amemiya,
Daichi Yashima,
Kei Katsumata,
Takumi Komatsu,
Ryosuke Korekata,
Seitaro Otsuki,
Komei Sugiura
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Amemiya_2026_CVPR,
  author    = {Amemiya, Kanon and Yashima, Daichi and Katsumata, Kei and Komatsu, Takumi and Korekata, Ryosuke and Otsuki, Seitaro and Sugiura, Komei},
  title     = {{NaiLIA}: Multimodal Nail Design Retrieval Based on Dense Intent Descriptions and Palette Queries},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9034--9044},
}

AITP: Traffic Accident Responsibility Allocation via Multimodal Large Language Models
Zijin Zhou,
Songan Zhang
[pdf] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Zijin and Zhang, Songan},
  title     = {{AITP}: Traffic Accident Responsibility Allocation via Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9259--9268},
}

CheXmix: Unified Generative Pretraining for Vision Language Models in Medical Imaging
Ashwin Kumar,
Robbie Holland,
Corey Barrett,
Jangwon Kim,
Maya Varma,
Zhihong Chen,
Yunhe Gao,
Greg Zaharchuk,
Tara Taghavi,
Krishnaram Kenthapadi,
Akshay Chaudhari
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kumar_2026_CVPR,
  author    = {Kumar, Ashwin and Holland, Robbie and Barrett, Corey and Kim, Jangwon and Varma, Maya and Chen, Zhihong and Gao, Yunhe and Zaharchuk, Greg and Taghavi, Tara and Kenthapadi, Krishnaram and Chaudhari, Akshay},
  title     = {{CheXmix}: Unified Generative Pretraining for Vision Language Models in Medical Imaging},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9466--9476},
}

Entropy-Based Visual Re-perception Inference for Multimodal Models
Jia Liufu,
Qiangyu Yan,
Zhehan Kan,
Wenming Yang,
Hailin Hu,
Xinghao Chen,
Borui Jiang
[pdf] [supp]
[bibtex]
@InProceedings{Liufu_2026_CVPR,
  author    = {Liufu, Jia and Yan, Qiangyu and Kan, Zhehan and Yang, Wenming and Hu, Hailin and Chen, Xinghao and Jiang, Borui},
  title     = {Entropy-Based Visual Re-perception Inference for Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9770--9779},
}

Is Prompt Selection Necessary for Task-Free Online Continual Learning?
Seoyoung Park,
Haemin Lee,
Hankook Lee
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Seoyoung and Lee, Haemin and Lee, Hankook},
  title     = {Is Prompt Selection Necessary for Task-Free Online Continual Learning?},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7883--7892},
}

Logical Consistency Optimization for Few-Shot Weakly Supervised Video Anomaly Detection
Hantao Zheng,
Ning Han,
Yawen Zeng,
Hegui Zhu,
Hao Chen
[pdf] [supp]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Hantao and Han, Ning and Zeng, Yawen and Zhu, Hegui and Chen, Hao},
  title     = {Logical Consistency Optimization for Few-Shot Weakly Supervised Video Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9045--9054},
}

ReMem: A Dynamic Memory Evolution Detector for Zero-Shot Anomaly Detection
Ling Yi,
Zhe Chen,
Gaochang Wu,
Jinliang Ding,
Xiaojie Wang,
Zhaolong Ning
[pdf]
[bibtex]
@InProceedings{Yi_2026_CVPR,
  author    = {Yi, Ling and Chen, Zhe and Wu, Gaochang and Ding, Jinliang and Wang, Xiaojie and Ning, Zhaolong},
  title     = {{ReMem}: A Dynamic Memory Evolution Detector for Zero-Shot Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7697--7705},
}

CLIPtone-GO: Geometry-Aware, Gradient-Orthogonalized Text-Guided Color Tone Adjustment
Satyam Merothiya,
Chanda Grover Kamra,
Indra Deep Mastan
[pdf] [supp]
[bibtex]
@InProceedings{Merothiya_2026_CVPR,
  author    = {Merothiya, Satyam and Kamra, Chanda Grover and Mastan, Indra Deep},
  title     = {{CLIPtone-GO}: Geometry-Aware, Gradient-Orthogonalized Text-Guided Color Tone Adjustment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8695--8704},
}

FedAR: Attribute-Guided Representation Learning for Heterogeneous Federated Learning
Mengjie Li,
Liu Yang,
Qi Shen
[pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Mengjie and Yang, Liu and Shen, Qi},
  title     = {{FedAR}: Attribute-Guided Representation Learning for Heterogeneous Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6987--6996},
}

Model Merging on Loss Landscapes: A Geometric Perspective
Juanwu Lu,
Anand Bhaskar,
Brian Axelrod,
Ekaterina Tolstaya,
Tristan Emrich
[pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Juanwu and Bhaskar, Anand and Axelrod, Brian and Tolstaya, Ekaterina and Emrich, Tristan},
  title     = {Model Merging on Loss Landscapes: A Geometric Perspective},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7644--7653},
}

SAGA: Semantic Anchor-Guided Alignment for Multi-Source Domain Adaptive Object Detection
Yongchao Feng,
Ziyue Huang,
Jinqing Zhang,
Wenrui Cai,
Qingjie Liu
[pdf] [supp]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Yongchao and Huang, Ziyue and Zhang, Jinqing and Cai, Wenrui and Liu, Qingjie},
  title     = {{SAGA}: Semantic Anchor-Guided Alignment for Multi-Source Domain Adaptive Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7779--7788},
}

VEGAS: Mitigating Hallucinations in Large Vision-Language Models via Vision-Encoder Attention Guided Adaptive Steering
Zihu Wang,
Boxun Xu,
Yuxuan Xia,
Peng Li
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zihu and Xu, Boxun and Xia, Yuxuan and Li, Peng},
  title     = {{VEGAS}: Mitigating Hallucinations in Large Vision-Language Models via Vision-Encoder Attention Guided Adaptive Steering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9055--9064},
}

Training-Free Uncertainty-guided Logit Adjustment for Few-Shot Class-Incremental Learning
Sungwon Woo,
Dongjun Hwang,
Shiwon Kim,
Junsuk Choe,
Jongho Nang
[pdf] [supp]
[bibtex]
@InProceedings{Woo_2026_CVPR,
  author    = {Woo, Sungwon and Hwang, Dongjun and Kim, Shiwon and Choe, Junsuk and Nang, Jongho},
  title     = {Training-Free Uncertainty-guided Logit Adjustment for Few-Shot Class-Incremental Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7634--7643},
}

FineGrade: A Rule-Consistent Scoring Framework for Fine-Grained Action Quality Assessment
Yicong Li,
Howard Leung
[pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yicong and Leung, Howard},
  title     = {{FineGrade}: A Rule-Consistent Scoring Framework for Fine-Grained Action Quality Assessment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8258--8267},
}

WideEye: Achieving Wide Field-of-view Traffic Video Analytics With Dynamic Orientation Adaptation
Z. Jonny Kong,
Sibendu Paul,
Y. Charlie Hu
[pdf] [supp]
[bibtex]
@InProceedings{Kong_2026_CVPR,
  author    = {Kong, Z. Jonny and Paul, Sibendu and Hu, Y. Charlie},
  title     = {{WideEye}: Achieving Wide Field-of-view Traffic Video Analytics With Dynamic Orientation Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8618--8628},
}

Organizing Unstructured Image Collections using Natural Language
Mingxuan Liu,
Zhun Zhong,
Jun Li,
Gianni Franchi,
Subhankar Roy,
Elisa Ricci
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Mingxuan and Zhong, Zhun and Li, Jun and Franchi, Gianni and Roy, Subhankar and Ricci, Elisa},
  title     = {Organizing Unstructured Image Collections using Natural Language},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8907--8918},
}

Uncertainty-Guided Graph Formulation via MWIS for Token Pruning in LVLMs
Jouwon Song,
Sohyeon Kim,
Kyeongbo Kong
[pdf] [supp]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Jouwon and Kim, Sohyeon and Kong, Kyeongbo},
  title     = {Uncertainty-Guided Graph Formulation via {MWIS} for Token Pruning in {LVLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9510--9519},
}

BiomedHELIX: HiErarchical-Local Interaction eXploration for Biomedical Vision-Language Models
Ziheng Zhu,
Yuncheng Guo,
Jie Xu,
Xiaodong Gu
[pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Ziheng and Guo, Yuncheng and Xu, Jie and Gu, Xiaodong},
  title     = {{BiomedHELIX}: {HiErarchical}-Local Interaction {eXploration} for Biomedical Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7060--7069},
}

Instruction-Focus-Prompt: Semantics-Driven Structural Prompts for Universal SAM Segmentation
Shuqi Xia,
Guangze Shi,
Jiarui Cao,
Aoyuan Shi,
Meilin Liu,
Xiaoyi Zhang,
Yujie Wang,
Xueyu Liu,
Cai Zhao,
Ziyuan He,
Yongfei Wu,
Mingqiang Wei
[pdf]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Shuqi and Shi, Guangze and Cao, Jiarui and Shi, Aoyuan and Liu, Meilin and Zhang, Xiaoyi and Wang, Yujie and Liu, Xueyu and Zhao, Cai and He, Ziyuan and Wu, Yongfei and Wei, Mingqiang},
  title     = {{Instruction-Focus-Prompt}: Semantics-Driven Structural Prompts for Universal {SAM} Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7514--7519},
}

VADE: Variance-Aware Dynamic Sampling via Online Sample-Level Difficulty Estimation for Multimodal Reinforcement Learning
Zengjie Hu,
Jiantao Qiu,
Tianyi Bai,
Haojin Yang,
Binhang Yuan,
Qi Jing,
Conghui He,
Wentao Zhang
[pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Zengjie and Qiu, Jiantao and Bai, Tianyi and Yang, Haojin and Yuan, Binhang and Jing, Qi and He, Conghui and Zhang, Wentao},
  title     = {{VADE}: Variance-Aware Dynamic Sampling via Online Sample-Level Difficulty Estimation for Multimodal Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9846--9855},
}

Robust Image Self-Recovery against Tampering using Watermark Generation with Pixel Shuffling
Minyoung Kim,
Paul Hongsuck Seo
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Minyoung and Seo, Paul Hongsuck},
  title     = {Robust Image Self-Recovery against Tampering using Watermark Generation with Pixel Shuffling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8877--8886},
}

GenSRL: Generative Spatiotemporal Representation Learning for Ophthalmic Prognosis Prediction
Wanyu Zhang,
Yanzhao Shi,
Chengxin Zheng,
Hua Wang,
Jianing Wang,
Yue Zhang,
Xiaobing Yu,
Xiaodan Zhang
[pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Wanyu and Shi, Yanzhao and Zheng, Chengxin and Wang, Hua and Wang, Jianing and Zhang, Yue and Yu, Xiaobing and Zhang, Xiaodan},
  title     = {{GenSRL}: Generative Spatiotemporal Representation Learning for Ophthalmic Prognosis Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9622--9631},
}

AnyExperts: On-Demand Expert Allocation for Multimodal Language Models with Mixture of Experts
Yuting Gao,
Lan Wang,
Hengyuan Zhao,
Linjiang Huang,
Si Liu,
Qingpei Guo
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Yuting and Wang, Lan and Zhao, Hengyuan and Huang, Linjiang and Liu, Si and Guo, Qingpei},
  title     = {{AnyExperts}: On-Demand Expert Allocation for Multimodal Language Models with Mixture of Experts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9205--9214},
}

SA-Matching DETR: A Lightweight Transformer Detector with Enhanced Scale Adaptive Matching
Chengshan Yang,
Pengnian Zhang,
Jinjing Zhao
[pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Chengshan and Zhang, Pengnian and Zhao, Jinjing},
  title     = {{SA-Matching DETR}: A Lightweight Transformer Detector with Enhanced Scale Adaptive Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6695--6705},
}

PestVL-Net: Enabling Multimodal Pest Learning Via Fine-grained Vision-Language Interaction
Xueheng Li,
Tao Hu,
Ke Cao,
Runsheng Qi,
Huixin Zhang,
Rui Li,
Jie Zhang,
Chengjun Xie
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xueheng and Hu, Tao and Cao, Ke and Qi, Runsheng and Zhang, Huixin and Li, Rui and Zhang, Jie and Xie, Chengjun},
  title     = {{PestVL-Net}: Enabling Multimodal Pest Learning Via Fine-grained Vision-Language Interaction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8826--8835},
}

RAZOR: Ratio-Aware Layer Editing for Targeted Unlearning in Vision Transformers and Diffusion Models
Ravi Ranjan,
Utkarsh Grover,
Xiaomin Lin,
Agoritsa Polyzou
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ranjan_2026_CVPR,
  author    = {Ranjan, Ravi and Grover, Utkarsh and Lin, Xiaomin and Polyzou, Agoritsa},
  title     = {{RAZOR}: Ratio-Aware Layer Editing for Targeted Unlearning in Vision Transformers and Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7998--8008},
}

Beyond Single Object: Learning 3D Relations with Large Language Models
Kohsuke Ide,
Ryousuke Yamada,
Yue Qiu,
Xianzheng Ma,
Yoshihiro Fukuhara,
Hirokatsu Kataoka,
Yutaka Satoh
[pdf] [supp]
[bibtex]
@InProceedings{Ide_2026_CVPR,
  author    = {Ide, Kohsuke and Yamada, Ryousuke and Qiu, Yue and Ma, Xianzheng and Fukuhara, Yoshihiro and Kataoka, Hirokatsu and Satoh, Yutaka},
  title     = {Beyond Single Object: Learning {3D} Relations with Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9684--9694},
}

Task-Specific Knowledge Improves Generalization: A Logits-Based Framework for Continual Learning of Vision-Language Models
Sijie Wang,
Yingying Zhu
[pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Sijie and Zhu, Yingying},
  title     = {Task-Specific Knowledge Improves Generalization: A Logits-Based Framework for Continual Learning of Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7615--7624},
}

Learning from Label Proportion with Dual-Proportion Constraints
Tianhao Ma,
Ximing Li,
Changchun Li,
Renchu Guan
[pdf] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Tianhao and Li, Ximing and Li, Changchun and Guan, Renchu},
  title     = {Learning from Label Proportion with Dual-Proportion Constraints},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7583--7592},
}

Scaling Spatial Reasoning in MLLMs through Programmatic Data Synthesis
Helu Zhi,
Jingjing Huang,
Wang Xu,
Yangbin Xu,
Yibin Huang,
Wanyue Zhang,
Baoyang Jiang,
Shirui Deng,
Liang Zhu,
FangFang Li,
Tiejun Zhao,
Yankai Lin,
Yuan Yao
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhi_2026_CVPR,
  author    = {Zhi, Helu and Huang, Jingjing and Xu, Wang and Xu, Yangbin and Huang, Yibin and Zhang, Wanyue and Jiang, Baoyang and Deng, Shirui and Zhu, Liang and Li, FangFang and Zhao, Tiejun and Lin, Yankai and Yao, Yuan},
  title     = {Scaling Spatial Reasoning in {MLLMs} through Programmatic Data Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9215--9224},
}

PLR-Gate: Real-Time Gradient Privacy Assessment and Gated Transmission for Secure Federated Learning
Tao Huang,
Jiayang Meng,
Hong Chen,
Chen Hou,
Guolong Zheng,
Xu Yang
[pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Tao and Meng, Jiayang and Chen, Hong and Hou, Chen and Zheng, Guolong and Yang, Xu},
  title     = {{PLR-Gate}: Real-Time Gradient Privacy Assessment and Gated Transmission for Secure Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8029--8038},
}

Gen-n-Val: Agentic Image Data Generation and Validation
Jing-En Huang,
I-Sheng Fang,
Tzuhsuan Huang,
Yu-Lun Liu,
Chih-Yu Wang,
Jun-Cheng Chen
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Jing-En and Fang, I-Sheng and Huang, Tzuhsuan and Liu, Yu-Lun and Wang, Chih-Yu and Chen, Jun-Cheng},
  title     = {{Gen-n-Val}: Agentic Image Data Generation and Validation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8786--8795},
}

QENN: A Quantum Entanglement-Inspired Neural Network for Interaction and Relationship Prediction in Story Videos
Zijun Xu,
Zhengqian Wu,
Chunjie Zhang,
Zhongyuan Wang,
Chunxia Xiao,
Chao Liang
[pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zijun and Wu, Zhengqian and Zhang, Chunjie and Wang, Zhongyuan and Xiao, Chunxia and Liang, Chao},
  title     = {{QENN}: A Quantum Entanglement-Inspired Neural Network for Interaction and Relationship Prediction in Story Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8247--8257},
}

AuralSAM2: Enabling SAM2 Hear Through Pyramid Audio-Visual Feature Prompting
Yuyuan Liu,
Yuanhong Chen,
Chong Wang,
Junlin Han,
Junde Wu,
Can Peng,
Jingkun Chen,
Yu Tian,
Gustavo Carneiro
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yuyuan and Chen, Yuanhong and Wang, Chong and Han, Junlin and Wu, Junde and Peng, Can and Chen, Jingkun and Tian, Yu and Carneiro, Gustavo},
  title     = {{AuralSAM2}: Enabling {SAM2} Hear Through Pyramid Audio-Visual Feature Prompting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7315--7325},
}

Robust Continual Unlearning against Knowledge Erosion and Forgetting Reversal
Eun-Ju Park,
Youjin Shin,
Simon S. Woo
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Eun-Ju and Shin, Youjin and Woo, Simon S.},
  title     = {Robust Continual Unlearning against Knowledge Erosion and Forgetting Reversal},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7978--7987},
}

Mitigating the ID-OOD Tradeoff in Open-Set Test-Time Adaptation
Wenjie Zhao,
Jia Li,
Xin Dong,
Yapeng Tian,
Yu Xiang,
Yunhui Guo
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Wenjie and Li, Jia and Dong, Xin and Tian, Yapeng and Xiang, Yu and Guo, Yunhui},
  title     = {Mitigating the {ID-OOD} Tradeoff in Open-Set Test-Time Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6789--6798},
}

Towards Robust Content Watermarking Against Removal and Forgery Attacks
Yifan Zhu,
Yihan Wang,
Xiao-Shan Gao
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
    author    = {Zhu, Yifan and Wang, Yihan and Gao, Xiao-Shan},
    title     = {Towards Robust Content Watermarking Against Removal and Forgery Attacks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8059--8069}
}

ProGAL-VLA: Grounded Alignment through Prospective Reasoning in Vision-Language-Action Models
Nastaran Darabi,
Amit Ranjan Trivedi
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Darabi_2026_CVPR,
    author    = {Darabi, Nastaran and Trivedi, Amit Ranjan},
    title     = {{ProGAL-VLA}: Grounded Alignment through Prospective Reasoning in Vision-Language-Action Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9013--9022}
}

Frequency-Guided Iterative Bi-directional Exchange Network for Cross-Domain Few-Shot Segmentation
Yadang Chen,
Qi Liu,
Guoqing Zhang,
Le Sun,
Yuhui Zheng
[pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Yadang and Liu, Qi and Zhang, Guoqing and Sun, Le and Zheng, Yuhui},
    title     = {Frequency-Guided Iterative Bi-directional Exchange Network for Cross-Domain Few-Shot Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7841--7851}
}

LinkedOut: Linking World Knowledge Representation Out of Video LLM for Next-Generation Video Recommendation
Haichao Zhang,
Yao Lu,
Lichen Wang,
Yunzhe Li,
Daiwei Chen,
Yunpeng Xu,
Yun Fu
[pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Haichao and Lu, Yao and Wang, Lichen and Li, Yunzhe and Chen, Daiwei and Xu, Yunpeng and Fu, Yun},
    title     = {{LinkedOut}: Linking World Knowledge Representation Out of Video {LLM} for Next-Generation Video Recommendation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7111--7121}
}

VideoThinker: Building Agentic VideoLLMs with LLM-Guided Tool Reasoning
Chenglin Li,
Qianglong Chen,
Feng Han,
Yikun Wang,
Xingxi Yin,
Yan Gong,
Ruilin Li,
Yin Zhang,
Jiaqi Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Chenglin and Chen, Qianglong and Han, Feng and Wang, Yikun and Yin, Xingxi and Gong, Yan and Li, Ruilin and Zhang, Yin and Wang, Jiaqi},
    title     = {{VideoThinker}: Building Agentic {VideoLLMs} with {LLM-Guided} Tool Reasoning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8226--8236}
}

EchoTrail-GUI: Building Actionable Memory for GUI Agents via Critic-Guided Self-Exploration
Runze Li,
Yuwen Zhai,
Bo Xu,
Liwu Xu,
Nian Shi,
Wei Zhang,
Ran Lin,
Liang Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
    author    = {Li, Runze and Zhai, Yuwen and Xu, Bo and Xu, Liwu and Shi, Nian and Zhang, Wei and Lin, Ran and Wang, Liang},
    title     = {{EchoTrail-GUI}: Building Actionable Memory for {GUI} Agents via Critic-Guided Self-Exploration},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9347--9356}
}

V-STaR: Benchmarking Video-LLMs on Video Spatio-Temporal Reasoning
Zixu Cheng,
Jian Hu,
Ziquan Liu,
Chenyang Si,
Wei Li,
Shaogang Gong
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
    author    = {Cheng, Zixu and Hu, Jian and Liu, Ziquan and Si, Chenyang and Li, Wei and Gong, Shaogang},
    title     = {{V-STaR}: Benchmarking {Video-LLMs} on Video Spatio-Temporal Reasoning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9155--9164}
}

Temporally Consistent Long-Term Memory for 3D Single Object Tracking
Jaejoon Yoo,
SuBeen Lee,
Yerim Jeon,
Miso Lee,
Jae-Pil Heo
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yoo_2026_CVPR,
    author    = {Yoo, Jaejoon and Lee, SuBeen and Jeon, Yerim and Lee, Miso and Heo, Jae-Pil},
    title     = {Temporally Consistent Long-Term Memory for {3D} Single Object Tracking},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8388--8397}
}

Why MLLMs Struggle to Determine Object Orientations
Anju Gopinath,
Nikhil Krishnaswamy,
Bruce Draper
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gopinath_2026_CVPR,
    author    = {Gopinath, Anju and Krishnaswamy, Nikhil and Draper, Bruce},
    title     = {Why {MLLMs} Struggle to Determine Object Orientations},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9836--9845}
}

Mull-Tokens: Modality-Agnostic Latent Thinking
Arijit Ray,
Ahmed Abdelkader,
Chengzhi Mao,
Bryan A. Plummer,
Kate Saenko,
Ranjay Krishna,
Leonidas Guibas,
Wen-Sheng Chu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ray_2026_CVPR,
    author    = {Ray, Arijit and Abdelkader, Ahmed and Mao, Chengzhi and Plummer, Bryan A. and Saenko, Kate and Krishna, Ranjay and Guibas, Leonidas and Chu, Wen-Sheng},
    title     = {{Mull-Tokens}: Modality-Agnostic Latent Thinking},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9477--9488}
}

RADSeg: Unleashing Parameter and Compute Efficient Zero-Shot Open-Vocabulary Segmentation Using Agglomerative Models
Omar Alama,
Darshil Jariwala,
Avigyan Bhattacharya,
Seungchan Kim,
Wenshan Wang,
Sebastian Scherer
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Alama_2026_CVPR,
    author    = {Alama, Omar and Jariwala, Darshil and Bhattacharya, Avigyan and Kim, Seungchan and Wang, Wenshan and Scherer, Sebastian},
    title     = {{RADSeg}: Unleashing Parameter and Compute Efficient Zero-Shot Open-Vocabulary Segmentation Using Agglomerative Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9294--9304}
}

SPHINX: A Synthetic Environment for Visual Perception and Reasoning
Md Tanvirul Alam,
Saksham Aggarwal,
Justin Yang Chae,
Nidhi Rastogi
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Alam_2026_CVPR,
    author    = {Alam, Md Tanvirul and Aggarwal, Saksham and Chae, Justin Yang and Rastogi, Nidhi},
    title     = {{SPHINX}: A Synthetic Environment for Visual Perception and Reasoning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9489--9499}
}

OmniGCD: Abstracting Generalized Category Discovery for Modality Agnosticism
Jordan Shipard,
Arnold Wiliem,
Kien Nguyen Thanh,
Wei Xiang,
Clinton Fookes
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shipard_2026_CVPR,
    author    = {Shipard, Jordan and Wiliem, Arnold and Thanh, Kien Nguyen and Xiang, Wei and Fookes, Clinton},
    title     = {{OmniGCD}: Abstracting Generalized Category Discovery for Modality Agnosticism},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {6768--6778}
}

Visual Funnel: Resolving Contextual Blindness in Multimodal Large Language Models
Woojun Jung,
Jaehoon Go,
Mingyu Jeon,
Sunjae Yoon,
Junyeong Kim
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jung_2026_CVPR,
    author    = {Jung, Woojun and Go, Jaehoon and Jeon, Mingyu and Yoon, Sunjae and Kim, Junyeong},
    title     = {Visual Funnel: Resolving Contextual Blindness in Multimodal Large Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8962--8971}
}

HiViS: Hiding Visual Tokens from the Drafter for Speculative Decoding in Vision-Language Models
Zhinan Xie,
Peisong Wang,
Shuang Qiu,
Jian Cheng
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
    author    = {Xie, Zhinan and Wang, Peisong and Qiu, Shuang and Cheng, Jian},
    title     = {{HiViS}: Hiding Visual Tokens from the Drafter for Speculative Decoding in Vision-Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8952--8961}
}

Pose-dIVE: Pose-Diversified Augmentation for Person Re-Identification
Inès Hyeonsu Kim,
Woojeong Jin,
Soowon Son,
Junyoung Seo,
Seokju Cho,
JeongYeol Baek,
Byeongwon Lee,
JoungBin Lee,
Seungryong Kim
[pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR,
    author    = {Kim, In{\`e}s Hyeonsu and Jin, Woojeong and Son, Soowon and Seo, Junyoung and Cho, Seokju and Baek, JeongYeol and Lee, Byeongwon and Lee, JoungBin and Kim, Seungryong},
    title     = {{Pose-dIVE}: Pose-Diversified Augmentation for Person Re-Identification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8640--8650}
}

Wake the Sleeping Weights: Sparsely-Activated Continual Test-Time Adaptation for Medical Image Segmentation
Jianhang Ji,
Zhiming Cheng,
Jianxiang Zhao,
Bingtao Ma,
Hao Chen,
Yuhan Gao,
Lian Zhang,
Zuobin Ying,
Shuai Wang
[pdf]
[bibtex]
@InProceedings{Ji_2026_CVPR,
    author    = {Ji, Jianhang and Cheng, Zhiming and Zhao, Jianxiang and Ma, Bingtao and Chen, Hao and Gao, Yuhan and Zhang, Lian and Ying, Zuobin and Wang, Shuai},
    title     = {Wake the Sleeping Weights: Sparsely-Activated Continual Test-Time Adaptation for Medical Image Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7799--7809}
}

Reasoning Within the Mind: Dynamic Multimodal Interleaving in Latent Space
Chengzhi Liu,
Yuzhe Yang,
Yue Fan,
Qingyue Wei,
Sheng Liu,
Xin Eric Wang
[pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Chengzhi and Yang, Yuzhe and Fan, Yue and Wei, Qingyue and Liu, Sheng and Wang, Xin Eric},
    title     = {Reasoning Within the Mind: Dynamic Multimodal Interleaving in Latent Space},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9225--9236}
}

SignReasoner: Compositional Reasoning for Complex Traffic Sign Understanding Via Functional Structure Units
Ruibin Wang,
Zhenyu Lin,
Xinhai Zhao
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Ruibin and Lin, Zhenyu and Zhao, Xinhai},
    title     = {{SignReasoner}: Compositional Reasoning for Complex Traffic Sign Understanding Via Functional Structure Units},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8796--8805}
}

Unify the Views: View-Consistent Prototype Learning for Few-Shot Segmentation
Hongli Liu,
Yu Wang,
Shengjie Zhao
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Hongli and Wang, Yu and Zhao, Shengjie},
    title     = {Unify the Views: View-Consistent Prototype Learning for Few-Shot Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7419--7428}
}

A Single Pixel is All You Need: Weakly Supervised Medical Image Segmentation using Discrete Denoising Diffusion Models
Mehmet Demirel,
Christos Kyrkou
[pdf] [supp]
[bibtex]
@InProceedings{Demirel_2026_CVPR,
    author    = {Demirel, Mehmet and Kyrkou, Christos},
    title     = {A Single Pixel is All You Need: Weakly Supervised Medical Image Segmentation using Discrete Denoising Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7541--7551}
}

Map2Thought: Explicit 3D Spatial Reasoning via Metric Cognitive Maps
Xiangjun Gao,
Zhensong Zhang,
Dave Zhenyu Chen,
Songcen Xu,
Long Quan,
Eduardo Pérez-Pellitero,
Youngkyoon Jang
[pdf] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
    author    = {Gao, Xiangjun and Zhang, Zhensong and Chen, Dave Zhenyu and Xu, Songcen and Quan, Long and P{\'e}rez-Pellitero, Eduardo and Jang, Youngkyoon},
    title     = {{Map2Thought}: Explicit {3D} Spatial Reasoning via Metric Cognitive Maps},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7154--7164}
}

On the Group Disparities Arising from Machine Unlearning
Zijie Pan,
Zuobin Ying,
Yajie Wang,
Liehuang Zhu,
Wanlei Zhou
[pdf]
[bibtex]
@InProceedings{Pan_2026_CVPR,
    author    = {Pan, Zijie and Ying, Zuobin and Wang, Yajie and Zhu, Liehuang and Zhou, Wanlei},
    title     = {On the Group Disparities Arising from Machine Unlearning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8133--8142}
}

Language-Augmented Semantic Priors for B-Spline Surface Fitting
Yunzhong Lou,
Yusheng Luo,
Jiahao Li,
Yu Song,
Xiangdong Zhou
[pdf] [supp]
[bibtex]
@InProceedings{Lou_2026_CVPR,
    author    = {Lou, Yunzhong and Luo, Yusheng and Li, Jiahao and Song, Yu and Zhou, Xiangdong},
    title     = {Language-Augmented Semantic Priors for {B-Spline} Surface Fitting},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9120--9130}
}

FCL-COD: Weakly Supervised Camouflaged Object Detection with Frequency-aware and Contrastive Learning
Jingchen Ni,
Quan Zhang,
Dan Jiang,
Keyu Lv,
Ke Zhang,
Chun Yuan
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ni_2026_CVPR,
    author    = {Ni, Jingchen and Zhang, Quan and Jiang, Dan and Lv, Keyu and Zhang, Ke and Yuan, Chun},
    title     = {{FCL-COD}: Weakly Supervised Camouflaged Object Detection with Frequency-aware and Contrastive Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7439--7449}
}

Modulate-and-Map: Crossmodal Feature Mapping with Cross-View Modulation for 3D Anomaly Detection
Alex Costanzino,
Pierluigi Zama Ramirez,
Giuseppe Lisanti,
Luigi Di Stefano
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Costanzino_2026_CVPR,
    author    = {Costanzino, Alex and Ramirez, Pierluigi Zama and Lisanti, Giuseppe and Di Stefano, Luigi},
    title     = {Modulate-and-Map: Crossmodal Feature Mapping with Cross-View Modulation for {3D} Anomaly Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8816--8825}
}

MOSSTrack: Modality-Specific Spatio-Temporal Context Learning for RGB-T Tracking
Yisong Liu,
He Yao,
Junlong Cheng,
Yujie Lu,
Junqi Bai,
Min Zhu
[pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Yisong and Yao, He and Cheng, Junlong and Lu, Yujie and Bai, Junqi and Zhu, Min},
    title     = {{MOSSTrack}: Modality-Specific Spatio-Temporal Context Learning for {RGB-T} Tracking},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8378--8387}
}

CoPS: Conditional Prompt Synthesis for Zero-Shot Anomaly Detection
Qiyu Chen,
Zhen Qu,
Wei Luo,
Haiming Yao,
Yunkang Cao,
Yuxin Jiang,
Yinan Duan,
Huiyuan Luo,
Chengkan Lv,
Zhengtao Zhang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Qiyu and Qu, Zhen and Luo, Wei and Yao, Haiming and Cao, Yunkang and Jiang, Yuxin and Duan, Yinan and Luo, Huiyuan and Lv, Chengkan and Zhang, Zhengtao},
    title     = {{CoPS}: Conditional Prompt Synthesis for Zero-Shot Anomaly Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8554--8563}
}

Test-Time Distillation for Continual Model Adaptation
Xiao Chen,
Jiazhen Huang,
Zhiming Liu,
Qinting Jiang,
Fanding Huang,
Jingyan Jiang,
Zhi Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Xiao and Huang, Jiazhen and Liu, Zhiming and Jiang, Qinting and Huang, Fanding and Jiang, Jingyan and Wang, Zhi},
    title     = {Test-Time Distillation for Continual Model Adaptation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7593--7604}
}

Benchmarking Vision-Language Models under Contradictory Virtual Content Attacks in Augmented Reality
Yanming Xiu,
Zhengyuan Jiang,
Neil Zhenqiang Gong,
Maria Gorlatova
[pdf] [arXiv]
[bibtex]
@InProceedings{Xiu_2026_CVPR,
    author    = {Xiu, Yanming and Jiang, Zhengyuan and Gong, Neil Zhenqiang and Gorlatova, Maria},
    title     = {Benchmarking Vision-Language Models under Contradictory Virtual Content Attacks in Augmented Reality},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9110--9119}
}

Disrupting Positional Encoding for Effective Open Set Recognition
Yu Wang,
Jiabo Xie,
Yucan Zhou,
Junxian Mu,
Qinghua Hu,
Pengfei Zhu
[pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Yu and Xie, Jiabo and Zhou, Yucan and Mu, Junxian and Hu, Qinghua and Zhu, Pengfei},
    title     = {Disrupting Positional Encoding for Effective Open Set Recognition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {6633--6642}
}

Restore-R1: Efficient Image Restoration Agents via Reinforcement Learning with Multimodal LLM Perceptual Feedback
Jianglin Lu,
Yuanwei Wu,
Ziyi Zhao,
Hongcheng Wang,
Felix Jimenez,
Abrar Majeedi,
Yun Fu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
    author    = {Lu, Jianglin and Wu, Yuanwei and Zhao, Ziyi and Wang, Hongcheng and Jimenez, Felix and Majeedi, Abrar and Fu, Yun},
    title     = {{Restore-R1}: Efficient Image Restoration Agents via Reinforcement Learning with Multimodal {LLM} Perceptual Feedback},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8629--8639}
}

Bi-Level Optimization for Single Domain Generalization
Marzi Heidari,
Hanping Zhang,
Hao Yan,
Yuhong Guo
[pdf] [arXiv]
[bibtex]
@InProceedings{Heidari_2026_CVPR,
    author    = {Heidari, Marzi and Zhang, Hanping and Yan, Hao and Guo, Yuhong},
    title     = {Bi-Level Optimization for Single Domain Generalization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {6685--6694}
}

EscherNet++: A Scalable Multi-View Framework for Amodal Completion, Novel View Synthesis and Feed-Forward 3D Reconstruction
Xinan Zhang,
Muhammad Zubair Irshad,
Anthony Yezzi,
Yi-Chang Tsai,
Zsolt Kira
[pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Xinan and Irshad, Muhammad Zubair and Yezzi, Anthony and Tsai, Yi-Chang and Kira, Zsolt},
    title     = {{EscherNet++}: A Scalable Multi-View Framework for Amodal Completion, Novel View Synthesis and Feed-Forward {3D} Reconstruction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8846--8856}
}

Are Multimodal Large Language Models Ready for Omnidirectional Spatial Reasoning?
Zihao Dongfang,
Xu Zheng,
Ziqiao Weng,
Yuanhuiyi Lyu,
Danda Pani Paudel,
Luc Van Gool,
Kailun Yang,
Xuming Hu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dongfang_2026_CVPR,
    author    = {Dongfang, Zihao and Zheng, Xu and Weng, Ziqiao and Lyu, Yuanhuiyi and Paudel, Danda Pani and Van Gool, Luc and Yang, Kailun and Hu, Xuming},
    title     = {Are Multimodal Large Language Models Ready for Omnidirectional Spatial Reasoning?},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9759--9769}
}

SCOPE: Spatially Ordered Continual Learning for 3D Segmentation
Wenhao Xu,
Huaidong Zhang,
Weipeng Zhang,
Qianle Zhang,
Shengfeng He
[pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Wenhao and Zhang, Huaidong and Zhang, Weipeng and Zhang, Qianle and He, Shengfeng},
    title     = {{SCOPE}: Spatially Ordered Continual Learning for {3D} Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7862--7871}
}

Back