CVPR 2026 Open Access Repository

Findings

Back
Revisiting Real-Time Detection Transformer with Efficient Encoder Design: Jiannan Huang,

Aditya Kane,

Fengzhe Zhou,

Yunchao Wei,

Humphrey Shi; [pdf] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Jiannan and Kane, Aditya and Zhou, Fengzhe and Wei, Yunchao and Shi, Humphrey},
  title     = {Revisiting Real-Time Detection Transformer with Efficient Encoder Design},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6859--6868},
}
Assessing the Reliability of Image Quality Metrics and Mitigating Quality Bias in Generative Models: Hoin Jung,

Shenyu Lu,

De Wang,

Xiaoqian Wang; [pdf] [supp]
[bibtex]
@InProceedings{Jung_2026_CVPR,
  author    = {Jung, Hoin and Lu, Shenyu and Wang, De and Wang, Xiaoqian},
  title     = {Assessing the Reliability of Image Quality Metrics and Mitigating Quality Bias in Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7956--7967},
}
Now You See It, Now You Don't: Instant Concept Erasure for Safe Text-to-Image and Video Generation: Shristi Das Biswas,

Arani Roy,

Kaushik Roy; [pdf] [supp]
[bibtex]
@InProceedings{Das_Biswas_2026_CVPR,
  author    = {Das Biswas, Shristi and Roy, Arani and Roy, Kaushik},
  title     = {Now You See It, Now You Don't: Instant Concept Erasure for Safe Text-to-Image and Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7903--7913},
}
Towards Generalization of Scene Text Tampering Localization via Causal Invariance: Huiru Shao,

Bin Dong,

Kaizhu Huang,

Xiaowei Huang,

Qiufeng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Huiru and Dong, Bin and Huang, Kaizhu and Huang, Xiaowei and Wang, Qiufeng},
  title     = {Towards Generalization of Scene Text Tampering Localization via Causal Invariance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7262--7271},
}
TriGuard-FL: A User-Centric Trust Triad in Federated Learning via Auditable Data, Verifiable Contributions, and Antidote-Driven Mitigation: K Naveen Kumar,

Mohsen Guizani; [pdf] [supp]
[bibtex]
@InProceedings{Kumar_2026_CVPR,
  author    = {Kumar, K Naveen and Guizani, Mohsen},
  title     = {{TriGuard-FL}: A User-Centric Trust Triad in Federated Learning via Auditable Data, Verifiable Contributions, and Antidote-Driven Mitigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7945--7955},
}
CodePlot-CoT: Mathematical Visual Reasoning by Thinking with Code-Driven Images: Chengqi Duan,

Kaiyue Sun,

Rongyao Fang,

Manyuan Zhang,

Yan Feng,

Ying Luo,

Yufang Liu,

Ke Wang,

Peng Pei,

Xunliang Cai,

Hongsheng Li,

Yi Ma,

Xihui Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Duan_2026_CVPR,
  author    = {Duan, Chengqi and Sun, Kaiyue and Fang, Rongyao and Zhang, Manyuan and Feng, Yan and Luo, Ying and Liu, Yufang and Wang, Ke and Pei, Peng and Cai, Xunliang and Li, Hongsheng and Ma, Yi and Liu, Xihui},
  title     = {{CodePlot-CoT}: Mathematical Visual Reasoning by Thinking with Code-Driven Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9586--9596},
}
Continual Adaptation of Vision Foundational Models for Semantic Segmentation in Adverse Weather: Nikhil Kumar Jangamreddy,

Mahsa Baktashmotlagh,

Chetan Arora; [pdf] [supp]
[bibtex]
@InProceedings{Jangamreddy_2026_CVPR,
  author    = {Jangamreddy, Nikhil Kumar and Baktashmotlagh, Mahsa and Arora, Chetan},
  title     = {Continual Adaptation of Vision Foundational Models for Semantic Segmentation in Adverse Weather},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7686--7696},
}
Reversing the Flow: Generation-to-Understanding Synergy in Large Multimodal Models: Yujun Tong,

Dongliang Chang,

Zijin Yin,

Xintong Liu,

Yuanchen Fang,

Zhanyu Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Tong_2026_CVPR,
  author    = {Tong, Yujun and Chang, Dongliang and Yin, Zijin and Liu, Xintong and Fang, Yuanchen and Ma, Zhanyu},
  title     = {Reversing the Flow: Generation-to-Understanding Synergy in Large Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6976--6986},
}
VirPro: Visual-Referred Probabilistic Prompt Learning for Weakly-Supervised Monocular 3D Detection: Chupeng Liu,

Jiyong Rao,

Shangquan Sun,

Runkai Zhao,

Weidong Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Chupeng and Rao, Jiyong and Sun, Shangquan and Zhao, Runkai and Cai, Weidong},
  title     = {{VirPro}: Visual-Referred Probabilistic Prompt Learning for Weakly-Supervised Monocular {3D} Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7530--7540},
}
Learning When to Look: A Disentangled Curriculum for Strategic Perception in Multimodal Reasoning: Siqi Yang,

Zilve Gao,

Haibo Qiu,

Fanfan Liu,

Peng Shi,

Zhixiong Zeng,

Qingmin Liao,

Lin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Siqi and Gao, Zilve and Qiu, Haibo and Liu, Fanfan and Shi, Peng and Zeng, Zhixiong and Liao, Qingmin and Ma, Lin},
  title     = {Learning When to Look: A Disentangled Curriculum for Strategic Perception in Multimodal Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9738--9748},
}
QuPAINT: Physics-Aware Instruction Tuning Approach to Quantum Material Discovery: Xuan Bac Nguyen,

Hoang-Quan Nguyen,

Sankalp Pandey,

Tim Faltermeier,

Nicholas Borys,

Hugh Churchill,

Khoa Luu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Xuan Bac and Nguyen, Hoang-Quan and Pandey, Sankalp and Faltermeier, Tim and Borys, Nicholas and Churchill, Hugh and Luu, Khoa},
  title     = {{QuPAINT}: Physics-Aware Instruction Tuning Approach to Quantum Material Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8684--8694},
}
DocSLM: A Small Vision-Language Model for Long Multimodal Document Understanding: Tanveer Hannan,

Dimitrios Mallios,

Parth Pathak,

Faegheh Sardari,

Thomas Seidl,

Gedas Bertasius,

Mohsen Fayyaz,

Sunando Sengupta; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hannan_2026_CVPR,
  author    = {Hannan, Tanveer and Mallios, Dimitrios and Pathak, Parth and Sardari, Faegheh and Seidl, Thomas and Bertasius, Gedas and Fayyaz, Mohsen and Sengupta, Sunando},
  title     = {{DocSLM}: A Small Vision-Language Model for Long Multimodal Document Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9337--9346},
}
Count What Repeats: Period-Adaptive Multi-Scale Consistency for Self-Supervised Repetitive Action Counting: Shizhao Gao,

Jun Li,

Qiming Li; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Shizhao and Li, Jun and Li, Qiming},
  title     = {Count What Repeats: Period-Adaptive Multi-Scale Consistency for Self-Supervised Repetitive Action Counting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8143--8152},
}
Adaptive Reinforcement for Open-ended Medical Reasoning via Semantic-Guided Reward Collapse Mitigation: Yizhou Liu,

Dingkang Yang,

Zizhi Chen,

Minghao Han,

Xukun Zhang,

Keliang Liu,

Jingwei Wei,

Lihua Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yizhou and Yang, Dingkang and Chen, Zizhi and Han, Minghao and Zhang, Xukun and Liu, Keliang and Wei, Jingwei and Zhang, Lihua},
  title     = {Adaptive Reinforcement for Open-ended Medical Reasoning via Semantic-Guided Reward Collapse Mitigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8651--8660},
}
Proto-SaGa: Prototype-based 3D Scene Segmentation with Semantic-aware Gaussian Grouping: Youngmin Oh,

Changjae Oh,

Bumsub Ham; [pdf] [supp]
[bibtex]
@InProceedings{Oh_2026_CVPR,
  author    = {Oh, Youngmin and Oh, Changjae and Ham, Bumsub},
  title     = {{Proto-SaGa}: Prototype-based {3D} Scene Segmentation with Semantic-aware {Gaussian} Grouping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7493--7502},
}
Attention-Space Contrastive Guidance for Efficient Hallucination Mitigation in LVLMs: Yujin Jo,

Sangyoon Bae,

Taesup Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jo_2026_CVPR,
  author    = {Jo, Yujin and Bae, Sangyoon and Kim, Taesup},
  title     = {Attention-Space Contrastive Guidance for Efficient Hallucination Mitigation in {LVLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9706--9715},
}
It's Time to Get It Right: Improving Analog Clock Reading and Clock-Hand Spatial Reasoning in Vision-Language Models: Jaeha Choi,

Jin Won Lee,

Siwoo You,

Jangho Lee; [pdf] [supp]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Jaeha and Lee, Jin Won and You, Siwoo and Lee, Jangho},
  title     = {It's Time to Get It Right: Improving Analog Clock Reading and Clock-Hand Spatial Reasoning in Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9500--9509},
}
STS-Mixer: Spatio-Temporal-Spectral Mixer for 4D Point Cloud Video Understanding: Wenhao Li,

Xueying Jiang,

Gongjie Zhang,

Xiaoqin Zhang,

Ling Shao,

Shijian Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Wenhao and Jiang, Xueying and Zhang, Gongjie and Zhang, Xiaoqin and Shao, Ling and Lu, Shijian},
  title     = {{STS-Mixer}: Spatio-Temporal-Spectral Mixer for {4D} Point Cloud Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8185--8194},
}
VACoT: Rethinking Visual Data Augmentation with VLMs: Zhengzhuo Xu,

Chong Sun,

SiNan Du,

Chen Li,

Jing Lyu,

Chun Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zhengzhuo and Sun, Chong and Du, SiNan and Li, Chen and Lyu, Jing and Yuan, Chun},
  title     = {{VACoT}: Rethinking Visual Data Augmentation with {VLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9780--9790},
}
Active Video Perception: Iterative Evidence Seeking for Agentic Long Video Understanding: Ziyang Wang,

Honglu Zhou,

Shijie Wang,

Junnan Li,

Caiming Xiong,

Silvio Savarese,

Mohit Bansal,

Michael S. Ryoo,

Juan Carlos Niebles; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Ziyang and Zhou, Honglu and Wang, Shijie and Li, Junnan and Xiong, Caiming and Savarese, Silvio and Bansal, Mohit and Ryoo, Michael S. and Niebles, Juan Carlos},
  title     = {Active Video Perception: Iterative Evidence Seeking for Agentic Long Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9088--9099},
}
BrandFusion: A Multi-Agent Framework for Seamless Brand Integration in Text-to-Video Generation: Zihao Zhu,

Ruotong Wang,

Siwei Lyu,

Min Zhang,

Baoyuan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Zihao and Wang, Ruotong and Lyu, Siwei and Zhang, Min and Wu, Baoyuan},
  title     = {{BrandFusion}: A Multi-Agent Framework for Seamless Brand Integration in Text-to-Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8661--8671},
}
Learning through Creation: A Hash-Free Framework for On-the-Fly Category Discovery: Bohan Zhang,

Weidong Tang,

Zhixiang Chi,

Yi Jin,

Zhenbo Li,

Yang Wang,

Yanan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Bohan and Tang, Weidong and Chi, Zhixiang and Jin, Yi and Li, Zhenbo and Wang, Yang and Wu, Yanan},
  title     = {Learning through Creation: A Hash-Free Framework for On-the-Fly Category Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7830--7840},
}
Attention Never Lie: Visual Attention Defocus Reveals and Rectifies Hallucinations in MLLMs: Chenxi Zhao,

Yan Zhou,

Jufeng Yang; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Chenxi and Zhou, Yan and Yang, Jufeng},
  title     = {Attention Never Lie: Visual Attention Defocus Reveals and Rectifies Hallucinations in {MLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8897--8906},
}
Leveraging Arbitrary Data Sources for AI-Generated Image Detection Without Sacrificing Generalization: Qinghui He,

Haifeng Zhang,

Xiuli Bi,

Bo Liu,

Chi-Man Pun,

Bin Xiao; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Qinghui and Zhang, Haifeng and Bi, Xiuli and Liu, Bo and Pun, Chi-Man and Xiao, Bin},
  title     = {Leveraging Arbitrary Data Sources for {AI}-Generated Image Detection Without Sacrificing Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6748--6757},
}
ODOV: Benchmark the Open-Domain Open-Vocabulary Object Detection: Yupeng Zhang,

Ruize Han,

Fangnan Zhou,

Wei Feng,

Liang Wan; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yupeng and Han, Ruize and Zhou, Fangnan and Feng, Wei and Wan, Liang},
  title     = {{ODOV}: Benchmark the Open-Domain Open-Vocabulary Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6643--6654},
}
NRFP: A Noise-Robust Feature Plugin for Source-Free Domain Adaptation: Huanxin Zou,

Zhize Wu,

Yue Jiang,

Jijian Zhou,

Zhiwei Xu,

Teng Li,

Jianhua Shu,

Fan Cheng; [pdf] [supp]
[bibtex]
@InProceedings{Zou_2026_CVPR,
  author    = {Zou, Huanxin and Wu, Zhize and Jiang, Yue and Zhou, Jijian and Xu, Zhiwei and Li, Teng and Shu, Jianhua and Cheng, Fan},
  title     = {{NRFP}: A Noise-Robust Feature Plugin for Source-Free Domain Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7562--7572},
}
From Horizontal to Rotated: Cross-View Object Geo-Localization with Orientation Awareness: Chenlin Fu,

Ao Gong,

Xingtao Ling,

Yingying Zhu; [pdf]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Chenlin and Gong, Ao and Ling, Xingtao and Zhu, Yingying},
  title     = {From Horizontal to Rotated: Cross-View Object Geo-Localization with Orientation Awareness},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7101--7110},
}
ReFoCUS: Reinforcement-guided Frame Optimization for Contextual Understanding: Hosu Lee,

Junho Kim,

Hyunjun Kim,

Yong Man Ro; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Hosu and Kim, Junho and Kim, Hyunjun and Ro, Yong Man},
  title     = {{ReFoCUS}: Reinforcement-guided Frame Optimization for Contextual Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8291--8302},
}
VSAS-Bench: Real-Time Evaluation of Visual Streaming Assistant Models: Pavan Kumar Anasosalu Vasu,

Cem Koc,

Fartash Faghri,

Chun-Liang Li,

Bo Feng,

Zhengfeng Lai,

Meng Cao,

Oncel Tuzel,

Hadi Pouransari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vasu_2026_CVPR,
  author    = {Vasu, Pavan Kumar Anasosalu and Koc, Cem and Faghri, Fartash and Li, Chun-Liang and Feng, Bo and Lai, Zhengfeng and Cao, Meng and Tuzel, Oncel and Pouransari, Hadi},
  title     = {{VSAS-Bench}: Real-Time Evaluation of Visual Streaming Assistant Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9654--9663},
}
Switch-KD: Visual-Switch Knowledge Distillation for Vision-Language Models: Haoyi Sun,

Xiaoxiao Wang,

Ning Mao,

Qian Wang,

Lifu Mu,

Wen Zheng,

Tao Wei,

Wei Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Haoyi and Wang, Xiaoxiao and Mao, Ning and Wang, Qian and Mu, Lifu and Zheng, Wen and Wei, Tao and Chen, Wei},
  title     = {{Switch-KD}: Visual-Switch Knowledge Distillation for Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9316--9326},
}
DeepSketcher: Internalizing Visual Manipulation for Multimodal Reasoning: Chi Zhang,

Haibo Qiu,

Qiming Zhang,

Zhixiong Zeng,

Lin Ma,

Jing Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chi and Qiu, Haibo and Zhang, Qiming and Zeng, Zhixiong and Ma, Lin and Zhang, Jing},
  title     = {{DeepSketcher}: Internalizing Visual Manipulation for Multimodal Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9389--9400},
}
Exploring Hierarchical Consistency and Unbiased Objectness for Open-Vocabulary Object Detection: Sanghoon Lee,

Geon Lee,

Hyekang Park,

Bumsub Ham; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Sanghoon and Lee, Geon and Park, Hyekang and Ham, Bumsub},
  title     = {Exploring Hierarchical Consistency and Unbiased Objectness for Open-Vocabulary Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6819--6828},
}
Revisiting Image Manipulation Localization under Realistic Manipulation Scenarios: Xuekang Zhu,

Ji-Zhe Zhou,

Kaiwen Feng,

Chenfan Qu,

Xiwen Wang,

Yunfei Wang,

Liting Zhou,

Jian Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Xuekang and Zhou, Ji-Zhe and Feng, Kaiwen and Qu, Chenfan and Wang, Xiwen and Wang, Yunfei and Zhou, Liting and Liu, Jian},
  title     = {Revisiting Image Manipulation Localization under Realistic Manipulation Scenarios},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7198--7207},
}
MIRA: Multimodal Iterative Reasoning Agent for Image Editing: Ziyun Zeng,

Hang Hua,

Jiebo Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Ziyun and Hua, Hang and Luo, Jiebo},
  title     = {{MIRA}: Multimodal Iterative Reasoning Agent for Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9563--9573},
}
SAGE: Shape-Adapting Gated Experts for Adaptive Histopathology Image Segmentation: Gia Huy Thai,

Hoang-Nguyen Vu,

Anh-Minh Phan,

Quang-Thinh Ly,

Thi-Ngoc-Truc Nguyen,

Nhat Ho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Thai_2026_CVPR,
  author    = {Thai, Gia Huy and Vu, Hoang-Nguyen and Phan, Anh-Minh and Ly, Quang-Thinh and Nguyen, Thi-Ngoc-Truc and Ho, Nhat},
  title     = {{SAGE}: Shape-Adapting Gated Experts for Adaptive Histopathology Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7337--7346},
}
HyperFM: A Efficient Hyperspectral Foundation Model with Spectral Grouping: Zahid Hassan Tushar,

Sanjay Purushotham; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tushar_2026_CVPR,
  author    = {Tushar, Zahid Hassan and Purushotham, Sanjay},
  title     = {{HyperFM}: A Efficient Hyperspectral Foundation Model with Spectral Grouping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6955--6965},
}
Mitigating Object Hallucinations in LVLMs via Attention Imbalance Rectification: Han Sun,

Qin Li,

Peixin Wang,

Min Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Han and Li, Qin and Wang, Peixin and Zhang, Min},
  title     = {Mitigating Object Hallucinations in {LVLMs} via Attention Imbalance Rectification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8930--8940},
}
Hold-One-Shot-Out (HOSO) for Validation-Free Few-Shot CLIP Adapters: Chris Vorster,

Mayug Maniparambil,

Noel O'Connor,

Noel Murphy,

Derek Molloy; [pdf] [supp]
[bibtex]
@InProceedings{Vorster_2026_CVPR,
  author    = {Vorster, Chris and Maniparambil, Mayug and O'Connor, Noel and Murphy, Noel and Molloy, Derek},
  title     = {Hold-One-Shot-Out ({HOSO}) for Validation-Free Few-Shot {CLIP} Adapters},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7820--7829},
}
Euclid's Gift: Enhancing Spatial Perception and Reasoning in Vision-Language Models via Geometric Surrogate Tasks: Shijie Lian,

Changti Wu,

Laurence Tianruo Yang,

Hang Yuan,

Bin Yu,

Lei Zhang,

Kai Chen; [pdf] [supp]
[bibtex]
@InProceedings{Lian_2026_CVPR,
  author    = {Lian, Shijie and Wu, Changti and Yang, Laurence Tianruo and Yuan, Hang and Yu, Bin and Zhang, Lei and Chen, Kai},
  title     = {Euclid's Gift: Enhancing Spatial Perception and Reasoning in Vision-Language Models via Geometric Surrogate Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9824--9835},
}
PASR: Pose-Aware 3D Shape Retrieval from Occluded Single Views: Jiaxin Shi,

Guofeng Zhang,

Wufei Ma,

Naifu Liang,

Adam Kortylewski,

Alan Yuille; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Jiaxin and Zhang, Guofeng and Ma, Wufei and Liang, Naifu and Kortylewski, Adam and Yuille, Alan},
  title     = {{PASR}: Pose-Aware {3D} Shape Retrieval from Occluded Single Views},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6869--6879},
}
MHMamba: Multi-Head Mamba for 3D Brain Tumor Segmentation: Hanjun Tao,

Hua Wang,

Fan Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Tao_2026_CVPR,
  author    = {Tao, Hanjun and Wang, Hua and Zhang, Fan},
  title     = {{MHMamba}: Multi-Head {Mamba} for {3D} Brain Tumor Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7388--7397},
}
OV-Stitcher: A Global Context-Aware Framework for Training-Free Open Vocabulary Semantic Segmentation: Seungjae Moon,

Seunghyun Oh,

Youngmin Ro; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Moon_2026_CVPR,
  author    = {Moon, Seungjae and Oh, Seunghyun and Ro, Youngmin},
  title     = {{OV-Stitcher}: A Global Context-Aware Framework for Training-Free Open Vocabulary Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7357--7367},
}
TP^2-DETR: Unlocking Deformable DETR for Zero-Shot Temporal Action Proposal Generation with Temporal Feature Pyramids: Ya-Yun Cheng,

Kan Tippayamontri,

Chih-Yuan Yang,

Jane Yung-jen Hsu; [pdf] [supp]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Ya-Yun and Tippayamontri, Kan and Yang, Chih-Yuan and Hsu, Jane Yung-jen},
  title     = {{TP{\textasciicircum}2-DETR}: Unlocking Deformable {DETR} for Zero-Shot Temporal Action Proposal Generation with Temporal Feature Pyramids},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8237--8246},
}
ForestPrune: High-ratio Visual Token Compression for Video Multimodal Large Language Models Via Spatial-Temporal Forest Modeling: Shaobo Ju,

Baiyang Song,

Tao Chen,

Jiapeng Zhang,

Qiong Wu,

Chao Chang,

Huaixi Wang,

Yiyi Zhou,

Rongrong Ji; [pdf] [arXiv]
[bibtex]
@InProceedings{Ju_2026_CVPR,
  author    = {Ju, Shaobo and Song, Baiyang and Chen, Tao and Zhang, Jiapeng and Wu, Qiong and Chang, Chao and Wang, Huaixi and Zhou, Yiyi and Ji, Rongrong},
  title     = {{ForestPrune}: High-ratio Visual Token Compression for Video Multimodal Large Language Models Via Spatial-Temporal Forest Modeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8326--8336},
}
CarePilot: A Multi-Agent Framework for Long-Horizon Computer Task Automation in Healthcare: Akash Ghosh,

Tajamul Ashraf,

Rishu Kumar Singh,

Numan Saeed,

Sriparna Saha,

Xiuying Chen,

Salman Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ghosh_2026_CVPR,
  author    = {Ghosh, Akash and Ashraf, Tajamul and Singh, Rishu Kumar and Saeed, Numan and Saha, Sriparna and Chen, Xiuying and Khan, Salman},
  title     = {{CarePilot}: A Multi-Agent Framework for Long-Horizon Computer Task Automation in Healthcare},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9695--9705},
}
CurrMix: Curriculum-Enhanced MixUp for Long-Tailed Visual Recognition: Zhongquan Jian,

Yanhao Chen,

Bingbing Hu,

Wenhan Lv,

Shaopan Wang,

Jipeng Wu,

Junfeng Yao,

Yang Lu,

Qingqiang Wu; [pdf] [supp]
[bibtex]
@InProceedings{Jian_2026_CVPR,
  author    = {Jian, Zhongquan and Chen, Yanhao and Hu, Bingbing and Lv, Wenhan and Wang, Shaopan and Wu, Jipeng and Yao, Junfeng and Lu, Yang and Wu, Qingqiang},
  title     = {{CurrMix}: Curriculum-Enhanced {MixUp} for Long-Tailed Visual Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7706--7716},
}
Object-Centric Vision Token Pruning for Vision Language Models: Guangyuan Li,

Rongzhen Zhao,

Jinhong Deng,

Yanbo Wang,

Joni Pajarinen; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Guangyuan and Zhao, Rongzhen and Deng, Jinhong and Wang, Yanbo and Pajarinen, Joni},
  title     = {Object-Centric Vision Token Pruning for Vision Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7040--7049},
}
SpHOR: A Representation Learning Perspective on Open-set Recognition for Identifying Unknown Classes in Deep Neural Networks: Thiru Thillai Nadarasar Bahavan,

Sachith Seneviratne,

Saman Halgamuge; [pdf] [supp]
[bibtex]
@InProceedings{Bahavan_2026_CVPR,
  author    = {Bahavan, Thiru Thillai Nadarasar and Seneviratne, Sachith and Halgamuge, Saman},
  title     = {{SpHOR}: A Representation Learning Perspective on Open-set Recognition for Identifying Unknown Classes in Deep Neural Networks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6901--6910},
}
coDrawAgents: A Multi-Agent Dialogue Framework for Compositional Image Generation: Chunhan Li,

Qifeng Wu,

Jia-Hui Pan,

Ka-Hei Hui,

Jingyu Hu,

Yuming Jiang,

Bin Sheng,

Xihui Liu,

Wenjuan Gong,

Zhengzhe Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Chunhan and Wu, Qifeng and Pan, Jia-Hui and Hui, Ka-Hei and Hu, Jingyu and Jiang, Yuming and Sheng, Bin and Liu, Xihui and Gong, Wenjuan and Liu, Zhengzhe},
  title     = {{coDrawAgents}: A Multi-Agent Dialogue Framework for Compositional Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9802--9812},
}
Memorization In Stable Diffusion Is Unexpectedly Driven by CLIP Embeddings: Bumjun Kim,

Albert No; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Bumjun and No, Albert},
  title     = {Memorization In {Stable Diffusion} Is Unexpectedly Driven by {CLIP} Embeddings},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7988--7997},
}
Recursive Think-Answer Process for LLMs and VLMs: Byung-Kwan Lee,

Youngchae Chee,

Yong Man Ro; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Byung-Kwan and Chee, Youngchae and Ro, Yong Man},
  title     = {Recursive Think-Answer Process for {LLMs} and {VLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9608--9621},
}
FedOrtho: Efficient Federated Unlearning Via Orthogonal Convolution and Adaptive Soft Pruning: Qinghui Gong,

Xue Yang,

Xunlei Chen,

Jinshan Lai,

Hua Meng,

Xiaohu Tang; [pdf] [supp]
[bibtex]
@InProceedings{Gong_2026_CVPR,
  author    = {Gong, Qinghui and Yang, Xue and Chen, Xunlei and Lai, Jinshan and Meng, Hua and Tang, Xiaohu},
  title     = {{FedOrtho}: Efficient Federated Unlearning Via Orthogonal Convolution and Adaptive Soft Pruning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8009--8018},
}
VoQA: Visual-only Question Answering: Jianing An,

Luyang Jiang,

Jie Luo,

Wenjun Wu,

Lei Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{An_2026_CVPR,
  author    = {An, Jianing and Jiang, Luyang and Luo, Jie and Wu, Wenjun and Huang, Lei},
  title     = {{VoQA}: Visual-only Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9100--9109},
}
IEA: Amateur-Friendly Conversational Image Editing Agent via Three Stages of Multitask Alignment: Zichen Zhu,

Yuheng Sun,

Mingxuan Zhu,

Wenjie Ma,

Situo Zhang,

Zhexiang Wang,

Ziyue Yang,

Danyang Zhang,

Kunyao Lan,

Zihan Zhao,

Dingye Liu,

Siqi Xiang,

Lu Chen,

Kai Yu; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR, author = {Zhu, Zichen and Sun, Yuheng and Zhu, Mingxuan and Ma, Wenjie and Zhang, Situo and Wang, Zhexiang and Yang, Ziyue and Zhang, Danyang and Lan, Kunyao and Zhao, Zihan and Liu, Dingye and Xiang, Siqi and Chen, Lu and Yu, Kai}, title = {IEA: Amateur-Friendly Conversational Image Editing Agent via Three Stages of Multitask Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8672-8683} }
SEM: Sparse Embedding Modulation for Post-Hoc Debiasing of Vision-Language Models: Quentin Guimard,

Federico Bartsch,

Simone Caldarella,

Rahaf Aljundi,

Elisa Ricci,

Massimiliano Mancini; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guimard_2026_CVPR, author = {Guimard, Quentin and Bartsch, Federico and Caldarella, Simone and Aljundi, Rahaf and Ricci, Elisa and Mancini, Massimiliano}, title = {SEM: Sparse Embedding Modulation for Post-Hoc Debiasing of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8101-8110} }
Indexing Multimodal Language Models for Large-scale Image Retrieval: Bahey Tharwat,

Giorgos Kordopatis-Zilos,

Pavel Suma,

Ian Reid,

Giorgos Tolias; [pdf] [supp]
[bibtex]
@InProceedings{Tharwat_2026_CVPR, author = {Tharwat, Bahey and Kordopatis-Zilos, Giorgos and Suma, Pavel and Reid, Ian and Tolias, Giorgos}, title = {Indexing Multimodal Language Models for Large-scale Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6737-6747} }
MASS: Motion-Aware Spatial-temporal Grounding for Physics Reasoning and Comprehension in Vision-Language Models: Xiyang Wu,

Zongxia Li,

Jihui Jin,

Gouthaman KV,

Vishnu Raj,

Nilotpal Sinha,

Jingxi Chen,

Fan Du,

Dinesh Manocha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR, author = {Wu, Xiyang and Li, Zongxia and Jin, Jihui and KV, Gouthaman and Raj, Vishnu and Sinha, Nilotpal and Chen, Jingxi and Du, Fan and Manocha, Dinesh}, title = {MASS: Motion-Aware Spatial-temporal Grounding for Physics Reasoning and Comprehension in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9433-9443} }
MoonSeg3R: Monocular Online Zero-Shot Segment Anything in 3D with Reconstructive Foundation Priors: Zhipeng Du,

Duolikun Danier,

Jan Eric Lenssen,

Hakan Bilen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2026_CVPR, author = {Du, Zhipeng and Danier, Duolikun and Lenssen, Jan Eric and Bilen, Hakan}, title = {MoonSeg3R: Monocular Online Zero-Shot Segment Anything in 3D with Reconstructive Foundation Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7304-7314} }
VRSA: Jailbreaking Multimodal Large Language Models through Visual Reasoning Sequential Attack: Shiji Zhao,

Shukun Xiong,

Yao Huang,

Jin Yan,

Zhenyu Wu,

Jiyang Guan,

Ranjie Duan,

Jialing Tao,

Hui Xue,

Xingxing Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR, author = {Zhao, Shiji and Xiong, Shukun and Huang, Yao and Yan, Jin and Wu, Zhenyu and Guan, Jiyang and Duan, Ranjie and Tao, Jialing and Xue, Hui and Wei, Xingxing}, title = {VRSA: Jailbreaking Multimodal Large Language Models through Visual Reasoning Sequential Attack}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9412-9421} }
Taming Hallucinations: Boosting MLLMs' Video Understanding via Counterfactual Video Generation: Zhe Huang,

Hao Wen,

Aiming Hao,

Bingze Song,

Meiqi Wu,

Jiahong Wu,

Xiangxiang Chu,

Sheng Lu,

Haoqian Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR, author = {Huang, Zhe and Wen, Hao and Hao, Aiming and Song, Bingze and Wu, Meiqi and Wu, Jiahong and Chu, Xiangxiang and Lu, Sheng and Wang, Haoqian}, title = {Taming Hallucinations: Boosting MLLMs' Video Understanding via Counterfactual Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8153-8163} }
Evolutionary Multi-Agent Collaboration for Real-World Video Face Restoration: Bowen Tang,

Tao Wang,

Miao Zhang,

Xin Yu,

Jinwei Chen,

Bo Li,

Kaihao Zhang; [pdf]
[bibtex]
@InProceedings{Tang_2026_CVPR, author = {Tang, Bowen and Wang, Tao and Zhang, Miao and Yu, Xin and Chen, Jinwei and Li, Bo and Zhang, Kaihao}, title = {Evolutionary Multi-Agent Collaboration for Real-World Video Face Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8175-8184} }
FedNPC: Stochastic Noise-driven Post-hoc Classifier Calibration Method for Federated Long-tailed Learning: Jintong Gao,

He Zhao,

Yibo Yang,

Dandan Guo; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR, author = {Gao, Jintong and Zhao, He and Yang, Yibo and Guo, Dandan}, title = {FedNPC: Stochastic Noise-driven Post-hoc Classifier Calibration Method for Federated Long-tailed Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7737-7746} }
PrAda: Few-Shot Visual Adaptation for Text-Prompted Segmentation: Gabriele Rosi,

Fabio Cermelli,

Carlo Masone,

Barbara Caputo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rosi_2026_CVPR, author = {Rosi, Gabriele and Cermelli, Fabio and Masone, Carlo and Caputo, Barbara}, title = {PrAda: Few-Shot Visual Adaptation for Text-Prompted Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7326-7336} }
GDP: Graph-Based Dynamic Personalization for Multimodal Large Language Models: Cong Ray,

Xiangwen Deng,

Feice Huang,

ZhengXian Wu,

Shen'ao Jiang,

Peng Jiao,

Zhifang Liu,

Haoqian Wang; [pdf] [supp]
[bibtex]
@InProceedings{Ray_2026_CVPR, author = {Ray, Cong and Deng, Xiangwen and Huang, Feice and Wu, ZhengXian and Jiang, Shen'ao and Jiao, Peng and Liu, Zhifang and Wang, Haoqian}, title = {GDP: Graph-Based Dynamic Personalization for Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9195-9204} }
Human-Intervention Segmentation via Federated Intent Embedding and Multi-Mask Recommendation: Yeongsu Kim,

Seo-Yeon Choi,

Kyungsu Lee; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2026_CVPR, author = {Kim, Yeongsu and Choi, Seo-Yeon and Lee, Kyungsu}, title = {Human-Intervention Segmentation via Federated Intent Embedding and Multi-Mask Recommendation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8857-8867} }
Ninja Codes: Neurally Generated Fiducial Markers for Stealthy 6-DoF Tracking: Yuichiro Takeuchi,

Yusuke Imoto,

Shunya Kato; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Takeuchi_2026_CVPR, author = {Takeuchi, Yuichiro and Imoto, Yusuke and Kato, Shunya}, title = {Ninja Codes: Neurally Generated Fiducial Markers for Stealthy 6-DoF Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6880-6889} }
Can Textual Reasoning Improve the Performance of MLLMs on Fine-Grained Visual Classification?: Jie Zhu,

Yiyang Su,

Xiaoming Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR, author = {Zhu, Jie and Su, Yiyang and Liu, Xiaoming}, title = {Can Textual Reasoning Improve the Performance of MLLMs on Fine-Grained Visual Classification?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9401-9411} }
Alleviating Hallucinations in Large Vision-Language Models via Decoding-Time Perturbation Adaptation: Jiaqi Bai,

Hongcheng Guo,

Jiaheng Liu,

Zhibo Zhou,

Jian Yang,

Feiran Huang; [pdf]
[bibtex]
@InProceedings{Bai_2026_CVPR, author = {Bai, Jiaqi and Guo, Hongcheng and Liu, Jiaheng and Zhou, Zhibo and Yang, Jian and Huang, Feiran}, title = {Alleviating Hallucinations in Large Vision-Language Models via Decoding-Time Perturbation Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9867-9877} }
Trajectory-Diversity-Driven Robust Vision-and-Language Navigation: Jiangyang Li,

Cong Wan,

SongLin Dong,

Chenhao Ding,

Qiang Wang,

Zhiheng Ma,

Yihong Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Jiangyang and Wan, Cong and Dong, SongLin and Ding, Chenhao and Wang, Qiang and Ma, Zhiheng and Gong, Yihong}, title = {Trajectory-Diversity-Driven Robust Vision-and-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9143-9154} }
Efficient3D : A Unified Framework for Adaptive and Debiased Token Reduction in 3D MLLMs: Yuhui Lin,

Siyue Yu,

Yuxing Yang,

Guangliang Cheng,

Jimin Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR, author = {Lin, Yuhui and Yu, Siyue and Yang, Yuxing and Cheng, Guangliang and Xiao, Jimin}, title = {Efficient3D : A Unified Framework for Adaptive and Debiased Token Reduction in 3D MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8941-8951} }
Learning Multi-Modal Prototypes for Cross-Domain Few-Shot Object Detection: Wanqi Wang,

Jingcai Guo,

Yuxiang Cai,

Zhi Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Wanqi and Guo, Jingcai and Cai, Yuxiang and Chen, Zhi}, title = {Learning Multi-Modal Prototypes for Cross-Domain Few-Shot Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7747-7757} }
Class-Aware Drift Compensation for Non-Uniform Semantic Shift in Continual Learning: Fankang Xu,

Lu Jin,

Yanpeng Sun,

Shiyu Xuan,

Zechao Li; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Fankang and Jin, Lu and Sun, Yanpeng and Xuan, Shiyu and Li, Zechao}, title = {Class-Aware Drift Compensation for Non-Uniform Semantic Shift in Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7717-7727} }
NCSTR: Node-Centric Decoupled Spatio-Temporal Reasoning for Video-based Human Pose Estimation: Quang Dang Huynh,

Xuefei Yin,

Andrew Busch,

Hugo G. Espinosa,

Alan Wee-Chung Liew,

Matthew T.O. Worsey,

Yanming Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huynh_2026_CVPR, author = {Huynh, Quang Dang and Yin, Xuefei and Busch, Andrew and Espinosa, Hugo G. and Liew, Alan Wee-Chung and Worsey, Matthew T.O. and Zhu, Yanming}, title = {NCSTR: Node-Centric Decoupled Spatio-Temporal Reasoning for Video-based Human Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8368-8377} }
OKGraph: Online Knowledge Graph Probing for Open-vocabulary Recognition: Junhui Yin,

Zhizhen Cai,

Puze Wang,

Guanzhou Ke,

Jianhua Yang,

Man Zhang,

Qiang Zhang,

Shengfeng He; [pdf] [supp]
[bibtex]
@InProceedings{Yin_2026_CVPR, author = {Yin, Junhui and Cai, Zhizhen and Wang, Puze and Ke, Guanzhou and Yang, Jianhua and Zhang, Man and Zhang, Qiang and He, Shengfeng}, title = {OKGraph: Online Knowledge Graph Probing for Open-vocabulary Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6717-6726} }
RISE: Enhancing VLM Image Annotation with Self-Supervised Reasoning: Suhang Hu,

Wei Hu,

Yuhang Su,

Fan Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2026_CVPR, author = {Hu, Suhang and Hu, Wei and Su, Yuhang and Zhang, Fan}, title = {RISE: Enhancing VLM Image Annotation with Self-Supervised Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9878-9887} }
Revisiting Prototype Rehearsal for Exemplar-Free Continual Learning: Manifold-Aware Boundary Sampling with Adaptive Class-Balanced Loss: Hongye Xu,

Bartosz Krawczyk; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Hongye and Krawczyk, Bartosz}, title = {Revisiting Prototype Rehearsal for Exemplar-Free Continual Learning: Manifold-Aware Boundary Sampling with Adaptive Class-Balanced Loss}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7852-7861} }
DARTS: Distance-Aware Robust Training for Selective Classification: A. Q. M. Sazzad Sayyed,

Nathaniel D. Bastian,

Francesco Restuccia; [pdf] [supp]
[bibtex]
@InProceedings{Sayyed_2026_CVPR, author = {Sayyed, A. Q. M. Sazzad and Bastian, Nathaniel D. and Restuccia, Francesco}, title = {DARTS: Distance-Aware Robust Training for Selective Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8806-8815} }
Do All Individual Layers Help? An Empirical Study of Task-Interfering Layers in Vision-Language Model: Zhiming Liu,

Yujie Wei,

Lei Feng,

Xiu Su,

Xiaobo Xia,

Weili Guan,

Zeke Xie,

Shuo Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Zhiming and Wei, Yujie and Feng, Lei and Su, Xiu and Xia, Xiaobo and Guan, Weili and Xie, Zeke and Yang, Shuo}, title = {Do All Individual Layers Help? An Empirical Study of Task-Interfering Layers in Vision-Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9597-9607} }
D^2-STX: Decoupling Spatial-Temporal Cross-Attention for Dual-branch Repetitive Action Counting: Xiaoai Wang,

Hang Wang,

Yan Liu,

Huan Hu,

Bruce X.B. Yu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Xiaoai and Wang, Hang and Liu, Yan and Hu, Huan and Yu, Bruce X.B.}, title = {D{\textasciicircum}2-STX: Decoupling Spatial-Temporal Cross-Attention for Dual-branch Repetitive Action Counting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8205-8214} }
Efficient Unlearning through Maximizing Relearning Convergence Delay: Khoa Tran,

Simon S. Woo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tran_2026_CVPR, author = {Tran, Khoa and Woo, Simon S.}, title = {Efficient Unlearning through Maximizing Relearning Convergence Delay}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7968-7977} }
Video Inspector: An Agentic-RL Framework and Benchmark for Human-Aligned Generative Video Evaluation: Jacey Somers,

Harrison Zale,

Janine Mason,

Tina Walker,

Eddie Quinn,

Felix Lewis,

Gavin Wright,

Yvonne Young,

Charles Sullivan,

Wayne Carter,

Julian Foster; [pdf]
[bibtex]
@InProceedings{Somers_2026_CVPR, author = {Somers, Jacey and Zale, Harrison and Mason, Janine and Walker, Tina and Quinn, Eddie and Lewis, Felix and Wright, Gavin and Young, Yvonne and Sullivan, Charles and Carter, Wayne and Foster, Julian}, title = {Video Inspector: An Agentic-RL Framework and Benchmark for Human-Aligned Generative Video Evaluation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8534-8543} }
Distilling Counterfactual Reasoning from Language to Vision: Causal Graph-Guided Post-Training for Video Understanding: Yuefei Chen,

Jiang Liu,

Xiaodong Lin,

Ruixiang Tang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Yuefei and Liu, Jiang and Lin, Xiaodong and Tang, Ruixiang}, title = {Distilling Counterfactual Reasoning from Language to Vision: Causal Graph-Guided Post-Training for Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9165-9174} }
Seeing Helps Reasoning in Language Models: Yulu Gan,

Kaiya Ivy Zhao,

Tomaso Poggio,

Phillip Isola; [pdf] [supp]
[bibtex]
@InProceedings{Gan_2026_CVPR, author = {Gan, Yulu and Zhao, Kaiya Ivy and Poggio, Tomaso and Isola, Phillip}, title = {Seeing Helps Reasoning in Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7080-7090} }
TAPNext++: What's Next for Tracking Any Point (TAP)?: Sebastian Jung,

Artem Zholus,

Martin Sundermeyer,

Carl Doersch,

Ross Goroshin,

David Joseph Tan,

Sarath Chandar,

Rudolph Triebel,

Federico Tombari; [pdf] [supp]
[bibtex]
@InProceedings{Jung_2026_CVPR, author = {Jung, Sebastian and Zholus, Artem and Sundermeyer, Martin and Doersch, Carl and Goroshin, Ross and Tan, David Joseph and Chandar, Sarath and Triebel, Rudolph and Tombari, Federico}, title = {TAPNext++: What's Next for Tracking Any Point (TAP)?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8429-8438} }
From Pixels to Nucleotides: End-to-End Token-Based Video Compression for DNA Storage: Cihan Ruan,

Lebin Zhou,

Bingqing Zhao,

Rongduo Han,

Qiming Yuan,

Chenchen Zhu,

Linyi Han,

Liang Yang,

Wei Wang,

Wei Jiang,

Nam Ling; [pdf] [arXiv]
[bibtex]
@InProceedings{Ruan_2026_CVPR, author = {Ruan, Cihan and Zhou, Lebin and Zhao, Bingqing and Han, Rongduo and Yuan, Qiming and Zhu, Chenchen and Han, Linyi and Yang, Liang and Wang, Wei and Jiang, Wei and Ling, Nam}, title = {From Pixels to Nucleotides: End-to-End Token-Based Video Compression for DNA Storage}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8544-8553} }
A Unified Privacy-Utility Framework for Collaborative Inference via Randomized Smoothing: Shiwei Ding,

Lan Zhang,

Zhenlin Wang,

Xiaoyong Yuan; [pdf]
[bibtex]
@InProceedings{Ding_2026_CVPR, author = {Ding, Shiwei and Zhang, Lan and Wang, Zhenlin and Yuan, Xiaoyong}, title = {A Unified Privacy-Utility Framework for Collaborative Inference via Randomized Smoothing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8039-8048} }
ConInfer: Context-Aware Inference for Training-Free Open-Vocabulary Remote Sensing Segmentation: Wenyang Chen,

Zhanxuan Hu,

Yaping Zhang,

Hailong Ning,

Yonghang Tai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Wenyang and Hu, Zhanxuan and Zhang, Yaping and Ning, Hailong and Tai, Yonghang}, title = {ConInfer: Context-Aware Inference for Training-Free Open-Vocabulary Remote Sensing Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7408-7418} }
Video-R4: Reinforcing Text-Rich Video Reasoning with Visual Rumination: Yolo Yunlong Tang,

Daiki Shimada,

Hang Hua,

Chao Huang,

Jing Bi,

Rogerio Feris,

Chenliang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR, author = {Tang, Yolo Yunlong and Shimada, Daiki and Hua, Hang and Huang, Chao and Bi, Jing and Feris, Rogerio and Xu, Chenliang}, title = {Video-R4: Reinforcing Text-Rich Video Reasoning with Visual Rumination}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8314-8325} }
Learning to Wander: Improving the Global Image Geolocation Ability of LMMs via Actionable Reasoning: Yushuo Zheng,

Huiyu Duan,

Zicheng Zhang,

Xiaohong Liu,

Xiongkuo Min; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR, author = {Zheng, Yushuo and Duan, Huiyu and Zhang, Zicheng and Liu, Xiaohong and Min, Xiongkuo}, title = {Learning to Wander: Improving the Global Image Geolocation Ability of LMMs via Actionable Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7208-7219} }
Mitigating Visual Context Degradation in Large Multimodal Models: A Training-Free Decoupled Agentic Framework: Hongrui Jia,

Chaoya Jiang,

Shikun Zhang,

Wei Ye; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jia_2026_CVPR, author = {Jia, Hongrui and Jiang, Chaoya and Zhang, Shikun and Ye, Wei}, title = {Mitigating Visual Context Degradation in Large Multimodal Models: A Training-Free Decoupled Agentic Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9574-9585} }
MuSCM: Mutual Spatial Correlation Mapping for Class Incremental Detection Transformer: Jian Zhong,

Yifan Jiao,

Xi Shao,

Bing-Kun Bao; [pdf] [supp]
[bibtex]
@InProceedings{Zhong_2026_CVPR, author = {Zhong, Jian and Jiao, Yifan and Shao, Xi and Bao, Bing-Kun}, title = {MuSCM: Mutual Spatial Correlation Mapping for Class Incremental Detection Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7758-7767} }
Learning to Select Visual In-Context Demonstrations: Eugene Lee,

Yu-Chi Lin,

Jiajie Diao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR, author = {Lee, Eugene and Lin, Yu-Chi and Diao, Jiajie}, title = {Learning to Select Visual In-Context Demonstrations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9455-9465} }
MPerS: Dynamic MLLM MixExperts Perception-Guided Remote Sensing Scene Segmentation: Ziyi Wang,

Xianping Ma,

Ziyao Wang,

Hongyang Zhang,

Man On Pun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Ziyi and Ma, Xianping and Wang, Ziyao and Zhang, Hongyang and Pun, Man On}, title = {MPerS: Dynamic MLLM MixExperts Perception-Guided Remote Sensing Scene Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7251-7261} }
Distilling Out-of-Distribution Knowledge from Large Language Models for CLIP Generalization: Qiji Ma,

Chuanguang Yang,

Zhulin An,

Libo Huang,

Erhu Zhao,

Yuqi Li,

Yongjun Xu; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR, author = {Ma, Qiji and Yang, Chuanguang and An, Zhulin and Huang, Libo and Zhao, Erhu and Li, Yuqi and Xu, Yongjun}, title = {Distilling Out-of-Distribution Knowledge from Large Language Models for CLIP Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9531-9541} }
PTAD: Pose and Texture Agnostic Anomaly Detection: Wei Zhuo,

Jianen Xiang,

Miaomiao Liu,

Huajun Lu; [pdf] [supp]
[bibtex]
@InProceedings{Zhuo_2026_CVPR, author = {Zhuo, Wei and Xiang, Jianen and Liu, Miaomiao and Lu, Huajun}, title = {PTAD: Pose and Texture Agnostic Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6779-6788} }
Dual-Stage Parameter-Efficient Fine-Tuning for Consistent Spatial and Temporal Representation: Junhao Xia,

Chaoyang Zhang,

Yecheng Zhang,

Chengyang Zhou,

Zhichang Wang,

Bochun Liu,

Dongshuo Yin; [pdf]
[bibtex]
@InProceedings{Xia_2026_CVPR, author = {Xia, Junhao and Zhang, Chaoyang and Zhang, Yecheng and Zhou, Chengyang and Wang, Zhichang and Liu, Bochun and Yin, Dongshuo}, title = {Dual-Stage Parameter-Efficient Fine-Tuning for Consistent Spatial and Temporal Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8607-8617} }
GreenPlanner: Practical Floorplan Layout Generation via an Energy-Aware and Function-Feasible Generative Framework: Pengyu Zeng,

Yuqin Dai,

Jun Yin,

Jing Zhong,

Ziyang Han,

Chaoyang Shi,

ZhanXiang Jin,

Maowei Jiang,

Yuxing Han,

Shuai Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR, author = {Zeng, Pengyu and Dai, Yuqin and Yin, Jun and Zhong, Jing and Han, Ziyang and Shi, Chaoyang and Jin, ZhanXiang and Jiang, Maowei and Han, Yuxing and Lu, Shuai}, title = {GreenPlanner: Practical Floorplan Layout Generation via an Energy-Aware and Function-Feasible Generative Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8596-8606} }
Entropy-Constrained Information Optimal Transport for Multi-View Geo-Localization: Xiaoxi Yang,

Bo Sun,

Yisheng An,

Ganchao Liu; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Xiaoxi and Sun, Bo and An, Yisheng and Liu, Ganchao}, title = {Entropy-Constrained Information Optimal Transport for Multi-View Geo-Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7188-7197} }
Label-Agnostic Category Discovery: Yuwei Bian,

Shidong Wang,

Chunming Li,

Haofeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Bian_2026_CVPR, author = {Bian, Yuwei and Wang, Shidong and Li, Chunming and Zhang, Haofeng}, title = {Label-Agnostic Category Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7573-7582} }
Autoregressive Universal Video Segmentation Model: Miran Heo,

Sukjun Hwang,

Min-Hung Chen,

Yu-Chiang Frank Wang,

Albert Gu,

Seon Joo Kim,

Ryo Hachiuma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Heo_2026_CVPR, author = {Heo, Miran and Hwang, Sukjun and Chen, Min-Hung and Wang, Yu-Chiang Frank and Gu, Albert and Kim, Seon Joo and Hachiuma, Ryo}, title = {Autoregressive Universal Video Segmentation Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7429-7438} }
Equivariant Unsupervised Object Detection with Learnable Riesz Transform and Composite Spatial Transformers: Sayan Kumar Chaki,

Thierry Fournel,

Rémi Emonet; [pdf] [supp]
[bibtex]
@InProceedings{Chaki_2026_CVPR, author = {Chaki, Sayan Kumar and Fournel, Thierry and Emonet, R\'emi}, title = {Equivariant Unsupervised Object Detection with Learnable Riesz Transform and Composite Spatial Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7008-7017} }
Modality-Aware Bit Allocation for Mixed-Precision Quantization of Vision-Language Models: Xi Zhang,

Hanwei Zhu,

Jiamang Wang,

Xiaolin Wu,

Weisi Lin; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xi and Zhu, Hanwei and Wang, Jiamang and Wu, Xiaolin and Lin, Weisi}, title = {Modality-Aware Bit Allocation for Mixed-Precision Quantization of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9305-9315} }
Counterfactual Segmentation Reasoning: Diagnosing and Mitigating Pixel-Grounding Hallucination: Xinzhuo Li,

Adheesh Juvekar,

Jiaxun Zhang,

Xingyou Liu,

Muntasir Wahed,

Kiet A. Nguyen,

Yifan Shen,

Tianjiao Yu,

Ismini Lourentzou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Xinzhuo and Juvekar, Adheesh and Zhang, Jiaxun and Liu, Xingyou and Wahed, Muntasir and Nguyen, Kiet A. and Shen, Yifan and Yu, Tianjiao and Lourentzou, Ismini}, title = {Counterfactual Segmentation Reasoning: Diagnosing and Mitigating Pixel-Grounding Hallucination}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {7450-7460} }
MMR1: Enhancing Multimodal Reasoning with Variance-Aware Sampling: Sicong Leng,

Jing Wang,

Jiaxi Li,

Hao Zhang,

Zhiqiang Hu,

Boqiang Zhang,

Yuming Jiang,

Hang Zhang,

Xin Li,

Deli Zhao,

Wei Lu,

Yu Rong,

Aixin Sun,

Shijian Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Leng_2026_CVPR, author = {Leng, Sicong and Wang, Jing and Li, Jiaxi and Zhang, Hao and Hu, Zhiqiang and Zhang, Boqiang and Jiang, Yuming and Zhang, Hang and Li, Xin and Zhao, Deli and Lu, Wei and Rong, Yu and Sun, Aixin and Lu, Shijian}, title = {MMR1: Enhancing Multimodal Reasoning with Variance-Aware Sampling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9075-9087} }
Seeing Through Fog: Towards Fog-Invariant Action Recognition: Enqi Liu,

Liyuan Pan,

Zhi Gao,

Lingzhi Li,

Qing Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Enqi and Pan, Liyuan and Gao, Zhi and Li, Lingzhi and Li, Qing}, title = {Seeing Through Fog: Towards Fog-Invariant Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6966-6975} }
Extending Segment Anything Model 2 to Multi-Object Tracking by Optimizing Hierarchical Trajectory Memory: Cheng-Yen Yang,

Hsiang-Wei Huang,

Kuang-Ming Chen,

Kunjun Li,

Jenq-Neng Hwang; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR, author = {Yang, Cheng-Yen and Huang, Hsiang-Wei and Chen, Kuang-Ming and Li, Kunjun and Hwang, Jenq-Neng}, title = {Extending Segment Anything Model 2 to Multi-Object Tracking by Optimizing Hierarchical Trajectory Memory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {8358-8367} }
From Alignment to Reason: Multi-Agent Debate for Tactical Badminton Video Retrieval: Yi-Xiang Zhang,

Yu-Shuen Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yi-Xiang and Wang, Yu-Shuen}, title = {From Alignment to Reason: Multi-Agent Debate for Tactical Badminton Video Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {9520-9530} }
ConfDiff: Confidence-Guided Representation Diffusion for Video Moment Retrieval: Haiming Zhao,

Tai Wang; [pdf]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Haiming and Wang, Tai},
  title     = {{ConfDiff}: Confidence-Guided Representation Diffusion for Video Moment Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8164--8174},
}
Continual Alignment for SAM: Rethinking Foundation Models for Medical Image Segmentation in Continual Learning: Jiayi Wang,

Wei Dai,

Haoyu Wang,

Sihan Yang,

Haixia Bi,

Jian Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jiayi and Dai, Wei and Wang, Haoyu and Yang, Sihan and Bi, Haixia and Sun, Jian},
  title     = {Continual Alignment for {SAM}: Rethinking Foundation Models for Medical Image Segmentation in Continual Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7520--7529},
}
GATE: Gaussian-Attentive Transformer for Uncertainty-Aware Age Estimation: Chaewon Lee,

JunHyeok Heo,

Chang-Su Kim; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Chaewon and Heo, JunHyeok and Kim, Chang-Su},
  title     = {{GATE}: {Gaussian}-Attentive Transformer for Uncertainty-Aware Age Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8736--8745},
}
Generative Digital Twins: Vision-Language Simulation Models for Executable Industrial Systems: YuChe Hsu,

AnJui Wang,

TsaiChing Ni,

YuanFu Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Hsu_2026_CVPR,
  author    = {Hsu, YuChe and Wang, AnJui and Ni, TsaiChing and Yang, YuanFu},
  title     = {Generative Digital Twins: Vision-Language Simulation Models for Executable Industrial Systems},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8705--8714},
}
MARS-RL: Enhancing Multi-Agent RAG Systems for Multi-Modal Documents via Strategic Reasoning with Reinforcement Learning: Zhongyu Wang,

Pengbo Liu; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zhongyu and Liu, Pengbo},
  title     = {{MARS-RL}: Enhancing Multi-Agent {RAG} Systems for Multi-Modal Documents via Strategic Reasoning with Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9674--9683},
}
SCOPE: Scene-Contextualized Incremental Few-Shot 3D Segmentation: Vishal Thengane,

Zhaochong An,

Tianjin Huang,

Son Lam Phung,

Abdesselam Bouzerdoum,

Lu Yin,

Na Zhao,

Xiatian Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Thengane_2026_CVPR,
  author    = {Thengane, Vishal and An, Zhaochong and Huang, Tianjin and Phung, Son Lam and Bouzerdoum, Abdesselam and Yin, Lu and Zhao, Na and Zhu, Xiatian},
  title     = {{SCOPE}: Scene-Contextualized Incremental Few-Shot {3D} Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7368--7377},
}
From Fewer Samples to Fewer Bits: Reframing Dataset Distillation as Joint Optimization of Precision and Compactness: My H. Dinh,

Aditya Sant,

Akshay Malhotra,

Keya Patani,

Shahab Hamidi-Rad; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dinh_2026_CVPR,
  author    = {Dinh, My H. and Sant, Aditya and Malhotra, Akshay and Patani, Keya and Hamidi-Rad, Shahab},
  title     = {From Fewer Samples to Fewer Bits: Reframing Dataset Distillation as Joint Optimization of Precision and Compactness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7070--7079},
}
UI-AGILE: Advancing GUI Agents with Effective Reinforcement Learning and Precise Inference-Time Grounding: Shuquan Lian,

Yuhang Wu,

Jia Ma,

Yifan Ding,

Zihan Song,

Bingqi Chen,

Xiawu Zheng,

Hui Li,

Rongrong Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lian_2026_CVPR,
  author    = {Lian, Shuquan and Wu, Yuhang and Ma, Jia and Ding, Yifan and Song, Zihan and Chen, Bingqi and Zheng, Xiawu and Li, Hui and Ji, Rongrong},
  title     = {{UI-AGILE}: Advancing {GUI} Agents with Effective Reinforcement Learning and Precise Inference-Time Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8575--8584},
}
SCP: Spatial Causal Prediction in Video: Yanguang Zhao,

Jie Yang,

Shengqiong Wu,

Shutong Hu,

Hongbo Qiu,

Yu Wang,

Guijia Zhang,

Tan Kai Ze,

Hao Fei,

Chia-Wen Lin,

Mong-Li Lee,

Wynne Hsu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Yanguang and Yang, Jie and Wu, Shengqiong and Hu, Shutong and Qiu, Hongbo and Wang, Yu and Zhang, Guijia and Ze, Tan Kai and Fei, Hao and Lin, Chia-Wen and Lee, Mong-Li and Hsu, Wynne},
  title     = {{SCP}: Spatial Causal Prediction in Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7165--7175},
}
CineMatte: Background Matting for Virtual Production and Beyond: Yuanjian He,

Chen Zhang,

Fasheng Chen,

Jiangbo Cao; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Yuanjian and Zhang, Chen and Chen, Fasheng and Cao, Jiangbo},
  title     = {{CineMatte}: Background Matting for Virtual Production and Beyond},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8725--8735},
}
DeepDP-TGMM: Amortized Non-Parametric Clustering for Hyperspherical Self-Supervised Representations: Cyril Kana Tepakbong,

Kévin Bouchard,

Julien Maitre; [pdf] [supp]
[bibtex]
@InProceedings{Tepakbong_2026_CVPR,
  author    = {Tepakbong, Cyril Kana and Bouchard, K{\'e}vin and Maitre, Julien},
  title     = {{DeepDP-TGMM}: Amortized Non-Parametric Clustering for Hyperspherical Self-Supervised Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7483--7492},
}
Complexity of Linear Regions in Self-supervised Deep ReLU Networks: Mufhumudzi Muthivhi,

Terence L. van Zyl; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Muthivhi_2026_CVPR,
  author    = {Muthivhi, Mufhumudzi and van Zyl, Terence L.},
  title     = {Complexity of Linear Regions in Self-supervised Deep {ReLU} Networks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6911--6920},
}
Towards Universal Open-Set Visual Font Recognition Via Augmented Synthetic Similarity: Peicheng Zhou,

Shancheng Fang,

Chenhui Jin,

Bowei Pu,

Hongtao Xie; [pdf]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Peicheng and Fang, Shancheng and Jin, Chenhui and Pu, Bowei and Xie, Hongtao},
  title     = {Towards Universal Open-Set Visual Font Recognition Via Augmented Synthetic Similarity},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6799--6808},
}
DARN: Dynamic Adaptive Regularization Networks for Efficient and Robust Foundation Model Adaptation: Dhenenjay Yadav,

Rohan Sawai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yadav_2026_CVPR,
  author    = {Yadav, Dhenenjay and Sawai, Rohan},
  title     = {{DARN}: Dynamic Adaptive Regularization Networks for Efficient and Robust Foundation Model Adaptation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7625--7633},
}
Dynamic Pseudo-Label Assignment and Consistent Prototypical Learning for Few-Shot Class-Incremental Learning: Zhilong Mao,

Hang Zhang,

Yanmin Li,

Lihua Liu,

Jibing Wu,

Mao Wang; [pdf] [supp]
[bibtex]
@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Zhilong and Zhang, Hang and Li, Yanmin and Liu, Lihua and Wu, Jibing and Wang, Mao},
  title     = {Dynamic Pseudo-Label Assignment and Consistent Prototypical Learning for Few-Shot Class-Incremental Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7810--7819},
}
Region-Aware Hierarchical Sub-Feature Alignment for Robust EEG-Based Visual Decoding: Yanan Zhu,

Ziwei Xiang,

Jiamin Wu,

Jinyang Guo,

Hongyuan Zhang,

Chunfeng Song,

Hongjian Fang,

Yufei Guo,

Xianglong Liu; [pdf]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Yanan and Xiang, Ziwei and Wu, Jiamin and Guo, Jinyang and Zhang, Hongyuan and Song, Chunfeng and Fang, Hongjian and Guo, Yufei and Liu, Xianglong},
  title     = {Region-Aware Hierarchical Sub-Feature Alignment for Robust {EEG}-Based Visual Decoding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6667--6676},
}
STORM: End-to-End Referring Multi-Object Tracking in Videos: Zijia Lu,

Jingru Yi,

Jue Wang,

Yuxiao Chen,

Junwen Chen,

Xinyu Li,

Davide Modolo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Zijia and Yi, Jingru and Wang, Jue and Chen, Yuxiao and Chen, Junwen and Li, Xinyu and Modolo, Davide},
  title     = {{STORM}: End-to-End Referring Multi-Object Tracking in Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8347--8357},
}
Onboarding Without Forgetting: Hypernetwork Personalization with Data-Free Replay for Personalized Federated Learning: Thinh Nguyen,

Le Huy Khiem,

Van-Tuan Tran,

Khoa D Doan,

Nitesh V. Chawla,

Kok-Seng Wong; [pdf] [supp]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Thinh and Khiem, Le Huy and Tran, Van-Tuan and Doan, Khoa D and Chawla, Nitesh V. and Wong, Kok-Seng},
  title     = {Onboarding Without Forgetting: Hypernetwork Personalization with Data-Free Replay for Personalized Federated Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7728--7736},
}
Towards Universal and Lightweight Coverless Image Steganography with Multimodal Large Language Models Assistance: Jia Li,

Zhankai Li,

Yongqiang Yu,

Xuehu Yan,

Yuliang Lu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jia and Li, Zhankai and Yu, Yongqiang and Yan, Xuehu and Lu, Yuliang},
  title     = {Towards Universal and Lightweight Coverless Image Steganography with Multimodal Large Language Models Assistance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7935--7944},
}
UnrealSpace: Analyzing Spatial Understanding and Reasoning in Controllable Simulation: Wufei Ma,

Sky Cen,

Jianzhi Shen,

Rex Lee,

León Begiristain,

Yan Zhuang,

Jiawei Peng,

Zhifei Yu,

Tianao Song,

Xinyuan Qi,

Tianmin Shu,

Adam Kortylewski,

Alan Yuille; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Wufei and Cen, Sky and Shen, Jianzhi and Lee, Rex and Begiristain, Le{\'o}n and Zhuang, Yan and Peng, Jiawei and Yu, Zhifei and Song, Tianao and Qi, Xinyuan and Shu, Tianmin and Kortylewski, Adam and Yuille, Alan},
  title     = {{UnrealSpace}: Analyzing Spatial Understanding and Reasoning in Controllable Simulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9716--9725},
}
CoVCR: Bridging Visual Narrative Gaps via Context Generation for Robust Commonsense Reasoning: Xinyu Li,

Shiliang Sun; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xinyu and Sun, Shiliang},
  title     = {{CoVCR}: Bridging Visual Narrative Gaps via Context Generation for Robust Commonsense Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9065--9074},
}
GEODE: Geometry-Guided Discrete Diffusion for Open-Vocabulary 3D Scene Graph Generation: Changqun Feng,

Wangxiandi Yin,

Xin Hu,

Lei Zhao,

Dongyang Zhang,

Tao He; [pdf]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Changqun and Yin, Wangxiandi and Hu, Xin and Zhao, Lei and Zhang, Dongyang and He, Tao},
  title     = {{GEODE}: Geometry-Guided Discrete Diffusion for Open-Vocabulary {3D} Scene Graph Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7143--7153},
}
IRDINO: Adapting DINOv3 with Second-Order Motion Awareness for Moving Infrared Small Target Detection: Qian Xu,

Shuaipeng Fan,

Fei Gao,

Mingjin Zhang; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Qian and Fan, Shuaipeng and Gao, Fei and Zhang, Mingjin},
  title     = {{IRDINO}: Adapting {DINOv3} with Second-Order Motion Awareness for Moving Infrared Small Target Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8408--8418},
}
Multimodal Reasoning with Explicit Reasoning Patterns and Rewards: Han Qiu,

Sheng Jin,

Zhongrong Zuo,

Ziyue Wang,

Qi She,

Ling Shao,

Shijian Lu; [pdf] [supp]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Han and Jin, Sheng and Zuo, Zhongrong and Wang, Ziyue and She, Qi and Shao, Ling and Lu, Shijian},
  title     = {Multimodal Reasoning with Explicit Reasoning Patterns and Rewards},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9542--9551},
}
Beyond Static Artifacts: A Forensic Benchmark for Video Deepfake Reasoning in Vision Language Models: Zheyuan Gu,

Qingsong Zhao,

Yusong Wang,

Zhaohong Huang,

Xinqi Li,

Chen Yuan,

Jiawei Shao,

Chi Zhang,

Xuelong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Zheyuan and Zhao, Qingsong and Wang, Yusong and Huang, Zhaohong and Li, Xinqi and Yuan, Chen and Shao, Jiawei and Zhang, Chi and Li, Xuelong},
  title     = {Beyond Static Artifacts: A Forensic Benchmark for Video Deepfake Reasoning in Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8585--8595},
}
Harmonized Multi-Layer Text-to-Image Generation with Generative Priors: Yusuf Dalva,

Yijun Li,

Qing Liu,

Nanxuan Zhao,

Jianming Zhang,

Zhe Lin,

Pinar Yanardag; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dalva_2026_CVPR,
  author    = {Dalva, Yusuf and Li, Yijun and Liu, Qing and Zhao, Nanxuan and Zhang, Jianming and Lin, Zhe and Yanardag, Pinar},
  title     = {Harmonized Multi-Layer Text-to-Image Generation with Generative Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8471--8480},
}
100Editor: 100+ Views per Batch and Minute-Scale View-Consistent 3D Editing: Cunqi Wu,

Peng Zhou,

Jie Qin,

Qi Tian; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Cunqi and Zhou, Peng and Qin, Jie and Tian, Qi},
  title     = {{100Editor}: 100+ Views per Batch and Minute-Scale View-Consistent {3D} Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8449--8460},
}
Predicting Gene Expression in Spatially Resolved Transcriptomics Across Samples Through Probabilistic Fusion of Hierarchical Histology and Spatial Information: Yinbo Liu,

Qi Wu,

Keyang Ye,

Xiao He,

Tian Tian; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yinbo and Wu, Qi and Ye, Keyang and He, Xiao and Tian, Tian},
  title     = {Predicting Gene Expression in Spatially Resolved Transcriptomics Across Samples Through Probabilistic Fusion of Hierarchical Histology and Spatial Information},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8492--8503},
}
Face Time Traveller : Travel Through Ages Without Losing Identity: Purbayan Kar,

Ayush Ghadiya,

Vishal Chudasama,

Pankaj Wasnik,

C.V. Jawahar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kar_2026_CVPR,
  author    = {Kar, Purbayan and Ghadiya, Ayush and Chudasama, Vishal and Wasnik, Pankaj and Jawahar, C. V.},
  title     = {Face Time Traveller : Travel Through Ages Without Losing Identity},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8756--8765},
}
CADRNet: Cognitively-Inspired Active Vision for 3D Reasoning Segmentation via Differentiable Rendering: Zai Yang Yu,

Changshuo Wang,

Yuan Shi,

Linjun Sun,

Shu Wei,

Tingran Wang,

Wangyu Wu,

Yanjie Li,

Weijun Li; [pdf]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Zai Yang and Wang, Changshuo and Shi, Yuan and Sun, Linjun and Wei, Shu and Wang, Tingran and Wu, Wangyu and Li, Yanjie and Li, Weijun},
  title     = {{CADRNet}: Cognitively-Inspired Active Vision for {3D} Reasoning Segmentation via Differentiable Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7220--7230},
}
Prompt-driven Small Object Instance Segmentation in Earth Observation: Chenhao Wang,

Yingrui Ji,

Yu Meng,

Yunjian Zhang,

Yao Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Chenhao and Ji, Yingrui and Meng, Yu and Zhang, Yunjian and Zhu, Yao},
  title     = {Prompt-driven Small Object Instance Segmentation in {Earth} Observation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7347--7356},
}
Leveraging Unlabeled Data from Unknown Sources via Dual-Path Guidance for Deepfake Face Detection: Zhiqiang Yang,

Renshuai Tao,

Chunjie Zhang,

Guodong Yang,

Xiaolong Zheng,

Yao Zhao; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zhiqiang and Tao, Renshuai and Zhang, Chunjie and Yang, Guodong and Zheng, Xiaolong and Zhao, Yao},
  title     = {Leveraging Unlabeled Data from Unknown Sources via Dual-Path Guidance for Deepfake Face Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8090--8100},
}
Weaver: End-to-End Agentic System Training for Video Interleaved Reasoning: Yudi Shi,

Shangzhe Di,

Qirui Chen,

Qinian Wang,

Jiayin Cai,

Xiaolong Jiang,

Yao Hu,

Weidi Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Yudi and Di, Shangzhe and Chen, Qirui and Wang, Qinian and Cai, Jiayin and Jiang, Xiaolong and Hu, Yao and Xie, Weidi},
  title     = {Weaver: End-to-End Agentic System Training for Video Interleaved Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9248--9258},
}
Towards Efficient Multimodal Unified Reasoning Model via Model Merging: Qixiang Yin,

Huanjin Yao,

Jianghao Chen,

Jiaxing Huang,

Zhicheng Zhao,

Fei Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Qixiang and Yao, Huanjin and Chen, Jianghao and Huang, Jiaxing and Zhao, Zhicheng and Su, Fei},
  title     = {Towards Efficient Multimodal Unified Reasoning Model via Model Merging},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9378--9388},
}
LED: LLM Enhanced Open-Vocabulary Object Detection without Human Curated Data Generation: Yang Zhou,

Shiyu Zhao,

Yuxiao Chen,

Zhenting Wang,

Can Jin,

Mingyu Zhao,

Dimitris N. Metaxas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Yang and Zhao, Shiyu and Chen, Yuxiao and Wang, Zhenting and Jin, Can and Zhao, Mingyu and Metaxas, Dimitris N.},
  title     = {{LED}: {LLM} Enhanced Open-Vocabulary Object Detection without Human Curated Data Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9643--9653},
}
Hierarchical Textual Knowledge for Enhanced Image Clustering: Yijie Zhong,

Yunfan Gao,

Weipeng Jiang,

Haofen Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2026_CVPR,
  author    = {Zhong, Yijie and Gao, Yunfan and Jiang, Weipeng and Wang, Haofen},
  title     = {Hierarchical Textual Knowledge for Enhanced Image Clustering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9749--9758},
}
Erased, But Not Forgotten: Erased Rectified Flow Transformers Still Remain Unsafe Under Concept Attack: Nanxiang Jiang,

Zhaoxin Fan,

Enhan Kang,

Daiheng Gao,

Yun Zhou,

Yanxia Chang,

Zheng Zhu,

Yeying Jin,

Wenjun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Nanxiang and Fan, Zhaoxin and Kang, Enhan and Gao, Daiheng and Zhou, Yun and Chang, Yanxia and Zhu, Zheng and Jin, Yeying and Wu, Wenjun},
  title     = {Erased, But Not Forgotten: Erased Rectified Flow Transformers Still Remain Unsafe Under Concept Attack},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8080--8089},
}
Analyzing and Enhancing Visual Learning in LLM-based Radiology Report Generation: Zailong Chen,

Peng Gao,

Johan Barthelemy,

Luping Zhou,

Lei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Zailong and Gao, Peng and Barthelemy, Johan and Zhou, Luping and Wang, Lei},
  title     = {Analyzing and Enhancing Visual Learning in {LLM}-based Radiology Report Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9327--9336},
}
Ramen: Robust Test-Time Adaptation of Vision-Language Models with Active Sample Selection: Wenxuan Bao,

Yanjun Zhao,

Xiyuan Yang,

Jingrui He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bao_2026_CVPR,
  author    = {Bao, Wenxuan and Zhao, Yanjun and Yang, Xiyuan and He, Jingrui},
  title     = {Ramen: Robust Test-Time Adaptation of Vision-Language Models with Active Sample Selection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9632--9642},
}
ECOC-IL: Robust and Efficient Label LDP for Imbalanced Learning: Mengyang Li,

Ou Wu; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Mengyang and Wu, Ou},
  title     = {{ECOC-IL}: Robust and Efficient Label {LDP} for Imbalanced Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7914--7923},
}
Open World Image Aesthetic Assessment: Mingxiang Liao,

Tianren Ma,

Xijin Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Liao_2026_CVPR,
  author    = {Liao, Mingxiang and Ma, Tianren and Zhang, Xijin},
  title     = {Open World Image Aesthetic Assessment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9791--9801},
}
Bootstrap Your Own Classifier: Your Pretrained Vision Models are Secretly Strong Continual Learners: Yizheng Gong,

Xiaoyang Wang,

Siyue Yu,

Waleed Al-Nuaimy,

Jimin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Gong_2026_CVPR,
  author    = {Gong, Yizheng and Wang, Xiaoyang and Yu, Siyue and Al-Nuaimy, Waleed and Xiao, Jimin},
  title     = {Bootstrap Your Own Classifier: Your Pretrained Vision Models are Secretly Strong Continual Learners},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7665--7674},
}
Asymmetric Collaborative Distillation for Asymmetric Image Retrieval: Yi Xie,

Huaidong Zhang,

Xuandi Luo,

Yan Zhou,

Shengfeng He; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Yi and Zhang, Huaidong and Luo, Xuandi and Zhou, Yan and He, Shengfeng},
  title     = {Asymmetric Collaborative Distillation for Asymmetric Image Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6706--6716},
}
Thinking with Blueprints: Assisting Vision-Language Models in Spatial Reasoning via Structured Object Representation: Weijian Ma,

Shizhao Sun,

Tianyu Yu,

Ruiyu Wang,

Tat-Seng Chua,

Jiang Bian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Weijian and Sun, Shizhao and Yu, Tianyu and Wang, Ruiyu and Chua, Tat-Seng and Bian, Jiang},
  title     = {Thinking with Blueprints: Assisting Vision-Language Models in Spatial Reasoning via Structured Object Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8919--8929},
}
Overthinking Causes Hallucination: Tracing Confounder Propagation in Vision Language Models: Abin Shoby,

Ta Duc Huy,

Tuan Dung Nguyen,

Minh Khoi Ho,

Qi Chen,

Anton van den Hengel,

Phi Le Nguyen,

Johan W. Verjans,

Vu Minh Hieu Phan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shoby_2026_CVPR,
  author    = {Shoby, Abin and Huy, Ta Duc and Nguyen, Tuan Dung and Ho, Minh Khoi and Chen, Qi and van den Hengel, Anton and Nguyen, Phi Le and Verjans, Johan W. and Phan, Vu Minh Hieu},
  title     = {Overthinking Causes Hallucination: Tracing Confounder Propagation in Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9185--9194},
}
Large Multimodal Models as General In-Context Classifiers: Marco Garosi,

Matteo Farina,

Alessandro Conti,

Massimiliano Mancini,

Elisa Ricci; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Garosi_2026_CVPR,
  author    = {Garosi, Marco and Farina, Matteo and Conti, Alessandro and Mancini, Massimiliano and Ricci, Elisa},
  title     = {Large Multimodal Models as General In-Context Classifiers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6727--6736},
}
Visual Reasoning Through Tool-Supervised Reinforcement Learning: Qihua Dong,

Gozde Sahin,

Pei Wang,

Zhaowei Cai,

Robik Shrestha,

Hao Yang,

Davide Modolo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Qihua and Sahin, Gozde and Wang, Pei and Cai, Zhaowei and Shrestha, Robik and Yang, Hao and Modolo, Davide},
  title     = {Visual Reasoning Through Tool-Supervised Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8993--9002},
}
DM^3T: Harmonizing Modalities via Diffusion for Multi-Object Tracking: Weiran Li,

Yeqiang Liu,

Yijie Wei,

Mina Han,

Qiannan Guo,

Zhenbo Li; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Weiran and Liu, Yeqiang and Wei, Yijie and Han, Mina and Guo, Qiannan and Li, Zhenbo},
  title     = {{DM$^{3}$T}: Harmonizing Modalities via Diffusion for Multi-Object Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8398--8407},
}
Memory-efficient Continual Learning with Prototypical Exemplar Condensation: M.-Duong Nguyen,

Thien-Thanh Dao,

Le-Tuan Nguyen,

Dung D. Le,

Kok-Seng Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, M.-Duong and Dao, Thien-Thanh and Nguyen, Le-Tuan and Le, Dung D. and Wong, Kok-Seng},
  title     = {Memory-efficient Continual Learning with Prototypical Exemplar Condensation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7675--7685},
}
Fine-Grained Visual Prompt and Region Self-Distillation for Retrieval-Augmented VQA: Yujie Wang,

Hu Zhang,

Jiye Liang,

Zhiqiang Wang,

Hongye Tan,

Ru Li; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yujie and Zhang, Hu and Liang, Jiye and Wang, Zhiqiang and Tan, Hongye and Li, Ru},
  title     = {Fine-Grained Visual Prompt and Region Self-Distillation for Retrieval-Augmented {VQA}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9282--9293},
}
CogNet: Multi-Agent Collaborative Reasoning and Verification for Salient Object Ranking: Zhenyu Wu,

Tengfei Shi,

Xuehao Wang,

Ming Li,

Chenglizhao Chen,

Wenfeng Song,

Aimin Hao; [pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Zhenyu and Shi, Tengfei and Wang, Xuehao and Li, Ming and Chen, Chenglizhao and Song, Wenfeng and Hao, Aimin},
  title     = {{CogNet}: Multi-Agent Collaborative Reasoning and Verification for Salient Object Ranking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7241--7250},
}
Direct Language Embedding Enables Gaussian Splatting for Large Scenes: Zhida Li,

Jianqiao Zhu,

Hejin Huang,

Yipeng Qin,

Sibei Yang,

Guanbin Li; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Zhida and Zhu, Jianqiao and Huang, Hejin and Qin, Yipeng and Yang, Sibei and Li, Guanbin},
  title     = {Direct Language Embedding Enables {Gaussian} Splatting for Large Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7231--7240},
}
AFCL: Achieving Spatio-Temporal Invariance to Data Heterogeneity in Federated Continual Learning: Jianheng Tang,

Jingyu He,

Kejia Fan,

Run He,

Jingchao Wang,

Anfeng Liu,

Houbing Herbert Song,

Leye Wang,

Zhanxing Zhu,

Huiping Zhuang,

Yunhuai Liu; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Jianheng and He, Jingyu and Fan, Kejia and He, Run and Wang, Jingchao and Liu, Anfeng and Song, Houbing Herbert and Wang, Leye and Zhu, Zhanxing and Zhuang, Huiping and Liu, Yunhuai},
  title     = {{AFCL}: Achieving Spatio-Temporal Invariance to Data Heterogeneity in Federated Continual Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7768--7778},
}
DIAMOND-SSS: Diffusion-Augmented Multi-View Optimization for Data-efficient SubSurface Scattering: Guillermo Figueroa Araneda,

Iris Dania Jimenez,

Florian Hofherr,

Manny Ko,

Hector Andrade-Loarca,

Daniel Cremers; [pdf] [supp]
[bibtex]
@InProceedings{Araneda_2026_CVPR,
  author    = {Araneda, Guillermo Figueroa and Jimenez, Iris Dania and Hofherr, Florian and Ko, Manny and Andrade-Loarca, Hector and Cremers, Daniel},
  title     = {{DIAMOND-SSS}: Diffusion-Augmented Multi-View Optimization for Data-efficient {SubSurface} Scattering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8461--8470},
}
SpatialDreamer: Incentivizing Spatial Reasoning via Active Mental Imagery: Meng Cao,

Xingyu Li,

Xue Liu,

Ian Reid,

Xiaodan Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Meng and Li, Xingyu and Liu, Xue and Reid, Ian and Liang, Xiaodan},
  title     = {{SpatialDreamer}: Incentivizing Spatial Reasoning via Active Mental Imagery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7176--7187}
}
One Identity, Many Roles: Multimodal Entity Coreference for Enhanced Video Situation Recognition: Balaji Darur,

Amanmeet Garg,

Makarand Tapaswi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Darur_2026_CVPR,
  author    = {Darur, Balaji and Garg, Amanmeet and Tapaswi, Makarand},
  title     = {One Identity, Many Roles: Multimodal Entity Coreference for Enhanced Video Situation Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8268--8279}
}
DetRefiner: Model-Agnostic Detection Refinement with Feature Fusion Transformer: Soichiro Okazaki,

Tatsuya Sasaki,

Hiroki Ohashi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Okazaki_2026_CVPR,
  author    = {Okazaki, Soichiro and Sasaki, Tatsuya and Ohashi, Hiroki},
  title     = {{DetRefiner}: Model-Agnostic Detection Refinement with Feature Fusion Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6890--6900}
}
DEED: Dual-Channel Enhanced Ensemble Distillation for Uncertainty-Aware Recognition: Yang Yang,

Kai Xu,

Junyao Hou,

Miao Zhang,

Xiang Li,

Zhenghua Chen,

Yingxue Gao,

Min Wu; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yang and Xu, Kai and Hou, Junyao and Zhang, Miao and Li, Xiang and Chen, Zhenghua and Gao, Yingxue and Wu, Min},
  title     = {{DEED}: Dual-Channel Enhanced Ensemble Distillation for Uncertainty-Aware Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7789--7798}
}
SemanticMoments: Training-Free Motion Similarity via Third Moment Features: Saar Huberman,

Kfir Goldberg,

Or Patashnik,

Sagie Benaim,

Ron Mokady; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huberman_2026_CVPR,
  author    = {Huberman, Saar and Goldberg, Kfir and Patashnik, Or and Benaim, Sagie and Mokady, Ron},
  title     = {{SemanticMoments}: Training-Free Motion Similarity via Third Moment Features},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8419--8428}
}
Pre-trained Models Can Count (Almost): Exploring Quantitative Structure in Visual Representations: Toshimichi Aota,

Akinori Hashimoto,

Naoto Sekizuka,

Takayuki Okatani; [pdf] [supp]
[bibtex]
@InProceedings{Aota_2026_CVPR,
  author    = {Aota, Toshimichi and Hashimoto, Akinori and Sekizuka, Naoto and Okatani, Takayuki},
  title     = {Pre-trained Models Can Count (Almost): Exploring Quantitative Structure in Visual Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6932--6942}
}
SoREL: Soft-Label Refurbishment with Ensemble Learning for Noisy Long-Tailed Classification: Jun Wei Hsieh,

Ying-Hsuan Wu,

Yi-Kuan Hsieh,

Xin Li,

Kuan-Chuan Peng,

Ming-Ching Chang; [pdf]
[bibtex]
@InProceedings{Hsieh_2026_CVPR,
  author    = {Hsieh, Jun Wei and Wu, Ying-Hsuan and Hsieh, Yi-Kuan and Li, Xin and Peng, Kuan-Chuan and Chang, Ming-Ching},
  title     = {{SoREL}: Soft-Label Refurbishment with Ensemble Learning for Noisy Long-Tailed Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6839--6848}
}
PSIM: Perceptual Similarity Index Measure: Md Eimran Hossain Eimon,

Hari Kalva; [pdf] [supp]
[bibtex]
@InProceedings{Eimon_2026_CVPR,
  author    = {Eimon, Md Eimran Hossain and Kalva, Hari},
  title     = {{PSIM}: Perceptual Similarity Index Measure},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8564--8574}
}
StreamEQA: Towards Streaming Video Understanding for Embodied Scenarios: Yifei Wang,

Zhenkai Li,

Tianwen Qian,

Huanran Zheng,

Zheng Wang,

Yuqian Fu,

Xiaoling Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yifei and Li, Zhenkai and Qian, Tianwen and Zheng, Huanran and Wang, Zheng and Fu, Yuqian and Wang, Xiaoling},
  title     = {{StreamEQA}: Towards Streaming Video Understanding for Embodied Scenarios},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9422--9432}
}
KGGAT: Knowledge-Guided Graph Attention Network for Multi-Label Image Classification: Christine Dewi,

Dhananjay R Thiruvady,

Nayyar Zaidi; [pdf] [supp]
[bibtex]
@InProceedings{Dewi_2026_CVPR,
  author    = {Dewi, Christine and Thiruvady, Dhananjay R and Zaidi, Nayyar},
  title     = {{KGGAT}: Knowledge-Guided Graph Attention Network for Multi-Label Image Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8766--8775}
}
When Agents Steer Human Perception: How AI-Selected Images Can Convertly Alter Disagreements: Chi Zhang,

Yulang Gao,

Jiachen Zou,

Chen Wei,

Quanying Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chi and Gao, Yulang and Zou, Jiachen and Wei, Chen and Liu, Quanying},
  title     = {When Agents Steer Human Perception: How {AI}-Selected Images Can Convertly Alter Disagreements},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8111--8120}
}
StabiGS: Video Stabilization through Rendering-Aware Trajectory Optimization in 3DGS-Reconstructed Scenes: Souheib Ben Mabrouk,

Jean-Emmanuel Deschaud,

Eva Coupeté,

Thomas Derbanne,

Nicolas Rahmouni; [pdf] [supp]
[bibtex]
@InProceedings{Ben_Mabrouk_2026_CVPR,
  author    = {Ben Mabrouk, Souheib and Deschaud, Jean-Emmanuel and Coupet{\'e}, Eva and Derbanne, Thomas and Rahmouni, Nicolas},
  title     = {{StabiGS}: Video Stabilization through Rendering-Aware Trajectory Optimization in {3DGS}-Reconstructed Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8481--8491}
}
Mitigating Vision-Text Order Bias in Vision-Language Model: Weilin Gan,

Yifan Song,

Zhuocheng Yu,

Sujian Li; [pdf]
[bibtex]
@InProceedings{Gan_2026_CVPR,
  author    = {Gan, Weilin and Song, Yifan and Yu, Zhuocheng and Li, Sujian},
  title     = {Mitigating Vision-Text Order Bias in Vision-Language Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9664--9673}
}
Decoupled Sub-Feature Uncertainty Modeling for Robust Multimodal Representation Learning: Aoqiang Zhu,

Min Hu,

Yan Xing,

Yiming Tang; [pdf]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Aoqiang and Hu, Min and Xing, Yan and Tang, Yiming},
  title     = {Decoupled Sub-Feature Uncertainty Modeling for Robust Multimodal Representation Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6921--6931}
}
ReConText3D: Replay-based Continual Text-to-3D Generation: Muhammad Ahmed Ullah Khan,

Muhammad Haris Bin Amir,

Didier Stricker,

Muhammad Zeshan Afzal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Khan_2026_CVPR,
  author    = {Khan, Muhammad Ahmed Ullah and Bin Amir, Muhammad Haris and Stricker, Didier and Afzal, Muhammad Zeshan},
  title     = {{ReConText3D}: Replay-based Continual Text-to-{3D} Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7893--7902}
}
TALENT: Target-Aware Efficient Tuning for Referring Image Segmentation: Shuo Jin,

Siyue Yu,

Bingfeng Zhang,

Chao Yao,

Meiqin Liu,

Jimin Xiao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2026_CVPR,
  author    = {Jin, Shuo and Yu, Siyue and Zhang, Bingfeng and Yao, Chao and Liu, Meiqin and Xiao, Jimin},
  title     = {{TALENT}: Target-Aware Efficient Tuning for Referring Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7472--7482}
}
Flash-Unified: A Training-Free and Task-Aware Acceleration Framework for Native Unified Models: Junlong Ke,

Zichen Wen,

Boxue Yang,

Yantai Yang,

Xuyang Liu,

Chenfei Liao,

Zhaorun Chen,

Shaobo Wang,

Linfeng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ke_2026_CVPR,
  author    = {Ke, Junlong and Wen, Zichen and Yang, Boxue and Yang, Yantai and Liu, Xuyang and Liao, Chenfei and Chen, Zhaorun and Wang, Shaobo and Zhang, Linfeng},
  title     = {{Flash-Unified}: A Training-Free and Task-Aware Acceleration Framework for Native Unified Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9131--9142}
}
Super Sparse DETR: YOLO-Competitive Convergence and Acceleration: Hebao Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Hebao},
  title     = {Super Sparse {DETR}: {YOLO}-Competitive Convergence and Acceleration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6677--6684}
}
Dual-Modality Anchor-Guided Filtering for Test-Time Prompt Tuning: Jungwon Choi,

Eunwoo Kim; [pdf] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Jungwon and Kim, Eunwoo},
  title     = {Dual-Modality Anchor-Guided Filtering for Test-Time Prompt Tuning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9368--9377}
}
HARP: Hierarchical Adaptive Ranking with Probabilistic Modeling for Skill Determination: Hui Yu,

Xiao Ke,

Zhihong Zeng,

Huangbiao Xu,

Huanqi Wu; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Hui and Ke, Xiao and Zeng, Zhihong and Xu, Huangbiao and Wu, Huanqi},
  title     = {{HARP}: Hierarchical Adaptive Ranking with Probabilistic Modeling for Skill Determination},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8337--8346}
}
Online Interpretable Matrix Decomposition for Large-Scale Streaming Data: Muhammad A. A. Abdelgawad,

Abdelrahman B. M. Eldaly,

Meng Xinmin,

Peng Jing,

Abdurrashid Ibrahim Sanka,

Ray C.C. Cheung,

Hong Yan; [pdf] [supp]
[bibtex]
@InProceedings{Abdelgawad_2026_CVPR,
  author    = {Abdelgawad, Muhammad A. A. and Eldaly, Abdelrahman B. M. and Xinmin, Meng and Jing, Peng and Sanka, Abdurrashid Ibrahim and Cheung, Ray C. C. and Yan, Hong},
  title     = {Online Interpretable Matrix Decomposition for Large-Scale Streaming Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7030--7039}
}
ROSE: Retrieval-Oriented Segmentation Enhancement: Song Tang,

Guangquan Jie,

Henghui Ding,

Yu-Gang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Song and Jie, Guangquan and Ding, Henghui and Jiang, Yu-Gang},
  title     = {{ROSE}: Retrieval-Oriented Segmentation Enhancement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7398--7407}
}
MAGIC: Few-Shot Mask-Guided Anomaly Inpainting with Prompt Perturbation, Spatially Adaptive Guidance, and Context Awareness: JaeHyuck Choi,

Minjun Kim,

Je Hyeong Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, JaeHyuck and Kim, Minjun and Hong, Je Hyeong},
  title     = {{MAGIC}: Few-Shot Mask-Guided Anomaly Inpainting with Prompt Perturbation, Spatially Adaptive Guidance, and Context Awareness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8524--8533}
}
GRAFT: Graph-Based Affordance Transfer via Part Correspondence: Mengying Lin,

Utkarsh Mishra,

Ajay Mandlekar,

Danfei Xu; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Mengying and Mishra, Utkarsh and Mandlekar, Ajay and Xu, Danfei},
  title     = {{GRAFT}: Graph-Based Affordance Transfer via Part Correspondence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8746--8755}
}
POMA-3D: The Point Map Way to 3D Scene Understanding: Ye Mao,

Weixun Luo,

Ranran Huang,

Junpeng Jing,

Krystian Mikolajczyk; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Ye and Luo, Weixun and Huang, Ranran and Jing, Junpeng and Mikolajczyk, Krystian},
  title     = {{POMA-3D}: The Point Map Way to {3D} Scene Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7282--7292}
}
BrainStack: Neuro-MoE with Functionally Guided Expert Routing for EEG-Based Language Decoding: Ziyi Zhao,

Jinzhao Zhou,

Xiaowei Jiang,

Beining Cao,

Wenhao Ma,

Yang Shen,

Ren Li,

Yu-Kai Wang,

Chin-teng Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Ziyi and Zhou, Jinzhao and Jiang, Xiaowei and Cao, Beining and Ma, Wenhao and Shen, Yang and Li, Ren and Wang, Yu-Kai and Lin, Chin-teng},
  title     = {{BrainStack}: {Neuro-MoE} with Functionally Guided Expert Routing for {EEG}-Based Language Decoding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7050--7059}
}
Background-Compensated Audio-Visual Semantic Modulation Framework for Audio-Visual Event Localization: Chao Sun,

Junbo Zhang,

Chuanbo Zhu,

Mingjun Huang,

Bo Du; [pdf]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Chao and Zhang, Junbo and Zhu, Chuanbo and Huang, Mingjun and Du, Bo},
  title     = {Background-Compensated Audio-Visual Semantic Modulation Framework for Audio-Visual Event Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7272--7281}
}
Frequency-Modulated Visual Restoration for Matryoshka Large Multimodal Models: Qingtao Pan,

Zhihao Dou,

Shuo Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Qingtao and Dou, Zhihao and Li, Shuo},
  title     = {Frequency-Modulated Visual Restoration for {Matryoshka} Large Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9726--9737}
}
Learning to Propose Pose for Category-Agnostic Objects via Joint Refinement with Co-Matching Supervision: Junjie Chen,

Zezheng Liu,

Runxiang Liu,

Yuming Fang,

Yifan Zuo,

Jiebin Yan; [pdf]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Junjie and Liu, Zezheng and Liu, Runxiang and Fang, Yuming and Zuo, Yifan and Yan, Jiebin},
  title     = {Learning to Propose Pose for Category-Agnostic Objects via Joint Refinement with Co-Matching Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7872--7882}
}
ARGS: Auto-Regressive Gaussian Splatting via Parallel Progressive Next-Scale Prediction: Quanyuan Ruan,

Kewei Shi,

Jiabao Lei,

Xifeng Gao,

Xiaoguang Han; [pdf] [arXiv]
[bibtex]
@InProceedings{Ruan_2026_CVPR,
  author    = {Ruan, Quanyuan and Shi, Kewei and Lei, Jiabao and Gao, Xifeng and Han, Xiaoguang},
  title     = {{ARGS}: Auto-Regressive {Gaussian} Splatting via Parallel Progressive Next-Scale Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8439--8448}
}
VSI: Visual-Subtitle Integration for Keyframe Selection to Enhance Long Video Understanding: Jianxiang He,

Meisheng Hong,

Jungang Li,

Weiyu Guo,

Xuming Hu,

Hui Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Jianxiang and Hong, Meisheng and Li, Jungang and Guo, Weiyu and Hu, Xuming and Xiong, Hui},
  title     = {{VSI}: Visual-Subtitle Integration for Keyframe Selection to Enhance Long Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9003--9012}
}
MART: Mechanism-disentanglement Anchor-Routed Training for Learning with Open-World Noisy Data: Changhui Hu,

Bhalaji Nagarajan,

Ricardo Marques,

Petia Radeva; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Changhui and Nagarajan, Bhalaji and Marques, Ricardo and Radeva, Petia},
  title     = {{MART}: Mechanism-disentanglement Anchor-Routed Training for Learning with Open-World Noisy Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7018--7029}
}
Reasoning for Mobile User Experience with Multimodal LLMs: Task, Benchmark, and Approach: Ruichao Mao,

Zhou Fang,

Teng Guo,

Hao Yang,

Yaping Li,

Shaohua Peng,

Maji Huang,

Xiaoyu Lin,

Shuoyang Liu,

Xuepeng Li,

Yuyu Zhang,

Hai Rao; [pdf] [supp]
[bibtex]
@InProceedings{Mao_2026_CVPR,
  author    = {Mao, Ruichao and Fang, Zhou and Guo, Teng and Yang, Hao and Li, Yaping and Peng, Shaohua and Huang, Maji and Lin, Xiaoyu and Liu, Shuoyang and Li, Xuepeng and Zhang, Yuyu and Rao, Hai},
  title     = {Reasoning for Mobile User Experience with Multimodal {LLMs}: Task, Benchmark, and Approach},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8983--8992}
}
Grounding Hierarchical Vision-Language-Action Models Through Explicit Language-Action Alignment: Theodor Wulff,

Federico Tavella,

Rahul Singh Maharjan,

Manith Adikari,

Angelo Cangelosi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wulff_2026_CVPR,
  author    = {Wulff, Theodor and Tavella, Federico and Maharjan, Rahul Singh and Adikari, Manith and Cangelosi, Angelo},
  title     = {Grounding Hierarchical Vision-Language-Action Models Through Explicit Language-Action Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9269--9281}
}
AdaMeta: Adaptive Meta-Learning with Dynamic Task Relational Inference for Few-shot learning: Xingyu Yang,

Yidan Ma,

Hanzhang Qu,

Jianfu Cao; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Xingyu and Ma, Yidan and Qu, Hanzhang and Cao, Jianfu},
  title     = {{AdaMeta}: Adaptive Meta-Learning with Dynamic Task Relational Inference for Few-shot learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7552--7561}
}
Unsupervised Graph Partitioning Framework for Background Suppression in Multi-Query Vehicle Re-Identification: Yichun Hu,

Zixuan Hu,

Ling-Yu Duan; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Yichun and Hu, Zixuan and Duan, Ling-Yu},
  title     = {Unsupervised Graph Partitioning Framework for Background Suppression in Multi-Query Vehicle Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6849--6858}
}
Advancing Open-Set Detection and Segmentation via Disentangled Representations: Haokang Zhang,

Yuchen Guan,

Runxi Cheng,

Yujiu Yang; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Haokang and Guan, Yuchen and Cheng, Runxi and Yang, Yujiu},
  title     = {Advancing Open-Set Detection and Segmentation via Disentangled Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6622--6632}
}
Are Video Models Ready as Zero-Shot Reasoners? An Empirical Study with the MME-CoF Benchmark: Ziyu Guo,

Xinyan Chen,

Renrui Zhang,

Ruichuan An,

Yu Qi,

Dongzhi Jiang,

Xiangtai Li,

Manyuan Zhang,

Hongsheng Li,

Pheng-Ann Heng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Ziyu and Chen, Xinyan and Zhang, Renrui and An, Ruichuan and Qi, Yu and Jiang, Dongzhi and Li, Xiangtai and Zhang, Manyuan and Li, Hongsheng and Heng, Pheng-Ann},
  title     = {Are Video Models Ready as Zero-Shot Reasoners? {An} Empirical Study with the {MME-CoF} Benchmark},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9175--9184}
}
FVLF: A Reinforcing Vision-Language Framework for Gloss-Free Sign Language Translation: Zhi Rao,

Yucheng Zhou,

Benjia Zhou,

Yiqing Huang,

Sergio Escalera,

Jun Wan; [pdf]
[bibtex]
@InProceedings{Rao_2026_CVPR,
  author    = {Rao, Zhi and Zhou, Yucheng and Zhou, Benjia and Huang, Yiqing and Escalera, Sergio and Wan, Jun},
  title     = {{FVLF}: A Reinforcing Vision-Language Framework for Gloss-Free Sign Language Translation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9237--9247}
}
Revisiting Model Inversion Evaluation: From Misleading Standards to Reliable Privacy Assessment: Sy-Tuyen Ho,

Koh Jun Hao,

Ngoc-Bao Nguyen,

Alexander Binder,

Ngai-Man Cheung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ho_2026_CVPR,
  author    = {Ho, Sy-Tuyen and Hao, Koh Jun and Nguyen, Ngoc-Bao and Binder, Alexander and Cheung, Ngai-Man},
  title     = {Revisiting Model Inversion Evaluation: From Misleading Standards to Reliable Privacy Assessment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8070--8079}
}
Layer Embedding Deep Fusion Graph Neural Network: Taihua Xu,

Genhao Tian,

Jicong Fan,

Xibei Yang,

Qinghua Zhang,

Yun Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Taihua and Tian, Genhao and Fan, Jicong and Yang, Xibei and Zhang, Qinghua and Cui, Yun},
  title     = {Layer Embedding Deep Fusion Graph Neural Network},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7091--7100}
}
Myopia Rectification: KV Cache Pruning for MLLMs Via Dynamic Attention Subsidy and Token Reclamation: Jiedong Zhuang,

Lu Lu,

Ming Dai,

Jian Chen,

Qiang Liu,

Haoji Hu; [pdf] [supp]
[bibtex]
@InProceedings{Zhuang_2026_CVPR,
  author    = {Zhuang, Jiedong and Lu, Lu and Dai, Ming and Chen, Jian and Liu, Qiang and Hu, Haoji},
  title     = {Myopia Rectification: {KV} Cache Pruning for {MLLMs} Via Dynamic Attention Subsidy and Token Reclamation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9023--9033}
}
FraQAT: Quantization Aware Training with Fractional Bits: Luca Morreale,

Alberto Gil C P Ramos,

Malcolm Chadwick,

Mehdi Noroozi,

Ruchika Chavhan,

Abhinav Mehrotra; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Morreale_2026_CVPR,
  author    = {Morreale, Luca and Gil C P Ramos, Alberto and Chadwick, Malcolm and Noroozi, Mehdi and Chavhan, Ruchika and Mehrotra, Abhinav},
  title     = {{FraQAT}: Quantization Aware Training with Fractional Bits},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8514--8523}
}
A-SelecT: Automatic Timestep Selection for Diffusion Transformer Representation Learning: Changyu Liu,

James Chenhao Liang,

Wenhao Yang,

Yiming Cui,

Jinghao Yang,

Tianyang Wang,

Qifan Wang,

Dongfang Liu,

Cheng Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Changyu and Liang, James Chenhao and Yang, Wenhao and Cui, Yiming and Yang, Jinghao and Wang, Tianyang and Wang, Qifan and Liu, Dongfang and Han, Cheng},
  title     = {{A-SelecT}: Automatic Timestep Selection for Diffusion Transformer Representation Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6943--6954}
}
VR-CLIP: Visual Refinement of CLIP for Zero-Shot Semantic Segmentation: Haitao Jiang,

Xu Li,

Yuanyang Cao,

Ying Zhang,

Jianji Wang; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Haitao and Li, Xu and Cao, Yuanyang and Zhang, Ying and Wang, Jianji},
  title     = {{VR-CLIP}: Visual Refinement of {CLIP} for Zero-Shot Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6809--6818}
}
Verify Claimed Text-to-Image Models Via Boundary-Aware Prompt Optimization: Zidong Zhao,

Yihao Huang,

Qing Guo,

Tianlin Li,

Anran Li,

Kailong Wang,

Jin Song Dong,

Geguang Pu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Zidong and Huang, Yihao and Guo, Qing and Li, Tianlin and Li, Anran and Wang, Kailong and Dong, Jin Song and Pu, Geguang},
  title     = {Verify Claimed Text-to-Image Models Via Boundary-Aware Prompt Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8049--8058}
}
EvoPrompt-ReID: A Bilevel Optimization Framework for Prompt-Encoder Co-evolution in Image Re-Identification: Yuanlin He,

Zhenchuan Wang,

Jun Chen,

Yingying He,

Jiabao Wang,

Weiwen Wang,

Kun Xu,

Zijin Zhou,

Xiaoxiao Wang,

Mingju Chen,

Tingting Liu,

Zhisong Pan; [pdf]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Yuanlin and Wang, Zhenchuan and Chen, Jun and He, Yingying and Wang, Jiabao and Wang, Weiwen and Xu, Kun and Zhou, Zijin and Wang, Xiaoxiao and Chen, Mingju and Liu, Tingting and Pan, Zhisong},
  title     = {{EvoPrompt-ReID}: A Bilevel Optimization Framework for Prompt-Encoder Co-evolution in Image Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6758--6767}
}
Beyond Syntax: Action Semantics Learning for App Agents: Bohan Tang,

Dezhao Luo,

Jianheng Liu,

Jingxuan Chen,

Shaogang Gong,

Jianye Hao,

Jun Wang,

Kun Shao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Bohan and Luo, Dezhao and Liu, Jianheng and Chen, Jingxuan and Gong, Shaogang and Hao, Jianye and Wang, Jun and Shao, Kun},
  title     = {Beyond Syntax: Action Semantics Learning for App Agents},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9444--9454}
}
Weakly-Supervised Referring Video Object Segmentation Through Text Supervision: Miaojing Shi,

Jun Huang,

Zijie Yue,

Hanli Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Miaojing and Huang, Jun and Yue, Zijie and Wang, Hanli},
  title     = {Weakly-Supervised Referring Video Object Segmentation Through Text Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7461--7471}
}
Don't Let the Information Slip Away: Taozhe Li,

Guansu Wang,

Bo Yu,

Yiming Liu,

Wei Sun; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Taozhe and Wang, Guansu and Yu, Bo and Liu, Yiming and Sun, Wei},
  title     = {Don't Let the Information Slip Away},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8504--8513}
}
Group-DINOmics: Incorporating People Dynamics into DINO for Self-supervised Group Activity Feature Learning: Ryuki Tezuka,

Chihiro Nakatani,

Norimichi Ukita; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tezuka_2026_CVPR,
  author    = {Tezuka, Ryuki and Nakatani, Chihiro and Ukita, Norimichi},
  title     = {{Group-DINOmics}: Incorporating People Dynamics into {DINO} for Self-supervised Group Activity Feature Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8215--8225}
}
Less is More: Token-Efficient Video-QA via Adaptive Frame-Pruning and Semantic Graph Integration: Shaoguang Wang,

Weiyu Guo,

Ziyang Chen,

Yijie Xu,

Xuming Hu,

Hui Xiong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Shaoguang and Guo, Weiyu and Chen, Ziyang and Xu, Yijie and Hu, Xuming and Xiong, Hui},
  title     = {Less is More: Token-Efficient Video-{QA} via Adaptive Frame-Pruning and Semantic Graph Integration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9856--9866}
}
HiVid-Narrator: Hierarchical Video Narrative Generation with Scene-Primed ASR-anchored Compression: Haoxuan Li,

Mengyan Li,

Junjun Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Haoxuan and Li, Mengyan and Zheng, Junjun},
  title     = {{HiVid-Narrator}: Hierarchical Video Narrative Generation with Scene-Primed {ASR}-anchored Compression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8195--8204}
}
Leave No Stone Unturned: Uncovering Holistic Audio-Visual Intrinsic Coherence for Deepfake Detection: Jielun Peng,

Yabin Wang,

Yaqi Li,

Long Kong,

Xiaopeng Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Jielun and Wang, Yabin and Li, Yaqi and Kong, Long and Hong, Xiaopeng},
  title     = {Leave No Stone Unturned: Uncovering Holistic Audio-Visual Intrinsic Coherence for Deepfake Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6655--6666}
}
RecycleLoRA: Rank-Revealing QR-Based Dual-LoRA Subspace Adaptation for Domain Generalized Semantic Segmentation: Chanseul Cho,

Seokju Yun,

Jaesung Jun,

Seungjae Moon,

Youngmin Ro; [pdf] [supp]
[bibtex]
@InProceedings{Cho_2026_CVPR,
  author    = {Cho, Chanseul and Yun, Seokju and Jun, Jaesung and Moon, Seungjae and Ro, Youngmin},
  title     = {{RecycleLoRA}: Rank-Revealing {QR}-Based Dual-{LoRA} Subspace Adaptation for Domain Generalized Semantic Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7503--7513}
}
Learning to Select, Learning to Judge: Active Preference Alignment for Mars Terrain Segmentation: JunJie Li,

Miyu Li,

Jiawei Wang,

Yu Liu,

Yumei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, JunJie and Li, Miyu and Wang, Jiawei and Liu, Yu and Wang, Yumei},
  title     = {Learning to Select, Learning to Judge: Active Preference Alignment for {Mars} Terrain Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8887--8896},
}
PosterGen: Aesthetic-Aware Multi-Modal Paper-to-Poster Generation Via Multi-Agent LLMs: Zhilin Zhang,

Xiang Zhang,

Jiaqi Wei,

Yiwei Xu,

Chenyu You; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zhilin and Zhang, Xiang and Wei, Jiaqi and Xu, Yiwei and You, Chenyu},
  title     = {{PosterGen}: Aesthetic-Aware Multi-Modal Paper-to-Poster Generation Via Multi-Agent {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9813--9823},
}
Safe Codebook: Token-Level Moderation for Safer Visual Autoregressive Generation: Jiaxuan Zhang,

Qianqian Xu,

Peisong Wen,

Siran Dai,

Yang Liu,

Qingming Huang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jiaxuan and Xu, Qianqian and Wen, Peisong and Dai, Siran and Liu, Yang and Huang, Qingming},
  title     = {Safe Codebook: Token-Level Moderation for Safer Visual Autoregressive Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7924--7934},
}
Gazemo: Mimicking Human Saccades via Foveal-Peripheral Feature Modeling for Lightweight Semantic Segmentation: Mian Muhammad Naeem Abid,

Radu Timofte; [pdf] [supp]
[bibtex]
@InProceedings{Abid_2026_CVPR,
  author    = {Abid, Mian Muhammad Naeem and Timofte, Radu},
  title     = {Gazemo: Mimicking Human Saccades via Foveal-Peripheral Feature Modeling for Lightweight Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7293--7303},
}
REBA: Residual Mixture-of-Experts and Bidirectional Video-Text Alignment for Better Fine-grained Weakly Supervised Video Anomaly Detection: Chengxi Chu,

Nurul Japar,

Chee Kau Lim; [pdf]
[bibtex]
@InProceedings{Chu_2026_CVPR,
  author    = {Chu, Chengxi and Japar, Nurul and Lim, Chee Kau},
  title     = {{REBA}: Residual Mixture-of-Experts and Bidirectional Video-Text Alignment for Better Fine-grained Weakly Supervised Video Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8280--8290},
}
ZeroDiff++: Balancing Semantic Diffusion Dynamics for Robust Zero-Shot Learning: Qin Li,

Qi Li,

Limei Liu,

Junfeng Yang,

Han Peng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Qin and Li, Qi and Liu, Limei and Yang, Junfeng and Peng, Han},
  title     = {{ZeroDiff++}: Balancing Semantic Diffusion Dynamics for Robust Zero-Shot Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6997--7007},
}
Towards Complete Activation: Foreground-Background Multi-Perspective Guided Cross-Support for Few-Shot Segmentation: Yi Yang,

Qiang Jiao,

Mengrui Shi,

Qiang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yi and Jiao, Qiang and Shi, Mengrui and Zhang, Qiang},
  title     = {Towards Complete Activation: Foreground-Background Multi-Perspective Guided Cross-Support for Few-Shot Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7378--7387},
}
Learning to Reason: Targeted Knowledge Discovery and Fuzzy Logic Update for Robust Image Recognition: Gurucharan Srinivas,

Joshua Niemeijer,

Frank Köster; [pdf] [supp]
[bibtex]
@InProceedings{Srinivas_2026_CVPR,
  author    = {Srinivas, Gurucharan and Niemeijer, Joshua and K{\"o}ster, Frank},
  title     = {Learning to Reason: Targeted Knowledge Discovery and Fuzzy Logic Update for Robust Image Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7122--7131},
}
Semantic Guided Feature Disentanglement and Reconstruction for Domain Adaptive Object Detection: Xiaowei Zhao,

Zhide Liu,

Yuqing Ma,

Xianglong Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Xiaowei and Liu, Zhide and Ma, Yuqing and Liu, Xianglong},
  title     = {Semantic Guided Feature Disentanglement and Reconstruction for Domain Adaptive Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9357--9367},
}
Exploiting the Source-Asymmetry Confidence Gap for Generalizable AI-Generated Image Detection: Ziyang Zheng,

Weiyan Chen,

Yao Xiao,

Zijie Cao,

Dongyu Zhang,

Pengxu Wei; [pdf]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Ziyang and Chen, Weiyan and Xiao, Yao and Cao, Zijie and Zhang, Dongyu and Wei, Pengxu},
  title     = {Exploiting the Source-Asymmetry Confidence Gap for Generalizable {AI-Generated} Image Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8715--8724},
}
IntentEdit: Multi-Agent Reasoning for Intent-Driven Complex Image Editing: Yuxuan Zhang,

Shijia Huang,

Liwei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yuxuan and Huang, Shijia and Wang, Liwei},
  title     = {{IntentEdit}: Multi-Agent Reasoning for Intent-Driven Complex Image Editing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8776--8785},
}
GaussFiller: Unleashing VLM-Expert Guidance for 3D Scene Completion with 3D Gaussian Splatting: Yuhan Ping,

Cheng Lin,

Yuan Liu,

Zhiyang Dou,

Jia Pan,

Wenping Wang; [pdf]
[bibtex]
@InProceedings{Ping_2026_CVPR,
  author    = {Ping, Yuhan and Lin, Cheng and Liu, Yuan and Dou, Zhiyang and Pan, Jia and Wang, Wenping},
  title     = {{GaussFiller}: Unleashing {VLM-Expert} Guidance for {3D} Scene Completion with {3D} {Gaussian} Splatting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7132--7142},
}
DGD: Density Gradient-guided Diffusion for Long-Tailed Clustering: Xulun Ye,

Yuanyuan Deng,

Kun Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Xulun and Deng, Yuanyuan and Zhou, Kun},
  title     = {{DGD}: Density Gradient-guided Diffusion for Long-Tailed Clustering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7654--7664},
}
UniShield: An Adaptive Multi-Agent Framework for Unified Forgery Image Detection and Localization: Qing Huang,

Zhipei Xu,

Xuanyu Zhang,

Xiangyu Yu,

Jian Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Qing and Xu, Zhipei and Zhang, Xuanyu and Yu, Xiangyu and Zhang, Jian},
  title     = {{UniShield}: An Adaptive Multi-Agent Framework for Unified Forgery Image Detection and Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8121--8132},
}
Another BRIXEL in the Wall: Towards Cheaper Dense Features: Alexander Lappe,

Martin A. Giese; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lappe_2026_CVPR,
  author    = {Lappe, Alexander and Giese, Martin A.},
  title     = {Another {BRIXEL} in the Wall: Towards Cheaper Dense Features},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7605--7614},
}
VIDEOP2R: Video Understanding from Perception to Reasoning: Yifan Jiang,

Yueying Wang,

Rui Zhao,

Toufiq Parag,

Zhimin Chen,

Zhenyu Liao,

Jayakrishnan Unnikrishnan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Yifan and Wang, Yueying and Zhao, Rui and Parag, Toufiq and Chen, Zhimin and Liao, Zhenyu and Unnikrishnan, Jayakrishnan},
  title     = {{VIDEOP2R}: Video Understanding from Perception to Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8303--8313},
}
Plug-and-Play Dynamic In-context Learning with Stochastic Regularization for Screen Content Image Super-Resolution: Yuexin Wang,

Xiaolei Wang,

Guangliang Cheng,

Huihui Bai,

Tammam Tillo,

Jimin Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yuexin and Wang, Xiaolei and Cheng, Guangliang and Bai, Huihui and Tillo, Tammam and Xiao, Jimin},
  title     = {Plug-and-Play Dynamic In-context Learning with Stochastic Regularization for Screen Content Image Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8836--8845},
}
Improving Synthesized Image Detection by Disentangling Generator-Shared and Generator-Specific Image Artifacts: Yongqi Yang,

Yuke Li,

Heng Huang,

Zhihui Li,

Bo Du,

Yu Wu; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yongqi and Li, Yuke and Huang, Heng and Li, Zhihui and Du, Bo and Wu, Yu},
  title     = {Improving Synthesized Image Detection by Disentangling Generator-Shared and Generator-Specific Image Artifacts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8019--8028},
}
Once for All: An End-to-End Paradigm for VLM-Based Domain-Generalized Object Detection: Peng Zhang,

Xiang Yuan,

Cong Li,

Junwei Han,

Gong Cheng; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Peng and Yuan, Xiang and Li, Cong and Han, Junwei and Cheng, Gong},
  title     = {Once for All: An End-to-End Paradigm for {VLM-Based} Domain-Generalized Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6829--6838},
}
Di3PO - Diptych Diffusion DPO for Targeted Improvements in Image Generation: Sanjana Reddy,

Ishaan Malhi,

Sally Ma,

Praneet Dutta; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Reddy_2026_CVPR,
  author    = {Reddy, Sanjana and Malhi, Ishaan and Ma, Sally and Dutta, Praneet},
  title     = {{Di3PO} - Diptych Diffusion {DPO} for Targeted Improvements in Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8868--8876},
}
VRAG-DFD: Verifiable Retrieval-Augmentation for MLLM-based Deepfake Detection: Hui Han,

Shunli Wang,

Yandan Zhao,

Taiping Yao,

Shouhong Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2026_CVPR,
  author    = {Han, Hui and Wang, Shunli and Zhao, Yandan and Yao, Taiping and Ding, Shouhong},
  title     = {{VRAG-DFD}: Verifiable Retrieval-Augmentation for {MLLM-based} Deepfake Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9552--9562},
}
Video Parallel Scaling: Aggregating Diverse Frame Subsets for VideoLLMs: Hyungjin Chung,

Hyelin Nam,

Jiyeon Kim,

Hyojun Go,

Byeongjun Park,

Junho Kim,

Joonseok Lee,

Seongsu Ha,

Byung-Hoon Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chung_2026_CVPR,
  author    = {Chung, Hyungjin and Nam, Hyelin and Kim, Jiyeon and Go, Hyojun and Park, Byeongjun and Kim, Junho and Lee, Joonseok and Ha, Seongsu and Kim, Byung-Hoon},
  title     = {Video Parallel Scaling: Aggregating Diverse Frame Subsets for {VideoLLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8972--8982},
}
NaiLIA: Multimodal Nail Design Retrieval Based on Dense Intent Descriptions and Palette Queries: Kanon Amemiya,

Daichi Yashima,

Kei Katsumata,

Takumi Komatsu,

Ryosuke Korekata,

Seitaro Otsuki,

Komei Sugiura; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Amemiya_2026_CVPR,
  author    = {Amemiya, Kanon and Yashima, Daichi and Katsumata, Kei and Komatsu, Takumi and Korekata, Ryosuke and Otsuki, Seitaro and Sugiura, Komei},
  title     = {{NaiLIA}: Multimodal Nail Design Retrieval Based on Dense Intent Descriptions and Palette Queries},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9034--9044},
}
AITP: Traffic Accident Responsibility Allocation via Multimodal Large Language Models: Zijin Zhou,

Songan Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Zijin and Zhang, Songan},
  title     = {{AITP}: Traffic Accident Responsibility Allocation via Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9259--9268},
}
CheXmix: Unified Generative Pretraining for Vision Language Models in Medical Imaging: Ashwin Kumar,

Robbie Holland,

Corey Barrett,

Jangwon Kim,

Maya Varma,

Zhihong Chen,

Yunhe Gao,

Greg Zaharchuk,

Tara Taghavi,

Krishnaram Kenthapadi,

Akshay Chaudhari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kumar_2026_CVPR,
  author    = {Kumar, Ashwin and Holland, Robbie and Barrett, Corey and Kim, Jangwon and Varma, Maya and Chen, Zhihong and Gao, Yunhe and Zaharchuk, Greg and Taghavi, Tara and Kenthapadi, Krishnaram and Chaudhari, Akshay},
  title     = {{CheXmix}: Unified Generative Pretraining for Vision Language Models in Medical Imaging},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9466--9476},
}
Entropy-Based Visual Re-perception Inference for Multimodal Models: Jia Liufu,

Qiangyu Yan,

Zhehan Kan,

Wenming Yang,

Hailin Hu,

Xinghao Chen,

Borui Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Liufu_2026_CVPR,
  author    = {Liufu, Jia and Yan, Qiangyu and Kan, Zhehan and Yang, Wenming and Hu, Hailin and Chen, Xinghao and Jiang, Borui},
  title     = {Entropy-Based Visual Re-perception Inference for Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9770--9779},
}
Is Prompt Selection Necessary for Task-Free Online Continual Learning?: Seoyoung Park,

Haemin Lee,

Hankook Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Seoyoung and Lee, Haemin and Lee, Hankook},
  title     = {Is Prompt Selection Necessary for Task-Free Online Continual Learning?},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7883--7892},
}
Logical Consistency Optimization for Few-Shot Weakly Supervised Video Anomaly Detection: Hantao Zheng,

Ning Han,

Yawen Zeng,

Hegui Zhu,

Hao Chen; [pdf] [supp]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Hantao and Han, Ning and Zeng, Yawen and Zhu, Hegui and Chen, Hao},
  title     = {Logical Consistency Optimization for Few-Shot Weakly Supervised Video Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9045--9054},
}
ReMem: A Dynamic Memory Evolution Detector for Zero-Shot Anomaly Detection: Ling Yi,

Zhe Chen,

Gaochang Wu,

Jinliang Ding,

Xiaojie Wang,

Zhaolong Ning; [pdf]
[bibtex]
@InProceedings{Yi_2026_CVPR,
  author    = {Yi, Ling and Chen, Zhe and Wu, Gaochang and Ding, Jinliang and Wang, Xiaojie and Ning, Zhaolong},
  title     = {{ReMem}: A Dynamic Memory Evolution Detector for Zero-Shot Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7697--7705},
}
CLIPtone-GO: Geometry-Aware, Gradient-Orthogonalized Text-Guided Color Tone Adjustment: Satyam Merothiya,

Chanda Grover Kamra,

Indra Deep Mastan; [pdf] [supp]
[bibtex]
@InProceedings{Merothiya_2026_CVPR,
  author    = {Merothiya, Satyam and Kamra, Chanda Grover and Mastan, Indra Deep},
  title     = {{CLIPtone-GO}: Geometry-Aware, Gradient-Orthogonalized Text-Guided Color Tone Adjustment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8695--8704},
}
FedAR: Attribute-Guided Representation Learning for Heterogeneous Federated Learning: Mengjie Li,

Liu Yang,

Qi Shen; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Mengjie and Yang, Liu and Shen, Qi},
  title     = {{FedAR}: Attribute-Guided Representation Learning for Heterogeneous Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6987--6996},
}
Model Merging on Loss Landscapes: A Geometric Perspective: Juanwu Lu,

Anand Bhaskar,

Brian Axelrod,

Ekaterina Tolstaya,

Tristan Emrich; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Juanwu and Bhaskar, Anand and Axelrod, Brian and Tolstaya, Ekaterina and Emrich, Tristan},
  title     = {Model Merging on Loss Landscapes: A Geometric Perspective},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7644--7653},
}
SAGA: Semantic Anchor-Guided Alignment for Multi-Source Domain Adaptive Object Detection: Yongchao Feng,

Ziyue Huang,

Jinqing Zhang,

Wenrui Cai,

Qingjie Liu; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Yongchao and Huang, Ziyue and Zhang, Jinqing and Cai, Wenrui and Liu, Qingjie},
  title     = {{SAGA}: Semantic Anchor-Guided Alignment for Multi-Source Domain Adaptive Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7779--7788},
}
VEGAS: Mitigating Hallucinations in Large Vision-Language Models via Vision-Encoder Attention Guided Adaptive Steering: Zihu Wang,

Boxun Xu,

Yuxuan Xia,

Peng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zihu and Xu, Boxun and Xia, Yuxuan and Li, Peng},
  title     = {{VEGAS}: Mitigating Hallucinations in Large Vision-Language Models via Vision-Encoder Attention Guided Adaptive Steering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9055--9064},
}
Training-Free Uncertainty-guided Logit Adjustment for Few-Shot Class-Incremental Learning: Sungwon Woo,

Dongjun Hwang,

Shiwon Kim,

Junsuk Choe,

Jongho Nang; [pdf] [supp]
[bibtex]
@InProceedings{Woo_2026_CVPR,
  author    = {Woo, Sungwon and Hwang, Dongjun and Kim, Shiwon and Choe, Junsuk and Nang, Jongho},
  title     = {Training-Free Uncertainty-guided Logit Adjustment for Few-Shot Class-Incremental Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7634--7643},
}
FineGrade: A Rule-Consistent Scoring Framework for Fine-Grained Action Quality Assessment: Yicong Li,

Howard Leung; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yicong and Leung, Howard},
  title     = {{FineGrade}: A Rule-Consistent Scoring Framework for Fine-Grained Action Quality Assessment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8258--8267},
}
WideEye: Achieving Wide Field-of-view Traffic Video Analytics With Dynamic Orientation Adaptation: Z. Jonny Kong,

Sibendu Paul,

Y. Charlie Hu; [pdf] [supp]
[bibtex]
@InProceedings{Kong_2026_CVPR,
  author    = {Kong, Z. Jonny and Paul, Sibendu and Hu, Y. Charlie},
  title     = {{WideEye}: Achieving Wide Field-of-view Traffic Video Analytics With Dynamic Orientation Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8618--8628},
}
Organizing Unstructured Image Collections using Natural Language: Mingxuan Liu,

Zhun Zhong,

Jun Li,

Gianni Franchi,

Subhankar Roy,

Elisa Ricci; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Mingxuan and Zhong, Zhun and Li, Jun and Franchi, Gianni and Roy, Subhankar and Ricci, Elisa},
  title     = {Organizing Unstructured Image Collections using Natural Language},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8907--8918},
}
Uncertainty-Guided Graph Formulation via MWIS for Token Pruning in LVLMs: Jouwon Song,

Sohyeon Kim,

Kyeongbo Kong; [pdf] [supp]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Jouwon and Kim, Sohyeon and Kong, Kyeongbo},
  title     = {Uncertainty-Guided Graph Formulation via {MWIS} for Token Pruning in {LVLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9510--9519},
}
BiomedHELIX : HiErarchical-Local Interaction eXploration for Biomedical Vision-Language Models: Ziheng Zhu,

Yuncheng Guo,

Jie Xu,

Xiaodong Gu; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Ziheng and Guo, Yuncheng and Xu, Jie and Gu, Xiaodong},
  title     = {{BiomedHELIX} : {HiErarchical-Local} Interaction {eXploration} for Biomedical Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7060--7069},
}
Instruction-Focus-Prompt:Semantics-Driven Structural Prompts for Universal SAM Segmentation: Shuqi Xia,

Guangze Shi,

Jiarui Cao,

Aoyuan Shi,

Meilin Liu,

Xiaoyi Zhang,

Yujie Wang,

Xueyu Liu,

Cai Zhao,

Ziyuan He,

Yongfei Wu,

Mingqiang Wei; [pdf]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Shuqi and Shi, Guangze and Cao, Jiarui and Shi, Aoyuan and Liu, Meilin and Zhang, Xiaoyi and Wang, Yujie and Liu, Xueyu and Zhao, Cai and He, Ziyuan and Wu, Yongfei and Wei, Mingqiang},
  title     = {Instruction-Focus-Prompt:Semantics-Driven Structural Prompts for Universal {SAM} Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7514--7519},
}
VADE: Variance-Aware Dynamic Sampling via Online Sample-Level Difficulty Estimation for Multimodal Reinforcement Learning: Zengjie Hu,

Jiantao Qiu,

Tianyi Bai,

Haojin Yang,

Binhang Yuan,

Qi Jing,

Conghui He,

Wentao Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2026_CVPR,
  author    = {Hu, Zengjie and Qiu, Jiantao and Bai, Tianyi and Yang, Haojin and Yuan, Binhang and Jing, Qi and He, Conghui and Zhang, Wentao},
  title     = {{VADE}: Variance-Aware Dynamic Sampling via Online Sample-Level Difficulty Estimation for Multimodal Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9846--9855},
}
Robust Image Self-Recovery against Tampering using Watermark Generation with Pixel Shuffling: Minyoung Kim,

Paul Hongsuck Seo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2026_CVPR,
  author    = {Kim, Minyoung and Seo, Paul Hongsuck},
  title     = {Robust Image Self-Recovery against Tampering using Watermark Generation with Pixel Shuffling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8877--8886},
}
GenSRL: Generative Spatiotemporal Representation Learning for Ophthalmic Prognosis Prediction: Wanyu Zhang,

Yanzhao Shi,

Chengxin Zheng,

Hua Wang,

Jianing Wang,

Yue Zhang,

Xiaobing Yu,

Xiaodan Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Wanyu and Shi, Yanzhao and Zheng, Chengxin and Wang, Hua and Wang, Jianing and Zhang, Yue and Yu, Xiaobing and Zhang, Xiaodan},
  title     = {{GenSRL}: Generative Spatiotemporal Representation Learning for Ophthalmic Prognosis Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9622--9631},
}
AnyExperts: On-Demand Expert Allocation for Multimodal Language Models with Mixture of Experts: Yuting Gao,

Lan Wang,

Hengyuan Zhao,

Linjiang Huang,

Si Liu,

Qingpei Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Yuting and Wang, Lan and Zhao, Hengyuan and Huang, Linjiang and Liu, Si and Guo, Qingpei},
  title     = {{AnyExperts}: On-Demand Expert Allocation for Multimodal Language Models with Mixture of Experts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9205--9214},
}
SA-Matching DETR: A Lightweight Transformer Detector with Enhanced Scale Adaptive Matching: Chengshan Yang,

Pengnian Zhang,

Jinjing Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Chengshan and Zhang, Pengnian and Zhao, Jinjing},
  title     = {{SA-Matching} {DETR}: A Lightweight Transformer Detector with Enhanced Scale Adaptive Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6695--6705},
}
PestVL-Net: Enabling Multimodal Pest Learning Via Fine-grained Vision-Language Interaction: Xueheng Li,

Tao Hu,

Ke Cao,

Runsheng Qi,

Huixin Zhang,

Rui Li,

Jie Zhang,

Chengjun Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xueheng and Hu, Tao and Cao, Ke and Qi, Runsheng and Zhang, Huixin and Li, Rui and Zhang, Jie and Xie, Chengjun},
  title     = {{PestVL-Net}: Enabling Multimodal Pest Learning Via Fine-grained Vision-Language Interaction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8826--8835},
}
RAZOR: Ratio-Aware Layer Editing for Targeted Unlearning in Vision Transformers and Diffusion Models: Ravi Ranjan,

Utkarsh Grover,

Xiaomin Lin,

Agoritsa Polyzou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ranjan_2026_CVPR,
  author    = {Ranjan, Ravi and Grover, Utkarsh and Lin, Xiaomin and Polyzou, Agoritsa},
  title     = {{RAZOR}: Ratio-Aware Layer Editing for Targeted Unlearning in Vision Transformers and Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7998--8008},
}
Beyond Single Object: Learning 3D Relations with Large Language Models: Kohsuke Ide,

Ryousuke Yamada,

Yue Qiu,

Xianzheng Ma,

Yoshihiro Fukuhara,

Hirokatsu Kataoka,

Yutaka Satoh; [pdf] [supp]
[bibtex]
@InProceedings{Ide_2026_CVPR,
  author    = {Ide, Kohsuke and Yamada, Ryousuke and Qiu, Yue and Ma, Xianzheng and Fukuhara, Yoshihiro and Kataoka, Hirokatsu and Satoh, Yutaka},
  title     = {Beyond Single Object: Learning {3D} Relations with Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9684--9694},
}
Task-Specific Knowledge Improves Generalization: A Logits-Based Framework for Continual Learning of Vision-Language Models: Sijie Wang,

Yingying Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Sijie and Zhu, Yingying},
  title     = {Task-Specific Knowledge Improves Generalization: A Logits-Based Framework for Continual Learning of Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7615--7624},
}
Learning from Label Proportion with Dual-Proportion Constraints: Tianhao Ma,

Ximing Li,

Changchun Li,

Renchu Guan; [pdf] [arXiv]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Tianhao and Li, Ximing and Li, Changchun and Guan, Renchu},
  title     = {Learning from Label Proportion with Dual-Proportion Constraints},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {7583--7592},
}
Scaling Spatial Reasoning in MLLMs through Programmatic Data Synthesis: Helu Zhi,

Jingjing Huang,

Wang Xu,

Yangbin Xu,

Yibin Huang,

Wanyue Zhang,

Baoyang Jiang,

Shirui Deng,

Liang Zhu,

FangFang Li,

Tiejun Zhao,

Yankai Lin,

Yuan Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhi_2026_CVPR,
  author    = {Zhi, Helu and Huang, Jingjing and Xu, Wang and Xu, Yangbin and Huang, Yibin and Zhang, Wanyue and Jiang, Baoyang and Deng, Shirui and Zhu, Liang and Li, FangFang and Zhao, Tiejun and Lin, Yankai and Yao, Yuan},
  title     = {Scaling Spatial Reasoning in {MLLMs} through Programmatic Data Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {9215--9224},
}
PLR-Gate: Real-Time Gradient Privacy Assessment and Gated Transmission for Secure Federated Learning: Tao Huang,

Jiayang Meng,

Hong Chen,

Chen Hou,

Guolong Zheng,

Xu Yang; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Tao and Meng, Jiayang and Chen, Hong and Hou, Chen and Zheng, Guolong and Yang, Xu},
  title     = {{PLR-Gate}: Real-Time Gradient Privacy Assessment and Gated Transmission for Secure Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8029--8038},
}
Gen-n-Val: Agentic Image Data Generation and Validation: Jing-En Huang,

I-Sheng Fang,

Tzuhsuan Huang,

Yu-Lun Liu,

Chih-Yu Wang,

Jun-Cheng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Jing-En and Fang, I-Sheng and Huang, Tzuhsuan and Liu, Yu-Lun and Wang, Chih-Yu and Chen, Jun-Cheng},
  title     = {{Gen-n-Val}: Agentic Image Data Generation and Validation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8786--8795},
}
QENN: A Quantum Entanglement-Inspired Neural Network for Interaction and Relationship Prediction in Story Videos: Zijun Xu,

Zhengqian Wu,

Chunjie Zhang,

Zhongyuan Wang,

Chunxia Xiao,

Chao Liang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zijun and Wu, Zhengqian and Zhang, Chunjie and Wang, Zhongyuan and Xiao, Chunxia and Liang, Chao},
  title     = {{QENN}: A Quantum Entanglement-Inspired Neural Network for Interaction and Relationship Prediction in Story Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {8247--8257},
}
AuralSAM2: Enabling SAM2 Hear Through Pyramid Audio-Visual Feature Prompting: Yuyuan Liu,

Yuanhong Chen,

Chong Wang,

Junlin Han,

Junde Wu,

Can Peng,

Jingkun Chen,

Yu Tian,

Gustavo Carneiro; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Liu_2026_CVPR is also used by other entries in this listing; give it a unique suffix before merging entries into a single .bib file.
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Yuyuan and Chen, Yuanhong and Wang, Chong and Han, Junlin and Wu, Junde and Peng, Can and Chen, Jingkun and Tian, Yu and Carneiro, Gustavo},
    title     = {{AuralSAM2}: Enabling {SAM2} Hear Through Pyramid Audio-Visual Feature Prompting},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7315--7325}
}
Robust Continual Unlearning against Knowledge Erosion and Forgetting Reversal: Eun-Ju Park,

Youjin Shin,

Simon S. Woo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
    author    = {Park, Eun-Ju and Shin, Youjin and Woo, Simon S.},
    title     = {Robust Continual Unlearning against Knowledge Erosion and Forgetting Reversal},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7978--7987}
}
Mitigating the ID-OOD Tradeoff in Open-Set Test-Time Adaptation: Wenjie Zhao,

Jia Li,

Xin Dong,

Yapeng Tian,

Yu Xiang,

Yunhui Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
    author    = {Zhao, Wenjie and Li, Jia and Dong, Xin and Tian, Yapeng and Xiang, Yu and Guo, Yunhui},
    title     = {Mitigating the {ID-OOD} Tradeoff in Open-Set Test-Time Adaptation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {6789--6798}
}
Towards Robust Content Watermarking Against Removal and Forgery Attacks: Yifan Zhu,

Yihan Wang,

Xiao-Shan Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
    author    = {Zhu, Yifan and Wang, Yihan and Gao, Xiao-Shan},
    title     = {Towards Robust Content Watermarking Against Removal and Forgery Attacks},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8059--8069}
}
ProGAL-VLA: Grounded Alignment through Prospective Reasoning in Vision-Language-Action Models: Nastaran Darabi,

Amit Ranjan Trivedi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Darabi_2026_CVPR,
    author    = {Darabi, Nastaran and Trivedi, Amit Ranjan},
    title     = {{ProGAL-VLA}: Grounded Alignment through Prospective Reasoning in Vision-Language-Action Models},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9013--9022}
}
Frequency-Guided Iterative Bi-directional Exchange Network for Cross-Domain Few-Shot Segmentation: Yadang Chen,

Qi Liu,

Guoqing Zhang,

Le Sun,

Yuhui Zheng; [pdf] [supp]
[bibtex]
% NOTE(review): the key Chen_2026_CVPR is also used by other entries in this listing; give it a unique suffix before merging entries into a single .bib file.
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Yadang and Liu, Qi and Zhang, Guoqing and Sun, Le and Zheng, Yuhui},
    title     = {Frequency-Guided Iterative Bi-directional Exchange Network for Cross-Domain Few-Shot Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7841--7851}
}
LinkedOut: Linking World Knowledge Representation Out of Video LLM for Next-Generation Video Recommendation: Haichao Zhang,

Yao Lu,

Lichen Wang,

Yunzhe Li,

Daiwei Chen,

Yunpeng Xu,

Yun Fu; [pdf] [arXiv]
[bibtex]
% NOTE(review): the key Zhang_2026_CVPR is also used by another entry in this listing; give it a unique suffix before merging entries into a single .bib file.
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Haichao and Lu, Yao and Wang, Lichen and Li, Yunzhe and Chen, Daiwei and Xu, Yunpeng and Fu, Yun},
    title     = {{LinkedOut}: Linking World Knowledge Representation Out of Video {LLM} for Next-Generation Video Recommendation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7111--7121}
}
VideoThinker: Building Agentic VideoLLMs with LLM-Guided Tool Reasoning: Chenglin Li,

Qianglong Chen,

Feng Han,

Yikun Wang,

Xingxi Yin,

Yan Gong,

Ruilin Li,

Yin Zhang,

Jiaqi Wang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Li_2026_CVPR is also used by another entry in this listing; give it a unique suffix before merging entries into a single .bib file.
@InProceedings{Li_2026_CVPR,
    author    = {Li, Chenglin and Chen, Qianglong and Han, Feng and Wang, Yikun and Yin, Xingxi and Gong, Yan and Li, Ruilin and Zhang, Yin and Wang, Jiaqi},
    title     = {{VideoThinker}: Building Agentic {VideoLLMs} with {LLM}-Guided Tool Reasoning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8226--8236}
}
EchoTrail-GUI: Building Actionable Memory for GUI Agents via Critic-Guided Self-Exploration: Runze Li,

Yuwen Zhai,

Bo Xu,

Liwu Xu,

Nian Shi,

Wei Zhang,

Ran Lin,

Liang Wang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Li_2026_CVPR is also used by another entry in this listing; give it a unique suffix before merging entries into a single .bib file.
@InProceedings{Li_2026_CVPR,
    author    = {Li, Runze and Zhai, Yuwen and Xu, Bo and Xu, Liwu and Shi, Nian and Zhang, Wei and Lin, Ran and Wang, Liang},
    title     = {{EchoTrail-GUI}: Building Actionable Memory for {GUI} Agents via Critic-Guided Self-Exploration},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9347--9356}
}
V-STaR: Benchmarking Video-LLMs on Video Spatio-Temporal Reasoning: Zixu Cheng,

Jian Hu,

Ziquan Liu,

Chenyang Si,

Wei Li,

Shaogang Gong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR,
    author    = {Cheng, Zixu and Hu, Jian and Liu, Ziquan and Si, Chenyang and Li, Wei and Gong, Shaogang},
    title     = {{V-STaR}: Benchmarking Video-{LLMs} on Video Spatio-Temporal Reasoning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9155--9164}
}
Temporally Consistent Long-Term Memory for 3D Single Object Tracking: Jaejoon Yoo,

SuBeen Lee,

Yerim Jeon,

Miso Lee,

Jae-Pil Heo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yoo_2026_CVPR,
    author    = {Yoo, Jaejoon and Lee, SuBeen and Jeon, Yerim and Lee, Miso and Heo, Jae-Pil},
    title     = {Temporally Consistent Long-Term Memory for {3D} Single Object Tracking},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8388--8397}
}
Why MLLMs Struggle to Determine Object Orientations: Anju Gopinath,

Nikhil Krishnaswamy,

Bruce Draper; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gopinath_2026_CVPR,
    author    = {Gopinath, Anju and Krishnaswamy, Nikhil and Draper, Bruce},
    title     = {Why {MLLMs} Struggle to Determine Object Orientations},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9836--9845}
}
Mull-Tokens: Modality-Agnostic Latent Thinking: Arijit Ray,

Ahmed Abdelkader,

Chengzhi Mao,

Bryan A. Plummer,

Kate Saenko,

Ranjay Krishna,

Leonidas Guibas,

Wen-Sheng Chu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ray_2026_CVPR,
    author    = {Ray, Arijit and Abdelkader, Ahmed and Mao, Chengzhi and Plummer, Bryan A. and Saenko, Kate and Krishna, Ranjay and Guibas, Leonidas and Chu, Wen-Sheng},
    title     = {{Mull-Tokens}: Modality-Agnostic Latent Thinking},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9477--9488}
}
RADSeg: Unleashing Parameter and Compute Efficient Zero-Shot Open-Vocabulary Segmentation Using Agglomerative Models: Omar Alama,

Darshil Jariwala,

Avigyan Bhattacharya,

Seungchan Kim,

Wenshan Wang,

Sebastian Scherer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Alama_2026_CVPR,
    author    = {Alama, Omar and Jariwala, Darshil and Bhattacharya, Avigyan and Kim, Seungchan and Wang, Wenshan and Scherer, Sebastian},
    title     = {{RADSeg}: Unleashing Parameter and Compute Efficient Zero-Shot Open-Vocabulary Segmentation Using Agglomerative Models},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9294--9304}
}
SPHINX: A Synthetic Environment for Visual Perception and Reasoning: Md Tanvirul Alam,

Saksham Aggarwal,

Justin Yang Chae,

Nidhi Rastogi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Alam_2026_CVPR,
    author    = {Alam, Md Tanvirul and Aggarwal, Saksham and Chae, Justin Yang and Rastogi, Nidhi},
    title     = {{SPHINX}: A Synthetic Environment for Visual Perception and Reasoning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9489--9499}
}
OmniGCD: Abstracting Generalized Category Discovery for Modality Agnosticism: Jordan Shipard,

Arnold Wiliem,

Kien Nguyen Thanh,

Wei Xiang,

Clinton Fookes; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shipard_2026_CVPR,
    author    = {Shipard, Jordan and Wiliem, Arnold and Thanh, Kien Nguyen and Xiang, Wei and Fookes, Clinton},
    title     = {{OmniGCD}: Abstracting Generalized Category Discovery for Modality Agnosticism},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {6768--6778}
}
Visual Funnel: Resolving Contextual Blindness in Multimodal Large Language Models: Woojun Jung,

Jaehoon Go,

Mingyu Jeon,

Sunjae Yoon,

Junyeong Kim; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Jung_2026_CVPR is also used by another entry earlier in this listing; give it a unique suffix before merging entries into a single .bib file.
@InProceedings{Jung_2026_CVPR,
    author    = {Jung, Woojun and Go, Jaehoon and Jeon, Mingyu and Yoon, Sunjae and Kim, Junyeong},
    title     = {Visual Funnel: Resolving Contextual Blindness in Multimodal Large Language Models},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8962--8971}
}
HiViS: Hiding Visual Tokens from the Drafter for Speculative Decoding in Vision-Language Models: Zhinan Xie,

Peisong Wang,

Shuang Qiu,

Jian Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
    author    = {Xie, Zhinan and Wang, Peisong and Qiu, Shuang and Cheng, Jian},
    title     = {{HiViS}: Hiding Visual Tokens from the Drafter for Speculative Decoding in Vision-Language Models},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8952--8961}
}
Pose-dIVE: Pose-Diversified Augmentation for Person Re-Identification: Inès Hyeonsu Kim,

Woojeong Jin,

Soowon Son,

Junyoung Seo,

Seokju Cho,

JeongYeol Baek,

Byeongwon Lee,

JoungBin Lee,

Seungryong Kim; [pdf] [supp]
[bibtex]
% The accented letter in the first author's given name is written as a braced
% group so classic BibTeX treats it as a single letter when sorting and labelling.
@InProceedings{Kim_2026_CVPR,
    author    = {Kim, In{\`e}s Hyeonsu and Jin, Woojeong and Son, Soowon and Seo, Junyoung and Cho, Seokju and Baek, JeongYeol and Lee, Byeongwon and Lee, JoungBin and Kim, Seungryong},
    title     = {{Pose-dIVE}: Pose-Diversified Augmentation for Person Re-Identification},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8640--8650}
}
Wake the Sleeping Weights: Sparsely-Activated Continual Test-Time Adaptation for Medical Image Segmentation: Jianhang Ji,

Zhiming Cheng,

Jianxiang Zhao,

Bingtao Ma,

Hao Chen,

Yuhan Gao,

Lian Zhang,

Zuobin Ying,

Shuai Wang; [pdf]
[bibtex]
@InProceedings{Ji_2026_CVPR,
    author    = {Ji, Jianhang and Cheng, Zhiming and Zhao, Jianxiang and Ma, Bingtao and Chen, Hao and Gao, Yuhan and Zhang, Lian and Ying, Zuobin and Wang, Shuai},
    title     = {Wake the Sleeping Weights: Sparsely-Activated Continual Test-Time Adaptation for Medical Image Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7799--7809}
}
Reasoning Within the Mind: Dynamic Multimodal Interleaving in Latent Space: Chengzhi Liu,

Yuzhe Yang,

Yue Fan,

Qingyue Wei,

Sheng Liu,

Xin Eric Wang; [pdf] [arXiv]
[bibtex]
% NOTE(review): the key Liu_2026_CVPR is also used by other entries in this listing; give it a unique suffix before merging entries into a single .bib file.
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Chengzhi and Yang, Yuzhe and Fan, Yue and Wei, Qingyue and Liu, Sheng and Wang, Xin Eric},
    title     = {Reasoning Within the Mind: Dynamic Multimodal Interleaving in Latent Space},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9225--9236}
}
SignReasoner: Compositional Reasoning for Complex Traffic Sign Understanding Via Functional Structure Units: Ruibin Wang,

Zhenyu Lin,

Xinhai Zhao; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Wang_2026_CVPR is also used by another entry in this listing; give it a unique suffix before merging entries into a single .bib file.
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Ruibin and Lin, Zhenyu and Zhao, Xinhai},
    title     = {{SignReasoner}: Compositional Reasoning for Complex Traffic Sign Understanding Via Functional Structure Units},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8796--8805}
}
Unify the Views: View-Consistent Prototype Learning for Few-Shot Segmentation: Hongli Liu,

Yu Wang,

Shengjie Zhao; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Liu_2026_CVPR is also used by other entries in this listing; give it a unique suffix before merging entries into a single .bib file.
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Hongli and Wang, Yu and Zhao, Shengjie},
    title     = {Unify the Views: View-Consistent Prototype Learning for Few-Shot Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7419--7428}
}
A Single Pixel is All You Need: Weakly Supervised Medical Image Segmentation using Discrete Denoising Diffusion Models: Mehmet Demirel,

Christos Kyrkou; [pdf] [supp]
[bibtex]
@InProceedings{Demirel_2026_CVPR,
    author    = {Demirel, Mehmet and Kyrkou, Christos},
    title     = {A Single Pixel is All You Need: Weakly Supervised Medical Image Segmentation using Discrete Denoising Diffusion Models},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7541--7551}
}
Map2Thought: Explicit 3D Spatial Reasoning via Metric Cognitive Maps: Xiangjun Gao,

Zhensong Zhang,

Dave Zhenyu Chen,

Songcen Xu,

Long Quan,

Eduardo Pérez-Pellitero,

Youngkyoon Jang; [pdf] [arXiv]
[bibtex]
% The accented letter in the surname P{\'e}rez-Pellitero is written as a braced
% group so classic BibTeX treats it as a single letter when sorting and labelling.
@InProceedings{Gao_2026_CVPR,
    author    = {Gao, Xiangjun and Zhang, Zhensong and Chen, Dave Zhenyu and Xu, Songcen and Quan, Long and P{\'e}rez-Pellitero, Eduardo and Jang, Youngkyoon},
    title     = {{Map2Thought}: Explicit {3D} Spatial Reasoning via Metric Cognitive Maps},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7154--7164}
}
On the Group Disparities Arising from Machine Unlearning: Zijie Pan,

Zuobin Ying,

Yajie Wang,

Liehuang Zhu,

Wanlei Zhou; [pdf]
[bibtex]
@InProceedings{Pan_2026_CVPR,
    author    = {Pan, Zijie and Ying, Zuobin and Wang, Yajie and Zhu, Liehuang and Zhou, Wanlei},
    title     = {On the Group Disparities Arising from Machine Unlearning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8133--8142}
}
Language-Augmented Semantic Priors for B-Spline Surface Fitting: Yunzhong Lou,

Yusheng Luo,

Jiahao Li,

Yu Song,

Xiangdong Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Lou_2026_CVPR,
    author    = {Lou, Yunzhong and Luo, Yusheng and Li, Jiahao and Song, Yu and Zhou, Xiangdong},
    title     = {Language-Augmented Semantic Priors for {B}-Spline Surface Fitting},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9120--9130}
}
FCL-COD: Weakly Supervised Camouflaged Object Detection with Frequency-aware and Contrastive Learning: Jingchen Ni,

Quan Zhang,

Dan Jiang,

Keyu Lv,

Ke Zhang,

Chun Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ni_2026_CVPR,
    author    = {Ni, Jingchen and Zhang, Quan and Jiang, Dan and Lv, Keyu and Zhang, Ke and Yuan, Chun},
    title     = {{FCL-COD}: Weakly Supervised Camouflaged Object Detection with Frequency-aware and Contrastive Learning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7439--7449}
}
Modulate-and-Map: Crossmodal Feature Mapping with Cross-View Modulation for 3D Anomaly Detection: Alex Costanzino,

Pierluigi Zama Ramirez,

Giuseppe Lisanti,

Luigi Di Stefano; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Costanzino_2026_CVPR,
    author    = {Costanzino, Alex and Ramirez, Pierluigi Zama and Lisanti, Giuseppe and Di Stefano, Luigi},
    title     = {{Modulate-and-Map}: Crossmodal Feature Mapping with Cross-View Modulation for {3D} Anomaly Detection},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8816--8825}
}
MOSSTrack : Modality-Specific Spatio-Temporal Context Learning for RGB-T Tracking: Yisong Liu,

He Yao,

Junlong Cheng,

Yujie Lu,

Junqi Bai,

Min Zhu; [pdf] [supp]
[bibtex]
% NOTE(review): the key Liu_2026_CVPR is also used by other entries in this listing; give it a unique suffix before merging entries into a single .bib file.
@InProceedings{Liu_2026_CVPR,
    author    = {Liu, Yisong and Yao, He and Cheng, Junlong and Lu, Yujie and Bai, Junqi and Zhu, Min},
    title     = {{MOSSTrack} : Modality-Specific Spatio-Temporal Context Learning for {RGB-T} Tracking},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8378--8387}
}
CoPS: Conditional Prompt Synthesis for Zero-Shot Anomaly Detection: Qiyu Chen,

Zhen Qu,

Wei Luo,

Haiming Yao,

Yunkang Cao,

Yuxin Jiang,

Yinan Duan,

Huiyuan Luo,

Chengkan Lv,

Zhengtao Zhang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Chen_2026_CVPR is also used by other entries in this listing; give it a unique suffix before merging entries into a single .bib file.
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Qiyu and Qu, Zhen and Luo, Wei and Yao, Haiming and Cao, Yunkang and Jiang, Yuxin and Duan, Yinan and Luo, Huiyuan and Lv, Chengkan and Zhang, Zhengtao},
    title     = {{CoPS}: Conditional Prompt Synthesis for Zero-Shot Anomaly Detection},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8554--8563}
}
Test-Time Distillation for Continual Model Adaptation: Xiao Chen,

Jiazhen Huang,

Zhiming Liu,

Qinting Jiang,

Fanding Huang,

Jingyan Jiang,

Zhi Wang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the key Chen_2026_CVPR is also used by other entries in this listing; give it a unique suffix before merging entries into a single .bib file.
@InProceedings{Chen_2026_CVPR,
    author    = {Chen, Xiao and Huang, Jiazhen and Liu, Zhiming and Jiang, Qinting and Huang, Fanding and Jiang, Jingyan and Wang, Zhi},
    title     = {Test-Time Distillation for Continual Model Adaptation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7593--7604}
}
Benchmarking Vision-Language Models under Contradictory Virtual Content Attacks in Augmented Reality: Yanming Xiu,

Zhengyuan Jiang,

Neil Zhenqiang Gong,

Maria Gorlatova; [pdf] [arXiv]
[bibtex]
@InProceedings{Xiu_2026_CVPR,
    author    = {Xiu, Yanming and Jiang, Zhengyuan and Gong, Neil Zhenqiang and Gorlatova, Maria},
    title     = {Benchmarking Vision-Language Models under Contradictory Virtual Content Attacks in Augmented Reality},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9110--9119}
}
Disrupting Positional Encoding for Effective Open Set Recognition: Yu Wang,

Jiabo Xie,

Yucan Zhou,

Junxian Mu,

Qinghua Hu,

Pengfei Zhu; [pdf] [supp]
[bibtex]
% NOTE(review): the key Wang_2026_CVPR is also used by another entry in this listing; give it a unique suffix before merging entries into a single .bib file.
@InProceedings{Wang_2026_CVPR,
    author    = {Wang, Yu and Xie, Jiabo and Zhou, Yucan and Mu, Junxian and Hu, Qinghua and Zhu, Pengfei},
    title     = {Disrupting Positional Encoding for Effective Open Set Recognition},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {6633--6642}
}
Restore-R1: Efficient Image Restoration Agents via Reinforcement Learning with Multimodal LLM Perceptual Feedback: Jianglin Lu,

Yuanwei Wu,

Ziyi Zhao,

Hongcheng Wang,

Felix Jimenez,

Abrar Majeedi,

Yun Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
    author    = {Lu, Jianglin and Wu, Yuanwei and Zhao, Ziyi and Wang, Hongcheng and Jimenez, Felix and Majeedi, Abrar and Fu, Yun},
    title     = {{Restore-R1}: Efficient Image Restoration Agents via Reinforcement Learning with Multimodal {LLM} Perceptual Feedback},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8629--8639}
}
Bi-Level Optimization for Single Domain Generalization: Marzi Heidari,

Hanping Zhang,

Hao Yan,

Yuhong Guo; [pdf] [arXiv]
[bibtex]
@InProceedings{Heidari_2026_CVPR,
    author    = {Heidari, Marzi and Zhang, Hanping and Yan, Hao and Guo, Yuhong},
    title     = {Bi-Level Optimization for Single Domain Generalization},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {6685--6694}
}
EscherNet++: A Scalable Multi-View Framework for Amodal Completion, Novel View Synthesis and Feed-Forward 3D Reconstruction: Xinan Zhang,

Muhammad Zubair Irshad,

Anthony Yezzi,

Yi-Chang Tsai,

Zsolt Kira; [pdf] [supp]
[bibtex]
% NOTE(review): the key Zhang_2026_CVPR is also used by another entry in this listing; give it a unique suffix before merging entries into a single .bib file.
@InProceedings{Zhang_2026_CVPR,
    author    = {Zhang, Xinan and Irshad, Muhammad Zubair and Yezzi, Anthony and Tsai, Yi-Chang and Kira, Zsolt},
    title     = {{EscherNet++}: A Scalable Multi-View Framework for Amodal Completion, Novel View Synthesis and Feed-Forward {3D} Reconstruction},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {8846--8856}
}
Are Multimodal Large Language Models Ready for Omnidirectional Spatial Reasoning?: Zihao Dongfang,

Xu Zheng,

Ziqiao Weng,

Yuanhuiyi Lyu,

Danda Pani Paudel,

Luc Van Gool,

Kailun Yang,

Xuming Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dongfang_2026_CVPR,
    author    = {Dongfang, Zihao and Zheng, Xu and Weng, Ziqiao and Lyu, Yuanhuiyi and Paudel, Danda Pani and Van Gool, Luc and Yang, Kailun and Hu, Xuming},
    title     = {Are Multimodal Large Language Models Ready for Omnidirectional Spatial Reasoning?},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {9759--9769}
}
SCOPE: Spatially Ordered Continual Learning for 3D Segmentation: Wenhao Xu,

Huaidong Zhang,

Weipeng Zhang,

Qianle Zhang,

Shengfeng He; [pdf]
[bibtex]
% NOTE(review): the key Xu_2026_CVPR is also used by another entry in this listing; give it a unique suffix before merging entries into a single .bib file.
@InProceedings{Xu_2026_CVPR,
    author    = {Xu, Wenhao and Zhang, Huaidong and Zhang, Weipeng and Zhang, Qianle and He, Shengfeng},
    title     = {{SCOPE}: Spatially Ordered Continual Learning for {3D} Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
    month     = jun,
    year      = {2026},
    pages     = {7862--7871}
}; Back