Findings
- Back
Causal Chain-Guided Reasoning for Modular and Explainable Causal-Why Video Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Parmar_2026_CVPR,
  author    = {Parmar, Paritosh and Peh, Eric and Fernando, Basura},
  title     = {Causal Chain-Guided Reasoning for Modular and Explainable Causal-Why Video Question Answering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5860--5870},
}
Optimal-Transport-based Feature Alignment for Multimodal Change Detection-
[pdf]
[bibtex]@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Mengqi and Liu, Jun and Cui, Li and Duan, Yuping and Wang, Faqiang},
  title     = {Optimal-Transport-based Feature Alignment for Multimodal Change Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6342--6351},
}
Stepper: Stepwise Immersive Scene Generation with Multiview Panoramas-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wimbauer_2026_CVPR,
  author    = {Wimbauer, Felix and Manhardt, Fabian and Oechsle, Michael and Kalischek, Nikolai and Rupprecht, Christian and Cremers, Daniel and Tombari, Federico},
  title     = {Stepper: Stepwise Immersive Scene Generation with Multiview Panoramas},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4541--4551},
}
Multimodal Decoupled Dynamic Graph Learning for Brain Disease Diagnosis-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Aimei and Cai, Yongxing and Liu, Bin and Sun, Jiale and Zhao, Guixin},
  title     = {Multimodal Decoupled Dynamic Graph Learning for Brain Disease Diagnosis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5430--5439},
}
How to Achieve Prototypical Birth and Death for OOD Detection?-
[pdf]
[arXiv]
[bibtex]@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Ningkang and Yu, Qianfeng and Peng, Xiaoqian and Qian, Linjing and Liu, Yafei and Xiao, Canran and Lu, Xinyu and Lu, Tingyu and Zheng, Zhichao and Gu, Yanhui},
  title     = {How to Achieve Prototypical Birth and Death for {OOD} Detection?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6466--6475},
}
TextBind: Your Vision-Language Models are Naturally Unified Multimodal Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Xu and Fu, Yun},
  title     = {{TextBind}: Your Vision-Language Models are Naturally Unified Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6029--6039},
}
Decoupled Scale-wise Autoregressive Modeling for Visual Generation-
[pdf]
[bibtex]@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Sucheng and Yu, Yaodong and Ruiz, Nataniel and Wang, Feng and Xie, Cihang},
  title     = {Decoupled Scale-wise Autoregressive Modeling for Visual Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4506--4515},
}
Pioneering Perceptual Video Fluency Assessment: A Novel Task with Benchmark Dataset and Baseline-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Qizhi and Yuan, Kun and Qu, Yunpeng and Sun, Ming and Zhou, Chao and Zhu, Jihong},
  title     = {Pioneering Perceptual Video Fluency Assessment: A Novel Task with Benchmark Dataset and Baseline},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4821--4831},
}
Turning Generators into Retrievers: Unlocking MLLMs for Natural Language-Guided Geo-Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yuqi and Zhang, Xiaohan and Arrabi, Ahmad and Sultani, Waqas and Chen, Chen and Wshah, Safwan},
  title     = {Turning Generators into Retrievers: Unlocking {MLLMs} for Natural Language-Guided Geo-Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6331--6341},
}
OmniMotion-X: Versatile Multimodal Whole-Body Motion Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Guowei and Bian, Yuxuan and Zeng, Ailing and Chen, Zhuo and Shi, Mingyi and Huang, Shaoli and Li, Wen and Duan, Lixin and Xu, Qiang},
  title     = {{OmniMotion-X}: Versatile Multimodal Whole-Body Motion Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3641--3652},
}
Contact Matrix: Enhancing Dance Motion Synthesis with Precise Interaction Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Xuhai and Cen, Zhi and Pi, Huaijin and Peng, Sida and Zhou, Xiaowei and Liu, Yong},
  title     = {Contact Matrix: Enhancing Dance Motion Synthesis with Precise Interaction Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3598--3608},
}
Block Cascading: Training Free Acceleration of Block-Causal Video Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bandyopadhyay_2026_CVPR,
  author    = {Bandyopadhyay, Hmrishav and Pinnaparaju, Nikhil and Entezari, Rahim and Scott, Jim and Song, Yi-Zhe and Jampani, Varun},
  title     = {Block Cascading: Training Free Acceleration of Block-Causal Video Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4078--4088},
}
Group Relative Attention Guidance for Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xuanpu and Niu, Xuesong and Chen, Ruidong and Song, Dan and Zeng, Jianhao and Du, Penghui and Cao, Haoxiang and Wu, Kai and Liu, An-an},
  title     = {Group Relative Attention Guidance for Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3840--3850},
}
GeneFlow: Modeling Heredity and Variation via Flow Matching Transformers for Kinship Verification-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yihang and Hou, Xianxu and Shen, Linlin},
  title     = {{GeneFlow}: Modeling Heredity and Variation via Flow Matching Transformers for Kinship Verification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3688--3697},
}
EIRES: Training-free AI-Generated Image Detection via Edit-Induced Reconstruction Error Shift-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Wan and Yan, Jing and Chen, Xiaojing and Shen, Ling and Lin, Chenhao and Diao, Yunfeng and Hong, Richang},
  title     = {{EIRES}: Training-free {AI}-Generated Image Detection via Edit-Induced Reconstruction Error Shift},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6486--6495},
}
Semantic-Aware Spectral Reconstruction: A Spectral Library-Aided Unsupervised Method Based on the Diffusion Model-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Keli and Qian, Yuntao},
  title     = {Semantic-Aware Spectral Reconstruction: A Spectral Library-Aided Unsupervised Method Based on the Diffusion Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4940--4949},
}
InternVL-X: Advancing and Accelerating InternVL Series with Efficient Visual Token Compression-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Dongchen and Zhang, Zilu and Huang, Leping and Sun, Yuyao and Zeng, Jianliang and Shu, Mao and Cao, Huo},
  title     = {{InternVL-X}: Advancing and Accelerating {InternVL} Series with Efficient Visual Token Compression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5765--5775},
}
Earthquake-Bench: Video Generation Benchmark for Earthquake Simulation-
[pdf]
[supp]
[bibtex]@InProceedings{Bao_2026_CVPR,
  author    = {Bao, Lei and Chen, Hao and Chen, Yuyan and Wu, Kui and Chen, Lijia and Zhong, Fangwei and Huang, Feiran and Song, Bo and Yang, Han},
  title     = {{Earthquake-Bench}: Video Generation Benchmark for Earthquake Simulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4055--4066},
}
Quantifying the Gap between Understanding and Generation within Unified Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Chenlong and Chen, Yuhang and Hu, Zhihan and Chen, Dongping and Chen, Wenhu and Wiegreffe, Sarah and Zhou, Tianyi},
  title     = {Quantifying the Gap between Understanding and Generation within Unified Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5936--5946},
}
Latent-Compressed Variational Autoencoder for Video Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guan_2026_CVPR,
  author    = {Guan, Jiarui and Zhao, Wenshuai and Zou, Zhengtao and Kannala, Juho and Solin, Arno},
  title     = {Latent-Compressed Variational Autoencoder for Video Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3873--3883},
}
CaptAin: Caption-driven Alignment for Bridging Modality Gaps in Partially Relevant Video Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Chuanshen and Zhou, Kai and Wang, Feiqi and Ning, Yutao and Xiong, Zhendong and Li, Yirui and Wen, Zhiquan and Tan, Mingkui},
  title     = {{CaptAin}: Caption-driven Alignment for Bridging Modality Gaps in Partially Relevant Video Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6208--6217},
}
FLAIR: Frequency- and Locality-Aware Implicit Neural Representations-
[pdf]
[supp]
[bibtex]@InProceedings{Ko_2026_CVPR,
  author    = {Ko, Sukhun and Youn, Seokhyun and Kye, Dahyeon and Min, Kyle and Eom, Chanho and Oh, Jihyong},
  title     = {{FLAIR}: Frequency- and Locality-Aware Implicit Neural Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4877--4887},
}
Disentangle Once, Control All: A Unified and Efficient Framework for Disentangling Multi-Condition Control in Human Video Generation-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Runqi and Wang, Chuming and Yi, Fangqiu and Zhao, Yuying and Xu, Jingyu and Dai, Yuhang and Wang, Zheng and Zhang, Chi},
  title     = {Disentangle Once, Control All: A Unified and Efficient Framework for Disentangling Multi-Condition Control in Human Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3903--3913},
}
Stochastic Perturbations Improve Distribution-to-Distribution Generative Models-
[pdf]
[supp]
[bibtex]@InProceedings{Su_2026_CVPR,
  author    = {Su, Shiye and Zhang, Yuhui and Zhou, Linqi and Ranganath, Rajesh and Yeung-Levy, Serena},
  title     = {Stochastic Perturbations Improve Distribution-to-Distribution Generative Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3965--3974},
}
Decomposing Subject-Driven Image Generation via Intermediate Structural Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Hanzhong and Yu, Yizhou},
  title     = {Decomposing Subject-Driven Image Generation via Intermediate Structural Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3799--3808},
}
DSAA: Dual-Stage Attribute Activation for Fine-Grained Open Vocabulary Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Donghong and Lin, Endian and Liu, Hanqing and Liu, Mingjie and Cui, Luoping and Yang, Zhao and Zhu, Chuang},
  title     = {{DSAA}: Dual-Stage Attribute Activation for Fine-Grained Open Vocabulary Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6538--6547},
}
FusionBridge: An Efficient Fusion Via Feature Disentanglement for Multi-Modal Object Re-Identification-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Yali and Han, Qianru and He, Xinwei and Liu, Zhi and Xiang, Jinhai},
  title     = {{FusionBridge}: An Efficient Fusion Via Feature Disentanglement for Multi-Modal Object Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5735--5744},
}
LWTformer: A Detail-Aware, Learnable Wavelet-Transformer for Ancient Chinese Character Image Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Ruan_2026_CVPR,
  author    = {Ruan, Wentao and Li, Xinhui and Cheng, Zhan and Fan, Cunhang and Tian, Libao and Lv, Zhao},
  title     = {{LWTformer}: A Detail-Aware, Learnable Wavelet-Transformer for Ancient {Chinese} Character Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4971--4981},
}
OmniInsert: Mask-Free Video Insertion of Any Reference via Diffusion Transformer Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jinshu and Li, Xinghui and Bai, Xu and Ma, Tianxiang and Zhang, Pengze and Li, Mengtian and Chen, Zhuowei and Li, Gen and Liu, Lijie and Zhao, Songtao and Li, Bingchuan and He, Qian},
  title     = {{OmniInsert}: Mask-Free Video Insertion of Any Reference via Diffusion Transformer Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4067--4077},
}
HoliSafe: Holistic Safety Benchmarking and Modeling for Vision-Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Youngwan and Kim, Kangsan and Park, Kwanyong and Jung, Ilchae and Jang, Soojin and Lee, Seanie and Lee, Yong-Ju and Hwang, Sung Ju},
  title     = {{HoliSafe}: Holistic Safety Benchmarking and Modeling for Vision-Language Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5989--5998},
}
BitTP: The Lightweight Trajectory Prediction Model with BitLLM for Edge-Devices-
[pdf]
[supp]
[bibtex]@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Mincheol and Lim, HyunJin and Kang, Bomin and Park, Daehee},
  title     = {{BitTP}: The Lightweight Trajectory Prediction Model with {BitLLM} for Edge-Devices},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3470--3480},
}
Learning by Neighbor-Aware Semantics, Deciding by Open-Form Flows: Towards Robust Zero-Shot Skeleton Action Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yang and Li, Miaoge and Rao, Zhijie and Zeng, Deze and Guo, Song and Guo, Jingcai},
  title     = {Learning by Neighbor-Aware Semantics, Deciding by Open-Form Flows: Towards Robust Zero-Shot Skeleton Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3374--3383},
}
InstaDA: Augmenting Instance Segmentation Data with Dual-Agent System-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hou_2026_CVPR,
  author    = {Hou, Xianbao and He, Yonghao and Boukhers, Zeyd and See, John and Su, Hu and Sui, Wei and Yang, Cong},
  title     = {{InstaDA}: Augmenting Instance Segmentation Data with Dual-Agent System},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4300--4309},
}
CP-IMoE: Collaborative Prompt-Guided Interactive Mixture-of-Experts for Incomplete Multimodal Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Jing and Zhang, Dongbo and Zheng, Yalin and Meng, Yanda},
  title     = {{CP-IMoE}: Collaborative Prompt-Guided Interactive Mixture-of-Experts for Incomplete Multimodal Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6090--6099},
}
B-MoE: A Body-Part-Aware Mixture-of-Experts "All Parts Matter" Approach to Micro-Action Recognition-
[pdf]
[arXiv]
[bibtex]@InProceedings{Poddar_2026_CVPR,
  author    = {Poddar, Nishit and Reka, Aglind and Borza, Diana-Laura and Majhi, Snehashis and Balazia, Michal and Das, Abhijit and Br{\'e}mond, Fran{\c{c}}ois},
  title     = {{B-MoE}: A Body-Part-Aware Mixture-of-Experts ``All Parts Matter'' Approach to Micro-Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3364--3373},
}
M^4Fuse: Lightweight State-Space MoE with a Cross-Scale Gating Bridge for Brain Tumor Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Meihua and Tong, Xinyu and Yang, Li},
  title     = {{M{\textasciicircum}4Fuse}: Lightweight State-Space {MoE} with a Cross-Scale Gating Bridge for Brain Tumor Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5368--5377},
}
Understanding Reward Hacking in Text-to-Image Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2026_CVPR,
  author    = {Hong, Yunqi and Kao, Kuei-Chun and Zhou, Hengguang and Hsieh, Cho-Jui},
  title     = {Understanding Reward Hacking in Text-to-Image Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4245--4255},
}
POS-ISP: Pipeline Optimization at the Sequence Level for Task-aware ISP-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Won_2026_CVPR,
  author    = {Won, Jiyun and Yang, Heemin and Kim, Woohyeok and Ok, Jungseul and Cho, Sunghyun},
  title     = {{POS-ISP}: Pipeline Optimization at the Sequence Level for Task-aware {ISP}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4931--4939},
}
DUALVISION: RGB-Infrared Multimodal Large Language Models for Robust Visual Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Majeedi_2026_CVPR,
  author    = {Majeedi, Abrar and Ruan, Zhiyuan and Zhao, Ziyi and Wang, Hongcheng and Lu, Jianglin and Li, Yin},
  title     = {{DUALVISION}: {RGB}-Infrared Multimodal Large Language Models for Robust Visual Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5786--5795},
}
S3O: Selective Spatial-Spectral Operator for Cross-Scale Fusion-
[pdf]
[bibtex]@InProceedings{Pei_2026_CVPR,
  author    = {Pei, Jieyuan and Li, Wei and Li, Zhuoxuan and Zhu, Junwei and Lu, Meiyi and Jiang, Jiawei and Wang, Chenyu and Zheng, Jianwei},
  title     = {{S3O}: Selective Spatial-Spectral Operator for Cross-Scale Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6281--6290},
}
HiStream: Efficient High-Resolution Video Generation via Redundancy Eliminated Streaming-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Haonan and Liu, Shikun and Zhou, Zijian and An, Zhaochong and Ren, Weiming and Liu, Zhiheng and Schult, Jonas and He, Sen and Chen, Shoufa and Cong, Yuren and Xiang, Tao and Liu, Ziwei and Perez-Rua, Juan-Manuel},
  title     = {{HiStream}: Efficient High-Resolution Video Generation via Redundancy Eliminated Streaming},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4603--4613},
}
EpiMask: Leveraging Epipolar Distance Based Masks in Cross-Attention for Satellite Image Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deshmukh_2026_CVPR,
  author    = {Deshmukh, Rahul and Chauhan, Aditya and Kak, Avinash},
  title     = {{EpiMask}: Leveraging Epipolar Distance Based Masks in Cross-Attention for Satellite Image Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6271--6280},
}
AdaGaR: Adaptive Gabor Representation for Dynamic Scene Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chan_2026_CVPR,
  author    = {Chan, Jiewen and Zhao, Zhenjun and Liu, Yu-Lun},
  title     = {{AdaGaR}: Adaptive {Gabor} Representation for Dynamic Scene Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4276--4289},
}
MeMix: Multi-Encoder Mixture Framework for Medical Report Generation-
[pdf]
[bibtex]@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Yiming and Cui, Lizhen and Shen, Zhiqi},
  title     = {{MeMix}: Multi-Encoder Mixture Framework for Medical Report Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5474--5483},
}
PaM-MIL: Proliferation and Metastasis Enhanced Localization for Multiple Instance Learning on Pathology Images-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Pengyu and Wang, Jiachuan and Chen, Zhao and Cao, Caleb Chen and Wang, Liping and Jiang, Tingyi and Chen, Lei},
  title     = {{PaM-MIL}: Proliferation and Metastasis Enhanced Localization for Multiple Instance Learning on Pathology Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5305--5314},
}
M-PhyGs: Multi-Material Object Dynamics from Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wada_2026_CVPR,
  author    = {Wada, Norika and Yamashita, Kohei and Kawahara, Ryo and Nishino, Ko},
  title     = {{M-PhyGs}: Multi-Material Object Dynamics from Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6404--6413},
}
TokenErase: Robust Concept Erasure via Visual-Injected Token Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Zou_2026_CVPR,
  author    = {Zou, Liangshun and Ni, Zhangkai and Wang, Hanli},
  title     = {{TokenErase}: Robust Concept Erasure via Visual-Injected Token Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4129--4139},
}
Activation-Norm Maximization to Accelerate Training in Flow-Matching Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Belhe_2026_CVPR,
  author    = {Belhe, Yash and Chang, Wesley and Li, Tzu-Mao and Ramamoorthi, Ravi and Gharbi, Micha{\"e}l},
  title     = {Activation-Norm Maximization to Accelerate Training in Flow-Matching Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4089--4096},
}
Learning to Translate Noise for Robust Image Denoising-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ha_2026_CVPR,
  author    = {Ha, Inju and Ryou, Donghun and Seo, Seonguk and Han, Bohyung},
  title     = {Learning to Translate Noise for Robust Image Denoising},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5034--5043},
}
UGLMM: Towards Unified Vision Grounding with Large Multimodal Model-
[pdf]
[supp]
[bibtex]@InProceedings{Shan_2026_CVPR,
  author    = {Shan, Xiangheng and Zhou, Li and Sun, Zenghui and Dong, Shichao and Sang, Nong and Lan, Jinsong and Zhu, Xiaoyong and Zheng, Bo and Gao, Changxin and Zhang, Kaifu},
  title     = {{UGLMM}: Towards Unified Vision Grounding with Large Multimodal Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5683--5693},
}
VideoScaffold: Elastic-Scale Visual Hierarchies for Streaming Video Understanding in MLLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Naishan and Guo, Qingpei and Huang, Jie and Zhao, Feng},
  title     = {{VideoScaffold}: Elastic-Scale Visual Hierarchies for Streaming Video Understanding in {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5947--5957},
}
Blend-Aware Latent Diffusion: Mitigating Stitched Seams in Image Inpainting-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yunpeng and Hou, Xingzhong and Wu, Jie and Liu, Boxiao and Zhang, Yi and Song, Guanglu and Liu, Yu and Tian, Changyao and Luo, Gen and You, Haihang},
  title     = {Blend-Aware Latent Diffusion: Mitigating Stitched Seams in Image Inpainting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4678--4687},
}
DynaMind: Reconstructing Dynamic Visual Scenes from EEG by Aligning Temporal Dynamics and Multimodal Semantics to Guided Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Junxiang and Lin, Junming and Zhou, Jie and Xiong, Wei and Li, Jiangtong and Li, Jie and Zhuang, Jie and Ji, Hongfei},
  title     = {{DynaMind}: Reconstructing Dynamic Visual Scenes from {EEG} by Aligning Temporal Dynamics and Multimodal Semantics to Guided Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5378--5387},
}
TP-Seg: Task-Prototype Framework for Unified Medical Lesion Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Jiawei and Zhou, Qiangqiang and Zhu, Dandan and Chen, Yong and Yi, Yugen and Zhao, Xiaoqi},
  title     = {{TP-Seg}: Task-Prototype Framework for Unified Medical Lesion Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5452--5462},
}
Generated Reality: Human-Centric World Simulation Using Interactive Video Generation with Hand and Camera Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Linxi and Sun, Lisong C. and Neall, Ashley and Wu, Tong and Cai, Shengqu and Wetzstein, Gordon},
  title     = {Generated Reality: Human-Centric World Simulation Using Interactive Video Generation with Hand and Camera Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3998--4008},
}
Parallel In-context Learning for Large Vision Language Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yamaguchi_2026_CVPR,
  author    = {Yamaguchi, Shin'ya and Chijiwa, Daiki and Sakao, Tamao and Hasegawa, Taku},
  title     = {Parallel In-context Learning for Large Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5796--5806},
}
Training-Free Cross-Modal Alignment via Anchor Profiles with Statistical Significance Testing-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Kuo and Lu, Jianglin and Fu, Yun},
  title     = {Training-Free Cross-Modal Alignment via Anchor Profiles with Statistical Significance Testing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5704--5713},
}
mmDiff: A Noise-Robust Differentiable Ray-Tracing Framework for mmWave Scene Calibration and Channel Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Haofan and Cao, Yadi and Yi, Wanghao and Abari, Omid},
  title     = {{mmDiff}: A Noise-Robust Differentiable Ray-Tracing Framework for {mmWave} Scene Calibration and Channel Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6435--6444},
}
MARIO: Motion-Augmented Real-Time Multi-Sensor Inertial Odometry-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Yiquan and Yeon, Taeyoung and Gao, Chenfeng and Xu, Vasco and Liu, Xuanyou and Ahuja, Karan},
  title     = {{MARIO}: Motion-Augmented Real-Time Multi-Sensor Inertial Odometry},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3460--3469},
}
A Simple yet Effective Data Scaling Strategy for Semi-Supervised Medical Image Segmentation-
[pdf]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yajun},
  title     = {A Simple yet Effective Data Scaling Strategy for Semi-Supervised Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5255--5264},
}
Deep Parameter Interpolation for Scalar Conditioning-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2026_CVPR,
  author    = {Park, Chicago Y. and McCann, Michael T. and Garcia-Cardona, Cristina and Wohlberg, Brendt and Kamilov, Ulugbek S.},
  title     = {Deep Parameter Interpolation for Scalar Conditioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3884--3892},
}
Zero-Shot Textual Explanations via Translating Decision-Critical Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yamauchi_2026_CVPR,
  author    = {Yamauchi, Toshinori and Kera, Hiroshi and Kawamoto, Kazuhiko},
  title     = {Zero-Shot Textual Explanations via Translating Decision-Critical Features},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3282--3292},
}
Learning from Noisy Prompts: Saliency-Guided Prompt Distillation for Robust Segmentation with SAM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Jingxuan and Zhang, Ziqi and Zheng, Shaoming and Li, Shuang and Patel, Uday Bharat and Fitzhugh, Alexander Harry and Lung, Phillip and Kiberu, Yusuf and Jathanna, Nikesh and Jamil-Copley, Shahnaz and Kainz, Bernhard and Qin, Chen},
  title     = {Learning from Noisy Prompts: Saliency-Guided Prompt Distillation for Robust Segmentation with {SAM}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5224--5234},
}
LLM Guided Multi Style Typography and Layout Generation via Dynamic Direct Preference Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Fu_2026_CVPR, author = {Fu, Chen and Yi, Shengzhou and Xiao, Ling and Yamasaki, Toshihiko}, title = {{LLM} Guided Multi Style Typography and Layout Generation via Dynamic Direct Preference Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5725--5734} }
FlowSteer: Conditioning Flow Field for Consistent Image Restoration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wickremasinghe_2026_CVPR, author = {Wickremasinghe, Tharindu and Qi, Chenyang and Weligampola, Harshana and Tu, Zhengzhong and Chan, Stanley H.}, title = {{FlowSteer}: Conditioning Flow Field for Consistent Image Restoration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5106--5116} }
DraCo: Draft as CoT for Text-to-Image Preview and Rare Concept Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Dongzhi and Zhang, Renrui and Li, Haodong and Zong, Zhuofan and Guo, Ziyu and He, Jun and Guo, Claire and Ye, Junyan and Fang, Rongyao and Li, Weijia and Liu, Rui and Li, Hongsheng}, title = {{DraCo}: Draft as {CoT} for Text-to-Image Preview and Rare Concept Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5903--5914} }
RelativeFlow: Taming Medical Image Denoising Learning with Noisy Reference-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Yuxin and Dong, Yiqing and Yu, Wenxue and Wu, Zhan and Ge, Rongjun and Chen, Yang and He, Yuting}, title = {{RelativeFlow}: Taming Medical Image Denoising Learning with Noisy Reference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5673--5682} }
HypHOI: Exploring Hierarchical Hyperbolic Embeddings for Human-Object Interaction Detection-
[pdf]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Yixin and Liu, Yu and Wang, Weimin and Guo, Yanming and Jia, Qi}, title = {{HypHOI}: Exploring Hierarchical Hyperbolic Embeddings for Human-Object Interaction Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6517--6527} }
Drive-Cascade: Autoregressive Occupancy to LiDAR and Video Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Lei_2026_CVPR, author = {Lei, Shuangming and Huang, Yuehao and Yi, Yao and Xie, Yijia and Wang, Jingke and Wang, Ruoyu and Lv, Jiajun and Xu, Guanglin and Ye, AiXue and Liu, Bingbing and Cheng, Siyuan and Zhang, Hongbo and Ma, Yukai and Liu, Yong}, title = {{Drive-Cascade}: Autoregressive Occupancy to {LiDAR} and Video Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {4552--4561} }
Discovering Attention Head Interactions in Vision Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2026_CVPR, author = {Lu, Zhenyu and Jia, Yuheng and You, Wei and Chen, Hao}, title = {Discovering Attention Head Interactions in Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3332--3342} }
SwiftPie: Lightning-fast Subject-driven Image Personalization via One step Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Duong_2026_CVPR, author = {Duong, Huy and Nguyen, Trong-Tung and Pham, Cuong and Tran, Anh and Nguyen, Khoi and Hoai, Minh}, title = {{SwiftPie}: Lightning-fast Subject-driven Image Personalization via One step Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {4708--4718} }
When Models Learn to Ask Why: Adaptive Causal Reasoning for Trustworthy Medical Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Jianxin and Zhu, Chunzheng and Kneuertz, Peter J. and Bai, Yunfei and Xue, Yuan}, title = {When Models Learn to Ask Why: Adaptive Causal Reasoning for Trustworthy Medical Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5556--5568} }
MVSSM: Motion-aware Visual State Space Model for Efficient Video Deblurring-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Chen and Wu, Tao and Liu, Wei and Wu, Xi and Fu, Ying}, title = {{MVSSM}: Motion-aware Visual State Space Model for Efficient Video Deblurring}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {4855--4865} }
FSMC-Pose: Frequency and Spatial Fusion with Multiscale Self-Calibration for Cattle Mounting Pose Estimation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Fangjing and Wang, Zhihai and Ding, Xinxin and Liu, Haiyang and Gao, Ronghua and Wang, Rong and Zhu, Yao and Jin, Ming}, title = {{FSMC-Pose}: Frequency and Spatial Fusion with Multiscale Self-Calibration for Cattle Mounting Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3620--3629} }
ZODS-RS -- Zero-Training Oriented Detection & Segmentation for Remote Sensing-
[pdf]
[bibtex]@InProceedings{Gu_2026_CVPR, author = {Gu, Zuan and Gao, Tianhan and Zhao, Langxu}, title = {{ZODS-RS} -- Zero-Training Oriented Detection \& Segmentation for Remote Sensing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6322--6330} }
Concise Geometric Description as a Bridge: Unleashing the Potential of LLM for Plane Geometric Problem Solving-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Jingyun and Li, Dian and Wang, Xiaohan and Liu, Gang and Yan, Jiahong and Kang, Guoliang}, title = {Concise Geometric Description as a Bridge: Unleashing the Potential of {LLM} for Plane Geometric Problem Solving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5958--5967} }
Towards Metric-Aware Multi-Person Mesh Recovery by Jointly Optimizing Human Crowd in Camera Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Kaiwen and Zheng, Kaili and Shi, Yiming and Guo, Chenyi and Wu, Ji}, title = {Towards Metric-Aware Multi-Person Mesh Recovery by Jointly Optimizing Human Crowd in Camera Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3532--3542} }
Meta-CDMTransNet: Cross-Domain Multi-Scale Transformer Meta-Learning Framework for Few-Shot Breast Histopathological Image Classification-
[pdf]
[bibtex]@InProceedings{Mohanta_2026_CVPR, author = {Mohanta, Anindita and Roy, Sourav Dey and Saha, Priya and Nath, Niharika and Bhowmik, Mrinal Kanti}, title = {{Meta-CDMTransNet}: Cross-Domain Multi-Scale Transformer Meta-Learning Framework for Few-Shot Breast Histopathological Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5525--5534} }
LiteEmbed: Adapting CLIP to Rare Classes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Agarwal_2026_CVPR, author = {Agarwal, Aishwarya and Karanam, Srikrishna and Gandhi, Vineet}, title = {{LiteEmbed}: Adapting {CLIP} to Rare Classes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6133--6142} }
HAFM: A Post-Fusion Gating Module for Haze-Aware RGB-Thermal Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Saeteros_2026_CVPR, author = {Saeteros, Juan M. and Ar{\'e}valo, Nick J. and Vintimilla, Boris X.}, title = {{HAFM}: A Post-Fusion Gating Module for Haze-Aware {RGB}-Thermal Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6197--6207} }
Inf-Dehaze: Beyond GPU Memory Constraints for Ultra-High-Resolution Image Dehazing-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Xinyu and Chen, Jiuchen and Xu, Qizhi}, title = {{Inf-Dehaze}: Beyond {GPU} Memory Constraints for Ultra-High-Resolution Image Dehazing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5086--5095} }
UniLat3D: Geometry-Appearance Unified Latents for Single-Stage 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Guanjun and Fang, Jiemin and Yang, Chen and Li, Sikuang and Yi, Taoran and Lu, Jia and Zhou, Zanwei and Cen, Jiazhong and Xie, Lingxi and Zhang, Xiaopeng and Wei, Wei and Liu, Wenyu and Wang, Xinggang and Tian, Qi}, title = {{UniLat3D}: Geometry-Appearance Unified Latents for Single-Stage {3D} Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {4366--4378} }
IM-Animation: An Implicit Motion Representation for Identity-Decoupled Character Animation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Zhufeng and Gao, Xuan and Liu, Feng-Lin and Zhang, Haoxian and Fang, Zhixue and Lai, Yu-Kun and Liu, Xiaoqiang and Wan, Pengfei and Gao, Lin}, title = {{IM-Animation}: An Implicit Motion Representation for Identity-Decoupled Character Animation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {4635--4646} }
Is Your Text-to-Image Model Robust to Caption Noise?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2026_CVPR, author = {Yu, Weichen and Yang, Ziyan and Lin, Shanchuan and Zhao, Qi and Wang, Jianyi and Gui, Liangke and Fredrikson, Matt and Jiang, Lu}, title = {Is Your Text-to-Image Model Robust to Caption Noise?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3789--3798} }
SyntheticManga: Training-Free Manga Generation with Phased Diffusion-
[pdf]
[supp]
[bibtex]@InProceedings{Peng_2026_CVPR, author = {Peng, Xuelei and Tang, Chi-Keung and Tai, Yu-Wing}, title = {{SyntheticManga}: Training-Free Manga Generation with Phased Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {4410--4418} }
FALCON: Fast Adaptive Lightweight Computation of Intensities and Events for Depth Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Venugopal_2026_CVPR, author = {Venugopal, Sankarshana and Mostafavi, Mohammad and Choi, Jonghyun}, title = {{FALCON}: Fast Adaptive Lightweight Computation of Intensities and Events for Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5024--5033} }
Adversarial Concept Distillation for One-Step Diffusion Personalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Yixiong and Wu, Tao and Li, Senmao and Yang, Shiqi and Wang, Yaxing and van de Weijer, Joost and Wang, Kai}, title = {Adversarial Concept Distillation for One-Step Diffusion Personalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {4321--4333} }
Bootstrapping Sign Language Annotations with Sign Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lea_2026_CVPR, author = {Lea, Colin and Baltatzis, Vasileios and Gillis, Connor and Kushalnagar, Raja and Quandt, Lorna and Findlater, Leah}, title = {Bootstrapping Sign Language Annotations with Sign Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3630--3640} }
SafetyBPO: Bidirectional Preference Optimization for Safe Text-to-Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, You and Zhu, Beier and Zhang, Chi}, title = {{SafetyBPO}: Bidirectional Preference Optimization for Safe Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {4759--4768} }
FastMMoE: Accelerating Multimodal Large Language Models through Dynamic Expert Activation and Routing-Aware Token Pruning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2026_CVPR, author = {Xia, Guoyang and Ding, Yifeng and Li, Fengfa and Ren, Lei and Chen, Wei and Feng, Fangxiang and Wang, Xiaojie}, title = {{FastMMoE}: Accelerating Multimodal Large Language Models through Dynamic Expert Activation and Routing-Aware Token Pruning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5915--5924} }
Vision-R1: Evolving Human-Free Alignment in Large Vision-Language Models via Vision-Guided Reinforcement Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhan_2026_CVPR, author = {Zhan, Yufei and Zhu, Yousong and Zhao, Hongyin and Yang, Fan and Zheng, Shurong and Tang, Ming and Wang, Jinqiao}, title = {{Vision-R1}: Evolving Human-Free Alignment in Large Vision-Language Models via Vision-Guided Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5807--5817} }
MedSAD-CLIP: Supervised CLIP with Token-Patch Cross-Attention for Medical Anomaly Detection and Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tran_2026_CVPR, author = {Tran, Thuy Truong and Do, Minh Kha and Duy, Phuc Nguyen and Lee, Min Hun}, title = {{MedSAD-CLIP}: Supervised {CLIP} with Token-Patch Cross-Attention for Medical Anomaly Detection and Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5168--5178} }
USV: Unified Sparsification for Accelerating Video Diffusion Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Xinjian and Wang, Hongmei and Zhou, Yuan and Lu, Qinglin}, title = {{USV}: Unified Sparsification for Accelerating Video Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {4180--4189} }
CtrlISP: Rescuing Low-Light RAW Images via Controllable Neural ISP-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Chi and Li, Yachun and Du, Hang and Yang, Shicai and Xie, Di and Zhu, Jiang and Yang, Yang}, title = {{CtrlISP}: Rescuing Low-Light {RAW} Images via Controllable Neural {ISP}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {4888--4897} }
A Diagnostic Study of Region-Based Representations in Multimodal LLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Ji and Cao, Shengcao and Wang, Yu-Xiong}, title = {A Diagnostic Study of Region-Based Representations in Multimodal {LLMs}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5978--5988} }
Towards Noise-Robust Medical Segmentation via Chebyshev-Attention-Based Asymmetric UNet-
[pdf]
[supp]
[bibtex]@InProceedings{Xin_2026_CVPR, author = {Xin, Yue and Zheng, Ziyang and Dai, Wenrui and Li, Chenglin and Zou, Junni and Xiong, Hongkai}, title = {Towards Noise-Robust Medical Segmentation via {Chebyshev}-Attention-Based Asymmetric {UNet}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5235--5244} }
HeartcareGPT: A Unified Multimodal ECG Suite for Dual Signal-Image Modeling and Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Yihan and Li, Sijing and Wang, Zhuonan and Lin, Tianwei and Yang, Chenglin and Zhong, Yu and Yan, Wenjie and Zhang, Wenqiao and Guo, Xiaogang and Xiao, Jun and Zhuang, Yueting and Ooi, Beng Chin}, title = {{HeartcareGPT}: A Unified Multimodal {ECG} Suite for Dual Signal-Image Modeling and Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6228--6238} }
Rethinking VLMs for Image Forgery Detection and Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Shaofeng and Cui, Jiequan and Hong, Richang}, title = {Rethinking {VLMs} for Image Forgery Detection and Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5828--5837} }
MReactor: Offline Multiple Appropriate Facial Reaction Generation with Hierarchical Cognitive Disentanglement-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2026_CVPR, author = {Luo, Jiachen and He, Jiajun and Shen, Shuai and Wang, Lin and Phan, Huy and Reiss, Joshua and Haijun, Lin and Schuller, Bjoern and Fu, Zeyu and Song, Siyang}, title = {{MReactor}: Offline Multiple Appropriate Facial Reaction Generation with Hierarchical Cognitive Disentanglement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3354--3363} }
Towards Calibrated Gradient-based Multi-Task Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2026_CVPR, author = {Cao, Linxiao and Yang, Mianzimei and Zhou, Zhipeng and Xie, Hong and Lian, Defu and Yang, Menglin}, title = {Towards Calibrated Gradient-based Multi-Task Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5127--5136} }
NAKUL-Med: Spectral-Graph State Space Models with Dynamics Kernels for Medical Signals-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Patro_2026_CVPR, author = {Patro, Badri N. and Agneeswaran, Vijay S.}, title = {{NAKUL-Med}: Spectral-Graph State Space Models with Dynamics Kernels for Medical Signals}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5399--5408} }
C3-Diff: Super-resolving Spatial Transcriptomics via Cross-modal Cross-content Contrastive Diffusion Modelling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xiaofei and Price, Stephen J. and Li, Chao}, title = {{C3-Diff}: Super-resolving Spatial Transcriptomics via Cross-modal Cross-content Contrastive Diffusion Modelling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5463--5473} }
DSA: Dynamic Step Allocation for Fast Autoregressive Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Le_2026_CVPR, author = {Le, Thanh-Tung and Zhao, Yunhan and Chai, Menglei and Shen, Zhengyang and Cao, Zhe and Tang, Danhang and Xie, Xiaohui and Kong, Deying}, title = {{DSA}: Dynamic Step Allocation for Fast Autoregressive Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {4334--4344} }
If you can describe it, they can see it: Cross-Modal Learning of Visual Concepts from Textual Descriptions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Barbano_2026_CVPR, author = {Barbano, Carlo Alberto and Molinaro, Luca and Ciranni, Massimiliano and Aiello, Emanuele and Pastore, Vito Paolo and Grangetto, Marco}, title = {If you can describe it, they can see it: Cross-Modal Learning of Visual Concepts from Textual Descriptions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6122--6132} }
HAIT: Hybrid Adversarial Iterative Training for Mitigating Object Hallucination in Large Vision-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Liangjie and Wenjie, Liao and Feng, Ming and Song, Xiaohui and Li, Huafei and Lu, Haonan}, title = {{HAIT}: Hybrid Adversarial Iterative Training for Mitigating Object Hallucination in Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6072--6079} }
Anatomy-Aware Adaptive Feature Perturbation Framework for Semi-Supervised MRI Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Ji and Peng, Bo and Li, Suping and Zhang, Qianni}, title = {Anatomy-Aware Adaptive Feature Perturbation Framework for Semi-Supervised {MRI} Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5620--5631} }
HarmoniDiff-RS: Training-Free Diffusion Harmonization for Satellite Image Composition-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhuang_2026_CVPR, author = {Zhuang, Xiaoqi and dos Santos, Jefersson A. and Han, Jungong}, title = {{HarmoniDiff-RS}: Training-Free Diffusion Harmonization for Satellite Image Composition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6352--6360} }
Vote-in-Context: VLMs as Explainable Zero-Shot Rank Fusers-
[pdf]
[supp]
[bibtex]@InProceedings{Eltahir_2026_CVPR, author = {Eltahir, Mohamed and Habibullah, Ali and Ayash, Lama and Hussain, Tanveer and Khan, Naeemullah}, title = {{Vote-in-Context}: {VLMs} as Explainable Zero-Shot Rank Fusers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6496--6505} }
PCSTracker: Long-term Scene Flow Estimation for Point Cloud Sequences-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Min and Xu, Gangwei and Wang, Xianqi and Peng, Yuyi and Yang, Xin}, title = {{PCSTracker}: Long-term Scene Flow Estimation for Point Cloud Sequences}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {4920--4930} }
Vision Inference Former: Sustaining Visual Consistency in Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2026_CVPR, author = {Dong, Xinpeng and Zhang, Min and Han, Kairong and Tan, Xu and Wu, Fei and Kuang, Kun}, title = {{Vision Inference Former}: Sustaining Visual Consistency in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6100--6110} }
R2MoE: Representation and Expert Selection Dual-Regularized Mixture-of-Experts for Multimodal Clinical Data-
[pdf]
[supp]
[bibtex]@InProceedings{Raza_2026_CVPR, author = {Raza, Wajih Hassan and Schiess, Mya and Lemus, Juan Martinez and Ellmore, Timothy Michael and Green, Charles and Soto, Claudio and Fu, Xin and Hu, Renjie}, title = {{R2MoE}: Representation and Expert Selection Dual-Regularized Mixture-of-Experts for Multimodal Clinical Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5776--5785} }
Adapting with an Open Mind: Leveraging Open-Vocabulary Detectors for Closed Set Source-Free Domain Adaptive Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Borgavi_2026_CVPR, author = {Borgavi, Kaustubh R. and Shashikumar, Sarvesh and Arora, Chetan}, title = {Adapting with an Open Mind: Leveraging Open-Vocabulary Detectors for Closed Set Source-Free Domain Adaptive Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6570--6581} }
TICON: A Slide-Level Tile Contextualizer for Histopathology Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Belagali_2026_CVPR, author = {Belagali, Varun and Kapse, Saarthak and Marza, Pierre and Das, Srijan and Li, Zilinghan and Boutaj, Sofi{\`e}ne and Pati, Pushpak and Yellapragada, Srikar and Nandi, Tarak Nath and Madduri, Ravi K. and Saltz, Joel and Prasanna, Prateek and Christodoulidis, Stergios and Vakalopoulou, Maria and Samaras, Dimitris}, title = {{TICON}: A Slide-Level Tile Contextualizer for Histopathology Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5440--5451} }
CLASH: A Benchmark for Cross-Modal Contradiction Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Popordanoska_2026_CVPR, author = {Popordanoska, Teodora and Li, Jiameng and Blaschko, Matthew B.}, title = {{CLASH}: A Benchmark for Cross-Modal Contradiction Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6051--6061} }
GeoHOI: Geometry-Enhanced Human-Object Interaction Video Generation via Hierarchical Multi-Modal Injection-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR, author = {Xu, Ziyi and Rao, Zejing and Cao, Juan and Liu, Xiaoqiang and Fang, Zhixue and Zhang, Haoxian and Tang, Songlin and Tang, Fan}, title = {{GeoHOI}: Geometry-Enhanced Human-Object Interaction Video Generation via Hierarchical Multi-Modal Injection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3739--3748} }
UniD-Shift: Towards Unified Semantic Segmentation via Interpretable Shared-Private Multimodal Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shuai and Shi, Zhecheng and Li, Zhuoxiao and Ou, Jing and Wang, Tengxi and Liu, Yuan and Zhao, Wufan}, title = {{UniD-Shift}: Towards Unified Semantic Segmentation via Interpretable Shared-Private Multimodal Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6382--6393} }
Beyond Optimal Transport: Model-Aligned Coupling for Flow Matching-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Yexiong and Yao, Yu and Zhou, Yang and Liu, Tongliang}, title = {Beyond Optimal Transport: Model-Aligned Coupling for Flow Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3955--3964} }
All-Age Human Mesh Recovery-
[pdf]
[supp]
[bibtex]@InProceedings{Bravo-Sanchez_2026_CVPR, author = {Bravo-S{\'a}nchez, Laura and Armando, Matthieu and Br{\'e}gier, Romain and Rogez, Gr{\'e}gory and Yeung-Levy, Serena and Baradel, Fabien}, title = {All-Age Human Mesh Recovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3677--3687} }
LlamaRG: A Multi-View Large Language Model for Radiology Report Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Jayas_2026_CVPR, author = {Jayas, Tanuja and Rastogi, Aditya and Raghavan, Pavithra and Brugnara, Gianluca and Schlamp, Kai and Foltyn-Dumitru, Martha and Vollmuth, Philipp}, title = {{LlamaRG}: A Multi-View Large Language Model for Radiology Report Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5745--5754} }
Detecting Precise Hand Touch Moments in Egocentric Video-
[pdf]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Huy Anh and Dayoub, Feras and Hoai, Minh}, title = {Detecting Precise Hand Touch Moments in Egocentric Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3565--3574} }
CETCam: Camera-Controllable Video Generation via Consistent and Extensible Tokenization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Zelin and Gong, Xinyu and Liu, Bangya and Song, Ziyang and Zhang, Jun and Wu, Suhui and Chen, Yongxin and Zhang, Hao}, title = {{CETCam}: Camera-Controllable Video Generation via Consistent and Extensible Tokenization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {4749--4758} }
Rethinking Training Dynamics in Scale-Wise Autoregressive Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2026_CVPR, author = {Zhou, Gengze and Ge, Chongjian and Tan, Hao and Liu, Feng and Hong, Yicong}, title = {Rethinking Training Dynamics in Scale-Wise Autoregressive Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {4593--4602} }
Scene-Level Heterogeneous Physics Simulation with 3D Gaussian Splats-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Xiaoyang and Wu, Shangzhe and Han, Kai}, title = {Scene-Level Heterogeneous Physics Simulation with {3D} {Gaussian} Splats}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {6456--6465} }
OTPrune: Distribution-Aligned Visual Token Pruning Via Optimal Transport-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Xiwen and Zhu, Wenhui and Li, Gen and Dong, Xuanzhao and Xiong, Yujian and Wang, Hao and Qiu, Peijie and Song, Qingquan and Wang, Zhipeng and Tang, Shao and Wang, Yalin and Razi, Abolfazl}, title = {{OTPrune}: Distribution-Aligned Visual Token Pruning Via Optimal Transport}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {5849--5859} }
Consistent Video Editing as Flow-Driven Image-to-Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Ge and Fan, Songlin and Liu, Hangxu and Song, Quanjian and Wang, Hewei and Xu, Jinfeng}, title = {Consistent Video Editing as Flow-Driven Image-to-Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {4625--4634} }
DMin: Scalable Training Data Influence Estimation for Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Huawei and Lao, Yingjie and Zhao, Weijie}, title = {{DMin}: Scalable Training Data Influence Estimation for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = jun, year = {2026}, pages = {3293--3302} }
Adapting Large VLMs with Iterative and Manual Instructions for Generative Low-light Enhancement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Xiaoran and Wang, Liyan and Jin, Yeying and Lam, Kin-man and Su, Zhixun and Yang, Yang and Pan, Jinshan and Wang, Cong},
  title     = {Adapting Large {VLMs} with Iterative and Manual Instructions for Generative Low-light Enhancement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4832--4842},
}
Evaluating Low-Light Image Enhancement Across Multiple Intensity Levels-
[pdf]
[supp]
[bibtex]@InProceedings{Pilligua_2026_CVPR,
  author    = {Pilligua, Maria and Serrano-Lozano, David and Peng, Pai and Baldrich, Ramon and Brown, Michael S. and Vazquez-Corral, Javier},
  title     = {Evaluating Low-Light Image Enhancement Across Multiple Intensity Levels},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5014--5023},
}
Seen-to-Scene: Keep the Seen, Generate the Unseen for Video Outpainting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeon_2026_CVPR,
  author    = {Jeon, Inseok and Lee, Minhyeok and Lee, Seunghoon and Kang, Minseok and Cho, Suhwan and Lee, Sangyoun},
  title     = {{Seen-to-Scene}: Keep the Seen, Generate the Unseen for Video Outpainting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4266--4275},
}
ForenDeX: Unlocking Forensic Insights for Explainable AI-Generated Image Detection-
[pdf]
[bibtex]@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Chuangchuang and Wang, Jinglu and Ming, Xiang and Tao, Renshuai and Wei, Yunchao and Zhao, Yao and Lu, Yan},
  title     = {{ForenDeX}: Unlocking Forensic Insights for Explainable {AI}-Generated Image Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6592--6601},
}
Gaze into the Details: Locality-Sensitive Enhancement for OCTA Retinal Vessel Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Tuopusen and Ma, Ding and Wu, Xiangqian},
  title     = {Gaze into the Details: Locality-Sensitive Enhancement for {OCTA} Retinal Vessel Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5409--5418},
}
DELRER: Disease Evolution-Informed Longitudinal Radiology Report Generation-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Kaiyu and Wang, Bing and Li, Changchun and Lu, You and Wang, Yaning and Zhang, Huimao and Li, Ximing},
  title     = {{DELRER}: Disease Evolution-Informed Longitudinal Radiology Report Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5357--5367},
}
THOM: Generating Physically Plausible Hand-Object Meshes From Text-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2026_CVPR,
  author    = {Jeong, Uyoung and Tiruneh, Yihalem Yimolal and Chang, Hyung Jin and Baek, Seungryul and Kim, Kwang In},
  title     = {{THOM}: Generating Physically Plausible Hand-Object Meshes From Text},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3653--3664},
}
A Denoising-Enhanced Multimodal Learning Framework for Robust Nasal Endoscopy Report Generation-
[pdf]
[bibtex]@InProceedings{Yuan_2026_CVPR,
  author    = {Yuan, Xinpan and Huang, Mingzhu and Hua, Liujie and Ju, Jianuo and Zhao, Xiaowei and Wu, Lin Yuanbo},
  title     = {A Denoising-Enhanced Multimodal Learning Framework for Robust Nasal Endoscopy Report Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5545--5555},
}
Elicit and Enhance: Advancing Multimodal Reasoning in Medical Scenarios-
[pdf]
[bibtex]@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Zhongzhen and Mu, Linjie and Gu, Yannian and Hu, Kangzhe and Hua, Shengyi and Zhang, Xiaofan},
  title     = {Elicit and Enhance: Advancing Multimodal Reasoning in Medical Scenarios},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5609--5619},
}
PRADA: Probability-Ratio-Based Attribution and Detection of Autoregressive-Generated Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Damm_2026_CVPR,
  author    = {Damm, Simon and Ricker, Jonas and Petzka, Henning and Fischer, Asja},
  title     = {{PRADA}: Probability-Ratio-Based Attribution and Detection of Autoregressive-Generated Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6506--6516},
}
Personalized Functional Brain Network Modeling with Adaptive Auto-Weighted Learning for Automatic Brain Disorder Diagnosis-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yan and Liu, Kun and Li, Min},
  title     = {Personalized Functional Brain Network Modeling with Adaptive Auto-Weighted Learning for Automatic Brain Disorder Diagnosis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5505--5514},
}
RealDiffusion: Physics-informed Attention for Multi-character Storybook Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Qi and Chen, Jun and Tsang, Ivor and Dai, Guang},
  title     = {{RealDiffusion}: Physics-informed Attention for Multi-character Storybook Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4698--4707},
}
BLEG: LLM Functions as Powerful fMRI Graph-Enhancer for Brain Network Analysis-
[pdf]
[arXiv]
[bibtex]@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Rui and Wang, Zitong and Li, Jiaxing and Zheng, Weihuang and Kong, Youyong},
  title     = {{BLEG}: {LLM} Functions as Powerful {fMRI} Graph-Enhancer for Brain Network Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5662--5672},
}
Self-Guided Integrated Gradient Method for Attribution-
[pdf]
[supp]
[bibtex]@InProceedings{Henry_2026_CVPR,
  author    = {Henry, Sabrina and Ruget, Alice and Scholes, Stirling and Leach, Jonathan},
  title     = {Self-Guided Integrated Gradient Method for Attribution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3312--3321},
}
VHOI: Controllable Video Generation of Human-Object Interactions from Sparse Trajectories via Motion Densification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Wanyue and Foo, Lin Geng and Beeler, Thabo and Dabral, Rishabh and Theobalt, Christian},
  title     = {{VHOI}: Controllable Video Generation of Human-Object Interactions from Sparse Trajectories via Motion Densification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4009--4021},
}
UnfoldIR: Rethinking Deep Unfolding Network in Illumination Degradation Image Restoration-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR,
  author    = {He, Chunming and Zhang, Rihan and Xiao, Fengyang and Fang, Chengyu and Tang, Longxiang and Zhang, Rui and Farsiu, Sina},
  title     = {{UnfoldIR}: Rethinking Deep Unfolding Network in Illumination Degradation Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5003--5013},
}
UniTalking: A Unified Audio-Video Framework for Talking Portrait Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Hebeizi and Sun, Benyuan and Yang, Yi and Liang, Zihao and Yin, Zihao and Sha, Xiao and Wang, Chenliang},
  title     = {{UniTalking}: A Unified Audio-Video Framework for Talking Portrait Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4647--4656},
}
CADReasoner: Iterative Program Editing for CAD Reverse Engineering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kabisov_2026_CVPR,
  author    = {Kabisov, Soslan and Kirichuk, Vsevolod and Volkov, Andrey and Barannikov, Marina and Savrasov, Gennadiy and Konushin, Anton and Kuznetsov, Andrey and Zhemchuzhnikov, Dmitrii},
  title     = {{CADReasoner}: Iterative Program Editing for {CAD} Reverse Engineering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6143--6153},
}
MoVieDrive: Urban Scene Synthesis with Multi-Modal Multi-View Video Diffusion Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Guile and Huang, David and Bai, Dongfeng and Liu, Bingbing},
  title     = {{MoVieDrive}: Urban Scene Synthesis with Multi-Modal Multi-View Video Diffusion Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4290--4299},
}
S^2DiT: Sandwich Diffusion Transformer for Mobile Streaming Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Lin and Wu, Yushu and Lebedev, Aleksei and Lahiri, Dishani and Dong, Meng and Sahni, Arpit and Vasilkovsky, Michael and Chen, Hao and Hu, Ju and Siarohin, Aliaksandr and Tulyakov, Sergey and Wang, Yanzhi and Kag, Anil and Li, Yanyu},
  title     = {{S{\textasciicircum}2DiT}: Sandwich Diffusion Transformer for Mobile Streaming Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4355--4365},
}
VeCoR -- Velocity Contrastive Regularization for Flow Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2026_CVPR,
  author    = {Hong, Zong-Wei and Li, Jing-Lun and Li, Lin-Ze and Zhang, Shen and Tang, Yao},
  title     = {{VeCoR} -- Velocity Contrastive Regularization for Flow Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4739--4748},
}
Text-Driven Reasoning Video Editing via Reinforcement Learning on Digital Twin Representations-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shen_2026_CVPR,
  author    = {Shen, Yiqing and Li, Chenjia and Unberath, Mathias},
  title     = {Text-Driven Reasoning Video Editing via Reinforcement Learning on Digital Twin Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3945--3954},
}
GReD-RSITR: A Generative Re-Examined Discriminative Framework for Remote Sensing Image-Text Retrieval-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Shuhuai and Pei, Songwei and Liu, Bingfeng and Huang, Yuanzhou and Li, Qian and Wang, Shangguang},
  title     = {{GReD-RSITR}: A Generative Re-Examined Discriminative Framework for Remote Sensing Image-Text Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6312--6321},
}
Bridging Day and Night: Unsupervised Cross-Domain Re-Identification with Synergistic Prompt and Prototype Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Jiyang and Liu, Rui and Dai, Hang},
  title     = {Bridging Day and Night: Unsupervised Cross-Domain Re-Identification with Synergistic Prompt and Prototype Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6612--6621},
}
OminPSD: Layered PSD Generation with Diffusion Transformer-
[pdf]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Cheng and Song, Yiren and Wang, Haofan and Shou, Mike Zheng},
  title     = {{OminPSD}: Layered {PSD} Generation with Diffusion Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4190--4201},
}
Rolling and Denoising: Rethinking Dynamic Modal Fusion for Multi-Modal Object Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Shihao and Huang, Huaibo and Zheng, Aihua and Tang, Jin and He, Ran},
  title     = {Rolling and Denoising: Rethinking Dynamic Modal Fusion for Multi-Modal Object Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6560--6569},
}
Less is More: Multimodal Human Pose Estimation with Selective Fusion-
[pdf]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Yutong and Huang, Qianyi and Chen, Xu},
  title     = {Less is More: Multimodal Human Pose Estimation with Selective Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3575--3584},
}
DenoiseGS: Gaussian Reconstruction Model for Burst Denoising-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cheng_2026_CVPR,
  author    = {Cheng, Yongsen and Cai, Yuanhao and Zhang, Yulun},
  title     = {{DenoiseGS}: {Gaussian} Reconstruction Model for Burst Denoising},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5096--5105},
}
Video4Spatial: Towards Visuospatial Intelligence with Context-Guided Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Zeqi and Zhao, Yiwei and Li, Lingxiao and Lan, Yushi and Yu, Ning and Garg, Rahul and Taghavi, Mohammad H. and Pan, Xingang},
  title     = {{Video4Spatial}: Towards Visuospatial Intelligence with Context-Guided Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3933--3944},
}
Optical Tolerance-Compensated Diffusion Model for Image Restoration-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Hongji and Gong, Huihui and Zuo, Tanli and Zhao, Yu and Dai, Jin and Tian, Jingduo and Ni, Kai},
  title     = {Optical Tolerance-Compensated Diffusion Model for Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5064--5074},
}
EI: Early Intervention for Multimodal Imaging Based Disease Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Qijie and Lin, HaiLan and Li, Xirong},
  title     = {{EI}: Early Intervention for Multimodal Imaging Based Disease Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5632--5640},
}
Cross-Resolution Diffusion Models Via Network Pruning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Jiaxuan and Zhu, Junhan and Wang, Huan},
  title     = {Cross-Resolution Diffusion Models Via Network Pruning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4224--4233},
}
GR-Diffusion: Graph-Guided Relational-Aware Diffusion via Attention Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Xiaochen and Xi, Xiaoting and Yin, Chao and Li, Xiaoqiang and Dong, Daoguo},
  title     = {{GR-Diffusion}: Graph-Guided Relational-Aware Diffusion via Attention Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3759--3768},
}
One Layer Is Enough: Adapting Pretrained Visual Encoders for Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Yuan and Chen, Chen and Gu, Jiatao},
  title     = {One Layer Is Enough: Adapting Pretrained Visual Encoders for Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4688--4697},
}
Eevee: Towards Close-up High-resolution Video-based Virtual Try-on-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2026_CVPR,
  author    = {Zeng, Jianhao and Bai, Yancheng and Chen, Ruidong and Zhang, Xuanpu and Sun, Lei and Jin, Dongyang and Xu, Ryan and Zhang, Nannan and Song, Dan and Chu, Xiangxiang},
  title     = {Eevee: Towards Close-up High-resolution Video-based Virtual Try-on},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4614--4624},
}
NumeriKontrol: Adding Numeric Control to Diffusion Transformers for Instruction-based Image Editing-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zhenyu and Shen, Xiaoqi and Nan, Haotian and Zhang, Xinyu},
  title     = {{NumeriKontrol}: Adding Numeric Control to Diffusion Transformers for Instruction-based Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4389--4399},
}
AceMIL: Ordinal-Aware Multiple Instance Learning for Pathological Progression Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Shijie and Chen, Yiming and Gong, Yingyun and Zhou, Hongwen and Chen, Feng-Jung and Gao, Xieping and Chen, Zhineng},
  title     = {{AceMIL}: Ordinal-Aware Multiple Instance Learning for Pathological Progression Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5336--5346},
}
EggHand: A Multimodal Foundation Model for Egocentric Hand Pose Forecasting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Jaeyoung and Kim, Hyeondong and Kim, Yujin and Park, Daehee},
  title     = {{EggHand}: A Multimodal Foundation Model for Egocentric Hand Pose Forecasting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3521--3531},
}
VisionCreator: A Native Visual-Generation Agentic Model with Understanding, Thinking, Planning and Creation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Jinxiang and Lu, Zexin and He, Jiajun and Quan, Rongwei and Zhao, Wenzhe and Yang, Qinyu and Chen, Qi and Lin, Qin and Li, Chuyue and Gao, Tao and Shan, Yuhao and Guo, Song and Lu, Qinglin},
  title     = {{VisionCreator}: A Native Visual-Generation Agentic Model with Understanding, Thinking, Planning and Creation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4140--4149},
}
PEdit: Pareto-Guided Image Editing via Dynamic Latent Trajectory Control-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2026_CVPR,
  author    = {Park, Sooyeon and Park, Jaeil and Cho, Sung-Bae},
  title     = {{PEdit}: {Pareto}-Guided Image Editing via Dynamic Latent Trajectory Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4800--4809},
}
QDM: Quadtree-Based Region-Adaptive Sparse Diffusion Models for Efficient Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Donglin and Vicol, Paul and Qi, Xiaojuan and Liao, Renjie and Zhang, Xiaofan},
  title     = {{QDM}: Quadtree-Based Region-Adaptive Sparse Diffusion Models for Efficient Image Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5044--5053},
}
Weakly Supervised Micro-Expression Spotting based on Boundary Refinement Mechanism and Cross-subject Learning Representation-
[pdf]
[bibtex]@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Zhihua and Chang, Haolin and Miao, Guohua and Chen, Jianing},
  title     = {Weakly Supervised Micro-Expression Spotting based on Boundary Refinement Mechanism and Cross-subject Learning Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3428--3437},
}
OffNadirLoc: Benchmark and Framework for Challenging UAV-to-Satellite Geo-Localization under Large Off-Nadir Views-
[pdf]
[bibtex]@InProceedings{Qiao_2026_CVPR,
  author    = {Qiao, Qian and Liu, Wenye and Liu, Ting and Shu, Jiuhe and Wang, Peng},
  title     = {{OffNadirLoc}: Benchmark and Framework for Challenging {UAV}-to-Satellite Geo-Localization under Large Off-Nadir Views},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6394--6403},
}
PLCReg: Correlation-Aware Polar-Linear Attention for Guiding Medical Image Registration-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yedi and Huang, Wenhui and Zheng, Yuanjie},
  title     = {{PLCReg}: Correlation-Aware Polar-Linear Attention for Guiding Medical Image Registration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5535--5544},
}
PhySe-RPO: Physics and Semantics Guided Relative Policy Optimization for Diffusion-Based Surgical Smoke Removal-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Zining and Xue, Cheng and Liu, Chunhui and Xu, Bin and Chen, Ming and Hu, Xiaowei},
  title     = {{PhySe-RPO}: Physics and Semantics Guided Relative Policy Optimization for Diffusion-Based Surgical Smoke Removal},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5347--5356},
}
Do Audio-Visual Large Language Models Really See and Hear?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Selvakumar_2026_CVPR,
  author    = {Selvakumar, Ramaneswaran and Jayakumar, Kaousheik and Sakshi, S and Ghosh, Sreyan and Gao, Ruohan and Manocha, Dinesh},
  title     = {Do Audio-Visual Large Language Models Really See and Hear?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5892--5902},
}
Prompt-Guided Image Editing with Masked Logit Nudging in Visual Autoregressive Models-
[pdf]
[supp]
[bibtex]@InProceedings{El-Ghoussani_2026_CVPR,
  author    = {El-Ghoussani, Amir and H{\"o}lle, Marc and Carneiro, Gustavo and Belagiannis, Vasileios},
  title     = {Prompt-Guided Image Editing with Masked Logit Nudging in Visual Autoregressive Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4810--4820},
}
Beyond Pixel Loss: Video-INRs Prefer Perceptual Optimization-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Junqi and Cong, Wuyang and Lu, Ming and Xu, Bowei and Ma, Zhan},
  title     = {Beyond Pixel Loss: Video-{INRs} Prefer Perceptual Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4843--4854},
}
Depth Adaptive Efficient Visual Autoregressive Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Chunliang and Cao, Tianze and Zhao, Sanyuan},
  title     = {Depth Adaptive Efficient Visual Autoregressive Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4213--4223},
}
Jano: Adaptive Diffusion Generation with Early-Stage Convergence Awareness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yuyang and Zeng, Linqian and Zhou, Yijin and Li, Hengjie and Zhai, Jidong},
  title     = {Jano: Adaptive Diffusion Generation with Early-Stage Convergence Awareness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4486--4494},
}
Unified Urban Tuning: Co-Enhancing Satellite and Street View Reasoning with a Progressive Tuning Framework-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Yong and Zhang, Weiyu and Dai, Ling and Yang, Jian and Yin, Dacheng and Li, Sirun and Lyu, Jing and Rao, Fengyun and Zhang, Fan},
  title     = {Unified Urban Tuning: Co-Enhancing Satellite and Street View Reasoning with a Progressive Tuning Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6302--6311},
}
Anomaly Agent: Unified Anomaly Retrieval and Synthesis Before Manufacturing-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR,
  author    = {Li, Xiangyue and Wang, Xiaoyang and Yao, Siyue and Sun, Mingjie and Wu, Yupei},
  title     = {Anomaly Agent: Unified Anomaly Retrieval and Synthesis Before Manufacturing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4345--4354},
}
ControlPose: High-Fidelity Pose-Controlled Image Generation with Multi-Faceted Pose Disentanglement-
[pdf]
[supp]
[bibtex]@InProceedings{Du_2026_CVPR,
  author    = {Du, Zhongjing and Chen, Xiao and Nie, Zhiwei and Chen, Yuxuan and Liu, Chang and Ji, Xiangyang and Chen, Jie},
  title     = {{ControlPose}: High-Fidelity Pose-Controlled Image Generation with Multi-Faceted Pose Disentanglement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3851--3860},
}
P^2CS: Parallel Point Cloud Pre-Training with Semantic Consistency-
[pdf]
[bibtex]@InProceedings{Diao_2026_CVPR,
  author    = {Diao, Linshuang and Song, Sensen and Jia, Yuan and Qian, Yurong and Ren, Dayong},
  title     = {{P{\textasciicircum}2CS}: Parallel Point Cloud Pre-Training with Semantic Consistency},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5117--5126},
}
PHYLOMAN: Generative Behavior Control via Fusing LLM Planning and Physics-based Control-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jusheng and Tang, Jinzhou and Liu, Sidi and Wang, Jian and Wang, Keze},
  title     = {{PHYLOMAN}: Generative Behavior Control via Fusing {LLM} Planning and Physics-based Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3585--3597},
}
OmniHead: A Unified Model for Dynamic Nonverbal Facial Behaviors-
[pdf]
[supp]
[bibtex]@InProceedings{Vuillecard_2026_CVPR,
  author    = {Vuillecard, Pierre and Odobez, Jean-Marc},
  title     = {{OmniHead}: A Unified Model for Dynamic Nonverbal Facial Behaviors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3553--3564},
}
How far have we gone in Generative Image Restoration? A study on its capability, limitations and evaluation practices-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Xiang and Hu, Jinfan and You, Zhiyuan and Yan, Kainan and Tang, Yu and Dong, Chao and Gu, Jinjin},
  title     = {How far have we gone in Generative Image Restoration? A study on its capability, limitations and evaluation practices},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4909--4919},
}
VoxFace: Streaming Audio-Visual Synthesis via Relay-Style Multi-Token Prediction for Interactive Conversation-
[pdf]
[bibtex]@InProceedings{Xiong_2026_CVPR,
  author    = {Xiong, Junwen and Li, Chuanyue and Zhang, Peng},
  title     = {{VoxFace}: Streaming Audio-Visual Synthesis via Relay-Style Multi-Token Prediction for Interactive Conversation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3543--3552},
}
PTF-CT: Polar-Aware Temporal-Frequential Iterative Reconstruction for Sparse-View CT-
[pdf]
[supp]
[bibtex]@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Borui and Qin, Guanyi and Li, Chuanpu and Jin, Yueming},
  title     = {{PTF-CT}: Polar-Aware Temporal-Frequential Iterative Reconstruction for Sparse-View {CT}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5214--5223},
}
Two-Stage 3D Pulmonary Vessel Reconstruction via Trunk--Expansion Coupled Point Cloud Generation-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jie and Xin, Yu and Li, Guoqing},
  title     = {Two-Stage {3D} Pulmonary Vessel Reconstruction via Trunk--Expansion Coupled Point Cloud Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5245--5254},
}
Cross-Modal-Domain Generalization Through Semantically Aligned Discrete Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sen_2026_CVPR,
  author    = {Sen, Souptik and Younis, Raneen and Ahmadi, Zahra},
  title     = {Cross-Modal-Domain Generalization Through Semantically Aligned Discrete Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6080--6089},
}
HM-Talker: Hybrid Motion Modeling for High-Fidelity Talking Head Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Shiyu and Jiang, Kui and Jiang, Junjun and Liu, Xianming and Feng, Xiaocheng and Ma, Fei and Yao, Hongxun and Tian, Qi},
  title     = {{HM-Talker}: Hybrid Motion Modeling for High-Fidelity Talking Head Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3729--3738},
}
M^3D-BFS: a Multi-Stage Dynamic Fusion Strategy for Sample-Adaptive Multi-Modal Brain Network Analysis-
[pdf]
[bibtex]@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Rui and Zhang, Xiaotong and Li, Jiaxing and Li, Yueying and Wei, Jiayin and Kong, Youyong},
  title     = {{M{\textasciicircum}3D-BFS}: a Multi-Stage Dynamic Fusion Strategy for Sample-Adaptive Multi-Modal Brain Network Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5419--5429},
}
Gated Differential Linear Attention: A Linear-Time Decoder for High-Fidelity Medical Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Hongbo and Bozorgpour, Afshin and Merhof, Dorit and Zhang, Minjia},
  title     = {Gated Differential Linear Attention: A Linear-Time Decoder for High-Fidelity Medical Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5579--5588},
}
HippoMM: Hippocampal-inspired Multimodal Memory for Long Audiovisual Event Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2026_CVPR, author = {Lin, Yueqian and Zhang, Jingyang and Wang, Qinsi and Ye, Hancheng and Fu, Yuzhe and Liu, Yudong and Li, Hai Helen and Chen, Yiran}, title = {HippoMM: Hippocampal-inspired Multimodal Memory for Long Audiovisual Event Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5968-5977} }
FREE: Uncertainty-Aware Autoregression for Parallel Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2026_CVPR, author = {Wen, Xinwan and Li, Bowen and Luo, Jiajun and Li, Ye and Wang, Zhi}, title = {FREE: Uncertainty-Aware Autoregression for Parallel Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4097-4107} }
Controllable Radar Simulation with Waveform Parameter Embedding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Weiqing and Huang, Hao and Zhong, Chonghao and Lin, Yujie and Wang, Nan and Chen, Xiaoxue and Chen, Zhaoxi and Zhang, Saining and Yang, Shuocheng and Merriaux, Pierre and Lei, Lei and Zhao, Hao}, title = {Controllable Radar Simulation with Waveform Parameter Embedding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6424-6434} }
DiFlowDubber: Discrete Flow Matching for Automated Video Dubbing via Cross-Modal Alignment and Synchronization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Ngoc-Son and Tran, Thanh V. T. and Choi, Jeongsoo and Huynh-Nguyen, Hieu-Nghia and Hy, Truong-Son and Nguyen, Van}, title = {DiFlowDubber: Discrete Flow Matching for Automated Video Dubbing via Cross-Modal Alignment and Synchronization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5838-5848} }
PrismNet: Semantic-Aware Image Enhancement via Vision Transformer and Zero-Cost Gating-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Ruichen}, title = {PrismNet: Semantic-Aware Image Enhancement via Vision Transformer and Zero-Cost Gating}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4866-4876} }
ReCliFF: Adaptive Orthogonal Decoupling for Federated Fine-tuning of Medical MLLMs-
[pdf]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Yuncheng and Feng, Chun-Mei and Sun, Rui and Zhang, Le}, title = {ReCliFF: Adaptive Orthogonal Decoupling for Federated Fine-tuning of Medical MLLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5275-5284} }
Inference-Time Alignment of Diffusion Models with Evolutionary Algorithms-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jajal_2026_CVPR, author = {Jajal, Purvish and Eliopoulos, Nicholas John and Chou, Benjamin Shiue-Hal and Thiruvathukal, George K. and Davis, James C. and Lu, Yung-Hsiang}, title = {Inference-Time Alignment of Diffusion Models with Evolutionary Algorithms}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4118-4128} }
How2Sign-Synth3D: Markerless Holistic Sign Language Performance Capture and Synthetic Data for Dense Landmark Tracking-
[pdf]
[bibtex]@InProceedings{Tempfli_2026_CVPR, author = {Tempfli, Levente and Huber, Stephan and Koller, Oscar and Duarte, Amanda}, title = {How2Sign-Synth3D: Markerless Holistic Sign Language Performance Capture and Synthetic Data for Dense Landmark Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3501-3509} }
Learning Predictive Visuomotor Coordination-
[pdf]
[supp]
[bibtex]@InProceedings{Jia_2026_CVPR, author = {Jia, Wenqi and Lai, Bolin and Cao, Xu and Liu, Miao and Xu, Danfei and Rehg, James M.}, title = {Learning Predictive Visuomotor Coordination}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3609-3619} }
Generative Visual Chain-of-Thought for Image Editing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yin_2026_CVPR, author = {Yin, Zijin and Hang, Tiankai and Cheng, Yiji and Zhang, Shiyi and He, Runze and Xu, Yu and Wang, Chunyu and Li, Bing and Chang, Zheng and Liang, Kongming and Lu, Qinglin and Ma, Zhanyu}, title = {Generative Visual Chain-of-Thought for Image Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4657-4667} }
Actionable Human Motion Generation via Latent Imitation and Fine-Grained Text Completion-
[pdf]
[bibtex]@InProceedings{Xie_2026_CVPR, author = {Xie, Feiyang and Yuan, Haoqi and Lu, Zongqing}, title = {Actionable Human Motion Generation via Latent Imitation and Fine-Grained Text Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3384-3393} }
Deep-to-Shallow Knowledge Transfer: Multi-Scale Self-Distillation with Bidirectional Aware for 3D Brain Segmentation-
[pdf]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Ziwei and Tan, Dayu and Peng, Xin and Zhong, Weimin}, title = {Deep-to-Shallow Knowledge Transfer: Multi-Scale Self-Distillation with Bidirectional Aware for 3D Brain Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5158-5167} }
UniLayDiff: A Unified Diffusion Transformer for Content-Aware Layout Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Zeyang and Wang, Le and Zhou, Sanping and Wu, Yuxuan and Sun, Xiaolong and Hua, Gang and Li, Haoxiang}, title = {UniLayDiff: A Unified Diffusion Transformer for Content-Aware Layout Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4668-4677} }
AlignVAR: Towards Globally Consistent Visual Autoregression for Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Cencen and Zhang, Dongyang and Yin, Wen and Wang, Jielei and Li, Tianyu and Guo, Ji and Jiang, Wenbo and Wang, Guoqing and Lu, Guoming}, title = {AlignVAR: Towards Globally Consistent Visual Autoregression for Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5054-5063} }
Deepfake-Agent: Aggregating Semantic Forgery Clues for Generalizable Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2026_CVPR, author = {Guo, Xiao and Zhang, Yue and Bansal, Mohit and Liu, Xiaoming}, title = {Deepfake-Agent: Aggregating Semantic Forgery Clues for Generalizable Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4898-4908} }
CrossWeaver: Towards Efficient Cross-Modal Interweaving and Decoupling for Weakly-Aligned Multispectral Object Detection-
[pdf]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Haitian and Fang, Juan and Zhu, Yiren and Zhao, Xudong and Guo, Yufei and Zhang, Xiaohan and Hu, Xiaoxing and Yang, Xue and Ming, Qi}, title = {CrossWeaver: Towards Efficient Cross-Modal Interweaving and Decoupling for Weakly-Aligned Multispectral Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6361-6370} }
GHOST: Fast Category-Agnostic Hand-Object Interaction Reconstruction from RGB Videos Using Gaussian Splatting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Aboukhadra_2026_CVPR, author = {Aboukhadra, Ahmed Tawfik and Rogge, Marcel and Robertini, Nadia and Arafa, Abdalla and Malik, Jameel and Elhayek, Ahmed and Stricker, Didier}, title = {GHOST: Fast Category-Agnostic Hand-Object Interaction Reconstruction from RGB Videos Using Gaussian Splatting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3394-3404} }
Mitigating Batch Effects in Histopathology via Language-Mediated Robust Embedding Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yishu and Wu, Shushan and Zhang, Zhenzhong and Li, Didong and Yao, Huaxiu and Li, Yun and Carmichael, Iain and Hoadley, Katherine A. and Zhu, Hongtu and Wu, Di and Zhang, Daiwei}, title = {Mitigating Batch Effects in Histopathology via Language-Mediated Robust Embedding Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5201-5213} }
PhyFusion: Physics-Aware Infrared and Visible Image Fusion via Modality-Specific Physical Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Haiyang and Zhang, Huiqin and Zhang, Yanduo and Ma, Jiayi and Jiang, Junjun and Zhou, Huabing}, title = {PhyFusion: Physics-Aware Infrared and Visible Image Fusion via Modality-Specific Physical Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4993-5002} }
Anticipatory Planning for Multimodal AI Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2026_CVPR, author = {Liang, Yongyuan and Zhou, Shijie and Gu, Yu and Tan, Hao and Wu, Gang and Dernoncourt, Franck and Kil, Jihyung and Rossi, Ryan A. and Zhang, Ruiyi}, title = {Anticipatory Planning for Multimodal AI Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5925-5935} }
ADAPT: Attention Driven Adaptive Prompt Scheduling and InTerpolating Orthogonal Complements for Rare Concepts Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Kwanyoung and Oh, Hyunwoo and Cha, SeungJu and Koh, Sungho and Kim, Dong-Jin}, title = {ADAPT: Attention Driven Adaptive Prompt Scheduling and InTerpolating Orthogonal Complements for Rare Concepts Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4562-4571} }
Long-Tailed Out-of-Distribution Detection with Refined Separate Class Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Feng_2026_CVPR, author = {Feng, Shuai and Ge, Yuxin and Zhang, Baoming and Du, Yuntao and Chen, MingCai and Wang, Chongjun and Feng, Lei}, title = {Long-Tailed Out-of-Distribution Detection with Refined Separate Class Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6602-6611} }
Scaling Pre-training to One Hundred Billion Data for Vision Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Xiao and Alabdulmohsin, Ibrahim and Salz, Daniel and Li, Zhe and Rong, Keran and Zhai, Xiaohua}, title = {Scaling Pre-training to One Hundred Billion Data for Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6185-6196} }
V-GRPO: Online Reinforcement Learning for Denoising Generative Models Is Easier than You Think-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2026_CVPR, author = {Tang, Bingda and Zhang, Yuhui and Wang, Xiaohan and Mao, Jiayuan and Schmidt, Ludwig and Yeung-Levy, Serena}, title = {V-GRPO: Online Reinforcement Learning for Denoising Generative Models Is Easier than You Think}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3769-3778} }
SocialMirror: Reconstructing 3D Human Interaction Behaviors from Monocular Videos with Semantic and Geometric Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2026_CVPR, author = {Xia, Qi and Cong, Peishan and Wang, Ziyi and Sun, Yujing and Sun, Qin and Zhu, Xinge and Ye, Mao and Yang, Ruigang and Ma, Yuexin}, title = {SocialMirror: Reconstructing 3D Human Interaction Behaviors from Monocular Videos with Semantic and Geometric Guidance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3510-3520} }
Materialistic RIR: Material Conditioned Realistic RIR Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saad_2026_CVPR, author = {Saad, Mahnoor Fatima and Majumder, Sagnik and Grauman, Kristen and Al-Halah, Ziad}, title = {Materialistic RIR: Material Conditioned Realistic RIR Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5871-5881} }
Efficient Discrete Diffusion Model for Scalable Multi-Objective Traveling Salesman Problem-
[pdf]
[bibtex]@InProceedings{Su_2026_CVPR, author = {Su, Dawei and Fang, Zhanhong and Luo, Junyi and Wang, Debing and Chen, Jinbiao and Zhang, Zizhen}, title = {Efficient Discrete Diffusion Model for Scalable Multi-Objective Traveling Salesman Problem}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6261-6270} }
Value bounds and Convergence Analysis for Averages of LRP attributions-
[pdf]
[supp]
[bibtex]@InProceedings{Binder_2026_CVPR, author = {Binder, Alexander and Takmil-Homayouni, Nastaran and Dogan, Urun}, title = {Value bounds and Convergence Analysis for Averages of LRP attributions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3343-3353} }
DepthScopy: Decoupling Frequency for Endoscopic Depth Estimation in Sparsely-Textured Regions-
[pdf]
[bibtex]@InProceedings{Shi_2026_CVPR, author = {Shi, Minghai and Zhang, Xiaoxian and Liu, Xiaoyue and Yang, Fan and Li, Lei}, title = {DepthScopy: Decoupling Frequency for Endoscopic Depth Estimation in Sparsely-Textured Regions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5265-5274} }
SFS-DETR: Spatial-Frequency Selection for UAV Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Jia_2026_CVPR, author = {Jia, Dingding and Wang, Jiankang and Zhang, Longlong and Liu, Zhiheng and Wang, Xuan}, title = {SFS-DETR: Spatial-Frequency Selection for UAV Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6582-6591} }
Dynamic Full-body Motion Agent with Object Interaction via Blending Pre-trained Modular Controllers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nam_2026_CVPR, author = {Nam, Sanghyeok and Kim, Byoungjun and Park, Daehyung and Kim, Tae-Kyun}, title = {Dynamic Full-body Motion Agent with Object Interaction via Blending Pre-trained Modular Controllers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3698-3708} }
Mining Real-World Image Relations for Large-Scale Controllable Generation and Editing-
[pdf]
[bibtex]@InProceedings{Shao_2026_CVPR, author = {Shao, Hao and Liu, Liyang and Luo, Zhengxiong and Zong, Zhuofan and Li, Hongsheng}, title = {Mining Real-World Image Relations for Large-Scale Controllable Generation and Editing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3893-3902} }
DA-CLIP: Mitigating Granularity Mismatch in Zero-Shot Anomaly Detection via Decoupled Text-Visual Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Jianqin and Wang, Peng and Huang, Junming and Zhou, Xue and Yu, Li}, title = {DA-CLIP: Mitigating Granularity Mismatch in Zero-Shot Anomaly Detection via Decoupled Text-Visual Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6062-6071} }
PrismPrune: Decoupling Saliency and Diversity in Attention for Efficient Visual Token Pruning in VLMs-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Ziniu and Zhou, Shuheng and Liu, Mingqing and Deng, Hao and Zhu, Huijia}, title = {PrismPrune: Decoupling Saliency and Diversity in Attention for Efficient Visual Token Pruning in VLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6174-6184} }
From Coarse to Precise: Rethinking and Bridging Localization in Multimodal Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Xiao_2026_CVPR, author = {Xiao, Lysa and Liesaputra, Veronica and Szymanski, Lech and Cranefield, Stephen}, title = {From Coarse to Precise: Rethinking and Bridging Localization in Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5882-5891} }
UMI-HOI: Unifying Multimodal Information with Semantic Multi-Head Attention for Human-Object Interaction Detection-
[pdf]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Yuankai and Li, Zhinan and Patsch, Constantin and Zakour, Marsil and Salihu, Driton and Steinbach, Eckehard}, title = {UMI-HOI: Unifying Multimodal Information with Semantic Multi-Head Attention for Human-Object Interaction Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5999-6008} }
Mitigating Information Forgetting via Entropy-Driven Progressive Retrospection for Multimodal Long Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2026_CVPR, author = {Gao, Yifei and Xu, Ning and Jin, Guoqing and Zhang, Shenyuan and Liu, An-An}, title = {Mitigating Information Forgetting via Entropy-Driven Progressive Retrospection for Multimodal Long Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5755-5764} }
Circuit Tracing in Vision-Language Models: Understanding the Internal Mechanisms of Multimodal Thinking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Jingcheng and Xiong, Tianhu and Qian, Shengyi and Nahrstedt, Klara and Wu, Mingyuan}, title = {Circuit Tracing in Vision-Language Models: Understanding the Internal Mechanisms of Multimodal Thinking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3322-3331} }
ASTRA: Enhancing Multi-Subject Generation with Retrieval-Augmented Pose Guidance and Disentangled Position Embedding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2026_CVPR, author = {Xia, Tianze and Ning, Zijian and Zhao, Zonglin and Wang, Mingjia}, title = {ASTRA: Enhancing Multi-Subject Generation with Retrieval-Augmented Pose Guidance and Disentangled Position Embedding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3820-3829} }
FA-MoE: Improving Medical Image Generation Through Frequency-Aware Mixture of Experts-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2026_CVPR, author = {Sun, Yifan and Meng, Qingjie and Chen, Tao and Chen, Huiping}, title = {FA-MoE: Improving Medical Image Generation Through Frequency-Aware Mixture of Experts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3988-3997} }
Dual Anchors, Do It Better: Hierarchical Group Merging for Zero-Shot Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Roh_2026_CVPR, author = {Roh, Jimin and Kim, Dongkyu and Kang, Suk-Ju}, title = {Dual Anchors, Do It Better: Hierarchical Group Merging for Zero-Shot Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6218-6227} }
FlowC2S: Flowing from Current to Succeeding Frames for Fast and Memory-Efficient Video Continuation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Margaryan_2026_CVPR, author = {Margaryan, Hovhannes and Bammey, Quentin and Sandor, Christian}, title = {FlowC2S: Flowing from Current to Succeeding Frames for Fast and Memory-Efficient Video Continuation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3861-3872} }
Fast Kernel-Space Diffusion for Remote Sensing Pansharpening-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jin_2026_CVPR, author = {Jin, Hancong and Cao, Zihan and Deng, Liang-Jian and Li, Jingjing}, title = {Fast Kernel-Space Diffusion for Remote Sensing Pansharpening}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6291-6301} }
DebFilter: Eradicating Biases Stashed in Value-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Seung Hyuk and Kim, Songkuk}, title = {DebFilter: Eradicating Biases Stashed in Value}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4790-4799} }
MAE-XNT: A Foundation Model for Segmenting Neuronal Tissue Volumes Generated with X-Ray Nanotomography-
[pdf]
[supp]
[bibtex]@InProceedings{Laugros_2026_CVPR, author = {Laugros, Alfred and Roig, Sebastien and Pacureanu, Alexandra}, title = {MAE-XNT: A Foundation Model for Segmenting Neuronal Tissue Volumes Generated with X-Ray Nanotomography}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5388-5398} }
Vision-Language Models Encode Clinical Guidelines for Concept-Based Medical Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Harmanani_2026_CVPR, author = {Harmanani, Mohamed and Long, Bining and Guo, Zhuoxin and Wilson, Paul F. R. and Sabour, Amirhossein and To, Minh Nguyen Nhat and Fichtinger, Gabor and Abolmaesumi, Purang and Mousavi, Parvin}, title = {Vision-Language Models Encode Clinical Guidelines for Concept-Based Medical Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5589-5598} }
CREM: Compression-Driven Representation Enhancement for Multimodal Retrieval and Comprehension-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Lihao and Yang, Biao and Wang, Yan and Li, Da and Cao, Jiangxia and Luo, Yuxiao and Chen, Xiang and Wu, Xiangyu and Yuan, Wei and Yang, Fan and Ding, Guiguang and Gao, Tingting and Zhou, Guorui}, title = {CREM: Compression-Driven Representation Enhancement for Multimodal Retrieval and Comprehension}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5714-5724} }
Open-Set Spatial Gene Expression Prediction from Histological Images via Retrieval-Augmented Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Chaochen and Zuo, Meiyun and Xie, Lei}, title = {Open-Set Spatial Gene Expression Prediction from Histological Images via Retrieval-Augmented Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5495-5504} }
AnatomiX, an Anatomy-Aware Grounded Multimodal Large Language Model for Chest X-Ray Interpretation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hashmi_2026_CVPR, author = {Hashmi, Anees Ur Rehman and Saeed, Numan and Lippert, Christoph}, title = {AnatomiX, an Anatomy-Aware Grounded Multimodal Large Language Model for Chest X-Ray Interpretation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6009-6018} }
Rethinking Conditioning in Diffusion Models: Dynamic Token Scheduling for Efficient and Aligned Text-to-Image Generation-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jia and Fu, Xiaomeng and Gao, Yizhao and Wang, Jiaxu and Wang, Xi and So, Hayden Kwok-Hay}, title = {Rethinking Conditioning in Diffusion Models: Dynamic Token Scheduling for Efficient and Aligned Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4160-4169} }
Beyond Top-1: Forensic Analysis of Full Prediction Distributions Reveals Hidden Model Reasoning-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2026_CVPR, author = {Lee, Minhyeok}, title = {Beyond Top-1: Forensic Analysis of Full Prediction Distributions Reveals Hidden Model Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3273-3281} }
MotionDuet: Dual-Conditioned 3D Human Motion Generation with Video-Regularized Text Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yi-Yang and Sun, Tengjiao and Fang, Pengcheng and Wang, Deng-Bao and Cai, Xiaohao and Zhang, Min-Ling and Kim, Hansung}, title = {MotionDuet: Dual-Conditioned 3D Human Motion Generation with Video-Regularized Text Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3709-3718} }
Rethinking Whole-Body CT Image Interpretation: An Abnormality-Centric Approach-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Ziheng and Dai, Lisong and Zhang, Ya and Xie, Weidi and Wang, Yanfeng}, title = {Rethinking Whole-Body CT Image Interpretation: An Abnormality-Centric Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5179-5189} }
PaLMR: Towards Faithful Visual Reasoning via Multimodal Process Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Yantao and Yan, Chenyang and Hui, Qiang and Zhao, Fang and Cheng, Kanzhi and Tan, Chao and Gao, Huanlin and Zhang, Jianbing and Wang, Kai and Dai, Xinyu and Lian, Shiguo}, title = {PaLMR: Towards Faithful Visual Reasoning via Multimodal Process Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6111-6121} }
TinySR: Shallow Diffusion Transformers for Real-World Image Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2026_CVPR, author = {Dong, Linwei and Fan, Qingnan and Yu, Yuhang and Zhang, Qi and Chen, Jinwei and Luo, Yawei and Zou, Changqing}, title = {TinySR: Shallow Diffusion Transformers for Real-World Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5075-5085} }
FedErase: Personalized Federated Unlearning for Text-to-Image Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Geng_2026_CVPR, author = {Geng, Tianyu and Liang, Wenfei and Wang, Sijie and She, Rui and Tay, Wee Peng}, title = {FedErase: Personalized Federated Unlearning for Text-to-Image Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4035-4044} }
SCAIL: Towards Studio-Grade Character Animation via In-Context Learning of 3D-Consistent Pose Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2026_CVPR, author = {Yan, Wenhao and Ye, Sheng and Yang, Zhuoyi and Teng, Jiayan and Dong, ZhenHui and Wen, Kairui and Gu, Xiaotao and Liu, Yong-Jin and Tang, Jie}, title = {SCAIL: Towards Studio-Grade Character Animation via In-Context Learning of 3D-Consistent Pose Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4450-4460} }
Zero4D: Training-Free 4D Video Generation From Single Video Using Off-the-Shelf Video Diffusion Models-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Jangho and Kwon, Taesung and Ye, Jong Chul}, title = {Zero4D: Training-Free 4D Video Generation From Single Video Using Off-the-Shelf Video Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4045-4054} }
TAUE: Training-free Noise Transplant and Cultivation Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nagai_2026_CVPR, author = {Nagai, Daichi and Morita, Ryugo and Kitada, Shunsuke and Iyatomi, Hitoshi}, title = {TAUE: Training-free Noise Transplant and Cultivation Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3749-3758} }
Fast Autoregressive Video Generation with Diagonal Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Yang and Guo, Junliang and Wu, Haoyu and He, Tianyu and Pearce, Tim and Rashid, Tabish and Hofmann, Katja and Bian, Jiang}, title = {Fast Autoregressive Video Generation with Diagonal Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4419-4428} }
HAM: A Training-Free Style Transfer Approach via Heterogeneous Attention Modulation for Diffusion Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Yeqi and Li, Liang and Yang, Zhiwen and Sheng, Xichun and Zhao, Zhidong and Yan, Chenggang}, title = {HAM: A Training-Free Style Transfer Approach via Heterogeneous Attention Modulation for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3914-3923} }
PGDM: Physics-Guided Noise-Free Diffusion Model Based on Point Spread Function for Light-Scattering Removal in Unpaired Biomedical Images-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2026_CVPR, author = {Zhao, Jinze and Han, Keyi and Huang, Qiushi and Tian, Jie and Hu, Zhenhua}, title = {PGDM: Physics-Guided Noise-Free Diffusion Model Based on Point Spread Function for Light-Scattering Removal in Unpaired Biomedical Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5599-5608} }
FrameDiT: Diffusion Transformer with Matrix Attention for Efficient Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Le_2026_CVPR, author = {Le, Minh Khoa and Do, Kien and Nguyen, Duc Thanh and Tran, Truyen}, title = {FrameDiT: Diffusion Transformer with Matrix Attention for Efficient Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4234-4244} }
FREESTYLE: An Anchor-Free Mechanism for Training-Free Style-Aligned Image Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Oh_2026_CVPR, author = {Oh, Minseok and Park, Jihun and Gim, Jongmin and Choi, Minwoo and Lee, Kyoungmin and Fioretto, Ferdinando and Im, Sunghoon}, title = {FREESTYLE: An Anchor-Free Mechanism for Training-Free Style-Aligned Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3779-3788} }
SAT: Selective Aggregation Transformer for Image Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tran_2026_CVPR, author = {Tran, Dinh Phu and Do, Thao and Wazir, Saad and Kim, Seongah and Kim, Seon Kwon and Kim, Daeyoung}, title = {SAT: Selective Aggregation Transformer for Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4982-4992} }
Video2LoRA: Unified Semantic-Controlled Video Generation via Per-Reference-Video LoRA-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Zexi and Li, Baolu and Dai, Jing and Zhang, Yiming and Ma, Yue and Wang, Qinghe and Jia, Xu and Xu, Hongming}, title = {Video2LoRA: Unified Semantic-Controlled Video Generation via Per-Reference-Video LoRA}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4202-4212} }
Brain-Inspired Multimodal Spike Neural Network for Image-Text Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zong_2026_CVPR, author = {Zong, Xintao and Liu, Wenxuan and Ding, Jianhao and Yu, Zhaofei and Zhong, Xian and Huang, Tiejun}, title = {Brain-Inspired Multimodal Spike Neural Network for Image-Text Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5137-5146} }
WHOLE: World-Grounded Hand-Object Lifted from Egocentric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Yufei and Li, Jiaman and Rong, Ryan and Liu, C. Karen}, title = {WHOLE: World-Grounded Hand-Object Lifted from Egocentric Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3481-3491} }
E-GRPO: High Entropy Steps Drive Effective Reinforcement Learning for Flow Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shengjun and Zhang, Zhang and Dai, Chensheng and Duan, Yueqi}, title = {E-GRPO: High Entropy Steps Drive Effective Reinforcement Learning for Flow Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4429-4439} }
Video Generation Models are Good Latent Reward Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mi_2026_CVPR, author = {Mi, Xiaoyue and Yu, Wenqing and Lian, Jiesong and Jie, Shibo and Zhong, Ruizhe and Liu, Zijun and Zhang, Guozhen and Zhou, Zixiang and Xu, Zhiyong and Zhou, Yuan and Lu, Qinglin and Tang, Fan}, title = {Video Generation Models are Good Latent Reward Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4719-4728} }
From Adaptation to Generalization: Adaptive Visual Prompting for Medical Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Cetinkaya_2026_CVPR, author = {{\c{C}}etinkaya, Evren and Lee, Sangmin and Kim, Jung Uk and Lee, Hong Joo and Navab, Nassir}, title = {From Adaptation to Generalization: Adaptive Visual Prompting for Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5325-5335} }
StereoSpace: Depth-Free Synthesis of Stereo Geometry via End-to-End Diffusion in a Canonical Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Behrens_2026_CVPR, author = {Behrens, Tjark and Obukhov, Anton and Ke, Bingxin and Tosi, Fabio and Poggi, Matteo and Schindler, Konrad}, title = {StereoSpace: Depth-Free Synthesis of Stereo Geometry via End-to-End Diffusion in a Canonical Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3975-3987} }
Do Vision Models Perceive Illusory Motion in Static Images Like Humans?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rosario_2026_CVPR, author = {Rosario, Isabella E. and Cheng, Fan L. and Sun, Zitang and Kriegeskorte, Nikolaus}, title = {Do Vision Models Perceive Illusory Motion in Static Images Like Humans?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5515-5524} }
Volumetrically Consistent Implicit Atlas Learning via Neural Diffeomorphic Flow for Placenta MRI-
[pdf]
[supp]
[bibtex]@InProceedings{Taymourtash_2026_CVPR, author = {Taymourtash, Athena and Abulnaga, S Mazdak and Abaci-Turk, Esra and Grant, P Ellen and Golland, Polina}, title = {Volumetrically Consistent Implicit Atlas Learning via Neural Diffeomorphic Flow for Placenta MRI}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5285-5294} }
FUSION: Full-body Unified Motion Prior for Body and Hands Via Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Duran_2026_CVPR, author = {Duran, Enes and Athanasiou, Nikos and Kocabas, Muhammed and Black, Michael J. and Taheri, Omid}, title = {FUSION: Full-body Unified Motion Prior for Body and Hands Via Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3438-3448} }
Fast-HOI: Fast Human-Object Interaction Synthesis via Distilled Interaction Prior and Physical Constrains-
[pdf]
[supp]
[bibtex]@InProceedings{Pan_2026_CVPR, author = {Pan, Xiaokang and Zhang, Zhizhong and Liu, Yangyuan and Chen, Zhuoran and Zhang, Zhiwei and Ji, Bin and Chen, Mingang and Xie, Yong and Gong, Jingyu and Wang, Xuhong and Tan, Xin and Xie, Yuan}, title = {Fast-HOI: Fast Human-Object Interaction Synthesis via Distilled Interaction Prior and Physical Constrains}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3719-3728} }
PBSBench: A Multi-Level Vision-Language Framework and Benchmark for Hematopathology Whole Slide Image Interpretation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yuanlong and Chen, Weichi and Rajab, Adrian and Liu, Wenfang and Jin, Yulan and Srisuwananukorn, Andrew and Zhang, Ping}, title = {PBSBench: A Multi-Level Vision-Language Framework and Benchmark for Hematopathology Whole Slide Image Interpretation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5569-5578} }
Conformal Cross-Modal Active Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Huy Hoang and Jung, C{\'e}dric and Salehi, Shirin and Gl{\"u}ck, Tobias and Schmeink, Anke and Kugi, Andreas}, title = {Conformal Cross-Modal Active Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5147-5157} }
HazeMatching: Dehazing Light Microscopy Images with Guided Conditional Flow Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ray_2026_CVPR, author = {Ray, Anirban and Ashesh, Ashesh and Jug, Florian}, title = {HazeMatching: Dehazing Light Microscopy Images with Guided Conditional Flow Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5652-5661} }
Gaussian Shannon: High-Precision Diffusion Model Watermarking Based on Communication-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yi and Huang, Hongbo and Zhang, Liang-Jie}, title = {Gaussian Shannon: High-Precision Diffusion Model Watermarking Based on Communication}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3924-3932} }
Prototype and Sample Level Semantic Alignment for Incomplete Multi-View Clustering-
[pdf]
[bibtex]@InProceedings{Zhu_2026_CVPR, author = {Zhu, Zhengzhong and Zhou, Pei and Bai, Lanxi and Nie, Jia and Cheng, Li and Min, Shiquan and Zhu, Jiangping}, title = {Prototype and Sample Level Semantic Alignment for Incomplete Multi-View Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5818-5827} }
Diffusion^2: Turning 3D Environments into Radio Frequency Heatmaps-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2026_CVPR, author = {Park, Kyoungjun and Yang, Yifan and Ge, Changhan and Qiu, Lili and Jiang, Shiqi}, title = {Diffusion{\textasciicircum}2: Turning 3D Environments into Radio Frequency Heatmaps}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6414-6423} }
Objects in Generated Videos Are Slower Than They Appear: Models Suffer Sub-Earth Gravity and Don't Know Galileo's Principle...for now-
[pdf]
[supp]
[bibtex]@InProceedings{Thozhiyoor_2026_CVPR, author = {Thozhiyoor, Varun Varma and Tripathi, Shivam and Radhakrishnan, Venkatesh Babu and Bhattad, Anand}, title = {Objects in Generated Videos Are Slower Than They Appear: Models Suffer Sub-Earth Gravity and Don't Know Galileo's Principle...for now}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3830-3839} }
One Model for All: Unified Try-On and Try-Off in Any Pose via LLM-Inspired Bidirectional Tweedie Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Jinxi and He, Zijian and Wang, Guangrun and Li, Guanbin and Lin, Liang}, title = {One Model for All: Unified Try-On and Try-Off in Any Pose via LLM-Inspired Bidirectional Tweedie Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4310-4320} }
Surgical Procedural Planning as 3D World Modelling: Towards Automated Pulmonary Resection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Zhen and Dong, Zhaorong and Yang, Xiao and Huang, Liqin and Wu, Qiang and Zeng, Taidui and Zheng, Hanyu and Yang, Mingjing and Zheng, Shaohua and Ding, Wangbin and Pan, Lin}, title = {Surgical Procedural Planning as 3D World Modelling: Towards Automated Pulmonary Resection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5315-5324} }
A Framework for Evaluating Zero-Shot Image Generation in Concept-Based Explainability-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Astolfi_2026_CVPR, author = {Astolfi, Giacomo and Bianchi, Matteo and Campi, Riccardo and De Santis, Antonio and Brambilla, Marco}, title = {A Framework for Evaluating Zero-Shot Image Generation in Concept-Based Explainability}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3303-3311} }
Visual2Echo Compositional Contrastive Learning (V2E-CCL): Binaural Knowledge Distilled Network for Depth Prediction-
[pdf]
[bibtex]@InProceedings{Ismail_2026_CVPR, author = {Ismail, Nazrul and Malik, Owais Ahmed and Hong, Ong Wee}, title = {Visual2Echo Compositional Contrastive Learning (V2E-CCL): Binaural Knowledge Distilled Network for Depth Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6019-6028} }
A Low-Rank Learning Framework Integrating Detection, Masking, and Recovery for Occluded Facial Expression Recognition-
[pdf]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Yanzhong and Shi, Daming}, title = {A Low-Rank Learning Framework Integrating Detection, Masking, and Recovery for Occluded Facial Expression Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6528-6537} }
No Cache Left Idle: Accelerating diffusion model via Extreme-Slimming Caching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2026_CVPR, author = {Wen, Tingyan and Li, Haoyu and Chen, Yihuang and Zhou, Xing and Zhu, Lifei and Wang, XueQian}, title = {No Cache Left Idle: Accelerating diffusion model via Extreme-Slimming Caching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4108-4117} }
LoViC: Efficient Long Video Generation with Context Compression-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2026_CVPR, author = {Jiang, Jiaxiu and Li, Wenbo and Ren, Jingjing and Qiu, Yuping and Pei, Renjing and Song, Fenglong and Guo, Yong and Xu, Xiaogang and Wu, Han and Zuo, Wangmeng}, title = {LoViC: Efficient Long Video Generation with Context Compression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4022-4034} }
TalkVid: A Large-Scale Diversified Dataset for Audio-Driven Talking Head Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2026_CVPR, author = {Chen, Shunian and Huang, Hejin and Liu, Yexin and Ye, Zihan and Chen, Pengcheng and Zhu, Chenghao and Guan, Michael and Wang, Rongsheng and Chen, Junying and Hou, Jianye and Li, Bo and Li, Guanbin and Lim, Ser-Nam and Yang, Harry and Wang, Benyou}, title = {TalkVid: A Large-Scale Diversified Dataset for Audio-Driven Talking Head Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3492-3500} }
Loom: Diffusion-Transformer for Interleaved Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ye_2026_CVPR, author = {Ye, Mingcheng and Liu, Jiaming and Song, Yiren}, title = {Loom: Diffusion-Transformer for Interleaved Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4582-4592} }
Concept Erasure via Attention Redirection-
[pdf]
[supp]
[bibtex]@InProceedings{Schechter_2026_CVPR, author = {Schechter, Amit and Gal, Rinon and Kedem, Ofir and Chechik, Gal and Cohen-Or, Daniel}, title = {Concept Erasure via Attention Redirection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4572-4581} }
FIRE-CIR: Fine-grained Reasoning for Composed Fashion Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Garderes_2026_CVPR, author = {Gard{\`e}res, Fran{\c{c}}ois and Gauthier, Camille-Sovanneary and Ponce, Jean and Chen, Shizhe}, title = {FIRE-CIR: Fine-grained Reasoning for Composed Fashion Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5694-5703} }
BridgeDiffusion: Latent Space Optimization for Independent Body-Part Generation with Motion Consistency Bridges in Interactive Dance-
[pdf]
[supp]
[bibtex]@InProceedings{Huo_2026_CVPR, author = {Huo, Yufei and Li, Ao and Dai, Wenxun and Wu, Songli and Tang, Yansong}, title = {BridgeDiffusion: Latent Space Optimization for Independent Body-Part Generation with Motion Consistency Bridges in Interactive Dance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3449-3459} }
VideoCanvas: Unified Video Completion from Arbitrary Spatiotemporal Patches via In-Context Conditioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2026_CVPR, author = {Cai, Minghong and Wang, Qiulin and Ye, Zongli and Liu, Wenze and Liu, Quande and Ye, Weicai and Wang, Xintao and Wan, Pengfei and Gai, Kun and Yue, Xiangyu}, title = {VideoCanvas: Unified Video Completion from Arbitrary Spatiotemporal Patches via In-Context Conditioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4475-4485} }
Fashion130K: An E-commerce Fashion Dataset for Outfit Generation with Unified Multi-modal Condition-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Yu and Zhu, Ting and Liu, Yichun and Ma, Lichen and Shan, Xinyuan and Fu, Jingling and Shi, Yu and Huang, Junshi and Li, Yan}, title = {Fashion130K: An E-commerce Fashion Dataset for Outfit Generation with Unified Multi-modal Condition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4769-4779} }
PoseGen: In-Context LoRA Finetuning for Pose-Controllable Long Human Video Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2026_CVPR, author = {He, Jingxuan and Su, Busheng and Wong, Finn}, title = {PoseGen: In-Context LoRA Finetuning for Pose-Controllable Long Human Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4780-4789} }
Perturb and Recover: Fine-Tuning for Effective Backdoor Removal from CLIP-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Singh_2026_CVPR, author = {Singh, Naman Deep and Croce, Francesco and Hein, Matthias}, title = {Perturb and Recover: Fine-Tuning for Effective Backdoor Removal from CLIP}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6164-6173} }
Vision-Language Models for Automated 3D PET/CT Report Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiao_2026_CVPR, author = {Jiao, Wenpei and Yan, Ke and Zhang, Jiajin and Jin, Dakai and Xie, Zhaoheng}, title = {Vision-Language Models for Automated 3D PET/CT Report Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5295-5304} }
Animated-ART: Multi-Layer Transparent Video Generation-
[pdf]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Ziqiang and Wang, Yunnan and Chen, Dong and Dong, Yue and Li, Ji and Yuan, Yuhui and Jin, Xin}, title = {Animated-ART: Multi-Layer Transparent Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4150-4159} }
GLOW: Global Illumination-Aware Inverse Rendering of Indoor Scenes Captured with Dynamic Co-Located Light & Camera-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Jiaye and Hadadan, Saeed and Lin, Geng and Tu, Peihan and Zwicker, Matthias and Jacobs, David and Sengupta, Roni}, title = {GLOW: Global Illumination-Aware Inverse Rendering of Indoor Scenes Captured with Dynamic Co-Located Light \& Camera}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6445-6455} }
PEDRA: Evaluating the Realism of Pedestrian Dynamics in Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Appelle_2026_CVPR, author = {Appelle, Aaron and Lynch, Jerome P.}, title = {PEDRA: Evaluating the Realism of Pedestrian Dynamics in Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4461-4474} }
Attention-Guided Energy Optimization for Label-Aligned Anomaly Generation-
[pdf]
[bibtex]@InProceedings{Wan_2026_CVPR, author = {Wan, Zhibin and Gao, Zhiqiang and Sun, Mingjie and Wu, Yupei and Fu, Guohong and Yi, Ran}, title = {Attention-Guided Energy Optimization for Label-Aligned Anomaly Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4170-4179} }
ConSel: Concept-Aware Self-supervised Learning for Regression Beyond Ordinal Tasks-
[pdf]
[supp]
[bibtex]@InProceedings{Tariq_2026_CVPR, author = {Tariq, Abdullah and Saleem, Bisma and Azad, R Muhammad Atif and Masek, Martin and Gilani, Syed Zulqarnain}, title = {ConSel: Concept-Aware Self-supervised Learning for Regression Beyond Ordinal Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6548-6559} }
ColorMam: Color-Aware State Space Model for Image Color Style Transfer-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Jian and Peng, Jiaxin and Li, Yuchen and Zhou, Siwang}, title = {ColorMam: Color-Aware State Space Model for Image Color Style Transfer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4379-4388} }
Bind-Your-Avatar: Multi-Character-Talking Video Generation with Dynamic 3D-mask-based Embedding Router-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2026_CVPR, author = {Huang, Yubo and Wang, Weiqiang and Zhao, Sirui and Xu, Tong and Liu, Lin and Chen, Enhong}, title = {Bind-Your-Avatar: Multi-Character-Talking Video Generation with Dynamic 3D-mask-based Embedding Router}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4440-4449} }
Hoi3DGen: Generating High-Quality Human-Object-Interactions in 3D-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sharma_2026_CVPR, author = {Sharma, Agniv and Xie, Xianghui and Fischer, Tom and Ilg, Eddy and Pons-Moll, Gerard}, title = {Hoi3DGen: Generating High-Quality Human-Object-Interactions in 3D}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3405-3416} }
Unbiased Dynamic Multimodal Fusion-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wei_2026_CVPR, author = {Wei, Shicai and Zhang, Kaijie and Chen, Luyi and He, Tao and Duan, Guiduo}, title = {Unbiased Dynamic Multimodal Fusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6239-6249} }
Future Optical Flow Prediction Improves Robot Control and Video Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Ranasinghe_2026_CVPR, author = {Ranasinghe, Kanchana and Zhou, Honglu and Fang, Yu and Yang, Luyu and Xue, Le and Xu, Ran and Xiong, Caiming and Savarese, Silvio and Ryoo, Michael S and Niebles, Juan Carlos}, title = {Future Optical Flow Prediction Improves Robot Control and Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4528-4540} }
ExposeAnyone: Personalized Audio-to-Expression Diffusion Models Are Robust Zero-Shot Face Forgery Detectors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shiohara_2026_CVPR, author = {Shiohara, Kaede and Yamasaki, Toshihiko and Golyanik, Vladislav}, title = {ExposeAnyone: Personalized Audio-to-Expression Diffusion Models Are Robust Zero-Shot Face Forgery Detectors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3665-3676} }
OminiControl2: Efficient Conditioning for Diffusion Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2026_CVPR, author = {Tan, Zhenxiong and Xue, Qiaochu and Yang, Xingyi and Liu, Songhua and Wang, Xinchao}, title = {OminiControl2: Efficient Conditioning for Diffusion Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4256-4265} }
ProSM: Progressive Soft Masking for Fine-Grained Remote Image Segmentation-
[pdf]
[bibtex]@InProceedings{Nian_2026_CVPR, author = {Nian, Bingkun and Tang, Fenghe and Ning, Zhiwei and Jiang, Dongsheng and Li, Yin and Yang, Jie and Xiao, Rong and Zhou, Shaohua Kevin and Liu, Wei}, title = {ProSM: Progressive Soft Masking for Fine-Grained Remote Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6371-6381} }
Low-Bitrate Video Compression through Semantic-Conditioned Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Lingdong and Su, Guan-Ming and Kothandaraman, Divya and Huang, Tsung-Wei and Hajiesmaili, Mohammad and Sitaraman, Ramesh K.}, title = {Low-Bitrate Video Compression through Semantic-Conditioned Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4495-4505} }
Rethinking Medical High-Modality Learning Under Missingness -- A Long-Tailed Distribution Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Chenwei and Shuai, Zitao and Shen, Liyue}, title = {Rethinking Medical High-Modality Learning Under Missingness -- A Long-Tailed Distribution Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5641-5651} }
Uncertainty-Aware Cross-Modal Opinion Interaction: A General Framework for Visible-Infrared Vehicle and Person Re-Identification-
[pdf]
[bibtex]@InProceedings{Shan_2026_CVPR, author = {Shan, Shihao and Liu, Hongying and Shang, Fanhua and Wang, Qian and Song, Yang}, title = {Uncertainty-Aware Cross-Modal Opinion Interaction: A General Framework for Visible-Infrared Vehicle and Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6476-6485} }
Harnessing Layered Graphic Designs with Real Intentions for Text-to-Design Generation-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2026_CVPR, author = {Song, Xinya and Yang, Bo and Cao, Ying}, title = {Harnessing Layered Graphic Designs with Real Intentions for Text-to-Design Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4729-4738} }
COSTA: Collaborative Open-Set Test-Time Adaptation Through Robust Prototype Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Can and Li, Ruirui}, title = {COSTA: Collaborative Open-Set Test-Time Adaptation Through Robust Prototype Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6154-6163} }
Generative Vision-Language Multiple Instance Learning for Weakly Supervised Neonatal Fundus Screening and Reporting-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2026_CVPR, author = {Zhang, Xiao and Tan, Guangshuang and Hu, Jie and Kan, Shichao and Jiang, Bing and Liang, Yixiong}, title = {Generative Vision-Language Multiple Instance Learning for Weakly Supervised Neonatal Fundus Screening and Reporting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5190-5200} }
CoherentHand: Temporally Consistent 3D Hand Trajectory Synthesis with Semantic Motion Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Boote_2026_CVPR, author = {Boote, Bikram and Kim, Junho and Kara, Ozgur and Lee, Sangmin and Rehg, James M}, title = {CoherentHand: Temporally Consistent 3D Hand Trajectory Synthesis with Semantic Motion Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3417-3427} }
Learning Spatial-Preserving Hierarchical Representations for Digital Pathology-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2026_CVPR, author = {Wu, Weiyi and Diao, Xingjian and Zhang, Chunhui and Gao, Chongyang and Xu, Xinwen and Li, Siting and Gui, Jiang}, title = {Learning Spatial-Preserving Hierarchical Representations for Digital Pathology}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {5484-5494} }
Learning to Walk the Right Paths: Task-Responsive Graph Reasoning for Multimodal Inference-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2026_CVPR, author = {Li, Xuecheng and Jia, Weikuan and Zheng, Yuanjie}, title = {Learning to Walk the Right Paths: Task-Responsive Graph Reasoning for Multimodal Inference}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6040-6050} }
RectifiedHR: Enable Efficient High-Resolution Synthesis via Energy Rectification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2026_CVPR, author = {Yang, Zhen and Shen, Guibao and Li, Minyang and Hou, Liang and Liu, Mushui and Wang, Luozhou and Tao, Xin and Chen, Ying-Cong}, title = {RectifiedHR: Enable Efficient High-Resolution Synthesis via Energy Rectification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {3809-3819} }
Video Reasoning Without Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sridhar_2026_CVPR, author = {Sridhar, Deepak and Bhardwaj, Kartikeya and Jeyaraj, Jeya Pradha and Vasconcelos, Nuno and Nayak, Ankita and Teague, Harris}, title = {Video Reasoning Without Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {6250-6260} }
Towards Source-Aware Object Swapping with Initial Noise Perturbation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhan_2026_CVPR, author = {Zhan, Jiahui and Sun, Xianbing and Zhu, Xiangnan and Ji, Yikun and Liu, Ruitong and Zhang, Liqing and Zhang, Jianfu}, title = {Towards Source-Aware Object Swapping with Initial Noise Perturbation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4400-4409} }
TalkVerse: Democratizing Minute-Long Audio-Driven Video Generation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2026_CVPR, author = {Wang, Zhenzhi and Wang, Jian and Ma, Ke and Lin, Dahua and Zhou, Bing}, title = {TalkVerse: Democratizing Minute-Long Audio-Driven Video Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4516-4527} }
Linear Recurrent Unit with Semantic Modulation for Image Super-Resolution-
[pdf]
[supp]
[bibtex]@InProceedings{Choi_2026_CVPR, author = {Choi, Mingyu and Han, Woo Kyoung and Im, Sunghoon and Jin, Kyong Hwan}, title = {Linear Recurrent Unit with Semantic Modulation for Image Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4950-4960} }
RodNet: Visual Pathway-Inspired Adaptive Sparse Network for Efficient Low-Light Image Enhancement-
[pdf]
[bibtex]@InProceedings{Liu_2026_CVPR, author = {Liu, Boheng and Li, Ziyu and Zhang, Zhong and Xu, Mengrui and Duan, Chenghua and Liu, Dehao and Li, Qing and Wu, Xia}, title = {RodNet: Visual Pathway-Inspired Adaptive Sparse Network for Efficient Low-Light Image Enhancement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings}, month = {June}, year = {2026}, pages = {4961-4970} }
Back

