CVPR 2026 Open Access Repository

Findings

Back
Causal Chain-Guided Reasoning for Modular and Explainable Causal-Why Video Question Answering: Paritosh Parmar,

Eric Peh,

Basura Fernando; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Parmar_2026_CVPR,
  author    = {Parmar, Paritosh and Peh, Eric and Fernando, Basura},
  title     = {Causal Chain-Guided Reasoning for Modular and Explainable Causal-Why Video Question Answering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5860--5870},
}
Optimal-Transport-based Feature Alignment for Multimodal Change Detection: Mengqi Huang,

Jun Liu,

Li Cui,

Yuping Duan,

Faqiang Wang; [pdf]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Mengqi and Liu, Jun and Cui, Li and Duan, Yuping and Wang, Faqiang},
  title     = {Optimal-Transport-based Feature Alignment for Multimodal Change Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6342--6351},
}
Stepper: Stepwise Immersive Scene Generation with Multiview Panoramas: Felix Wimbauer,

Fabian Manhardt,

Michael Oechsle,

Nikolai Kalischek,

Christian Rupprecht,

Daniel Cremers,

Federico Tombari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wimbauer_2026_CVPR,
  author    = {Wimbauer, Felix and Manhardt, Fabian and Oechsle, Michael and Kalischek, Nikolai and Rupprecht, Christian and Cremers, Daniel and Tombari, Federico},
  title     = {Stepper: Stepwise Immersive Scene Generation with Multiview Panoramas},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4541--4551},
}
Multimodal Decoupled Dynamic Graph Learning for Brain Disease Diagnosis: Aimei Dong,

Yongxing Cai,

Bin Liu,

Jiale Sun,

Guixin Zhao; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Aimei and Cai, Yongxing and Liu, Bin and Sun, Jiale and Zhao, Guixin},
  title     = {Multimodal Decoupled Dynamic Graph Learning for Brain Disease Diagnosis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5430--5439},
}
How to Achieve Prototypical Birth and Death for OOD Detection?: Ningkang Peng,

Qianfeng Yu,

Xiaoqian Peng,

Linjing Qian,

Yafei Liu,

Canran Xiao,

Xinyu Lu,

Tingyu Lu,

Zhichao Zheng,

Yanhui Gu; [pdf] [arXiv]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Ningkang and Yu, Qianfeng and Peng, Xiaoqian and Qian, Linjing and Liu, Yafei and Xiao, Canran and Lu, Xinyu and Lu, Tingyu and Zheng, Zhichao and Gu, Yanhui},
  title     = {How to Achieve Prototypical Birth and Death for {OOD} Detection?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6466--6475},
}
TextBind: Your Vision-Language Models are Naturally Unified Multimodal Models: Xu Ma,

Yun Fu; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2026_CVPR,
  author    = {Ma, Xu and Fu, Yun},
  title     = {{TextBind}: Your Vision-Language Models are Naturally Unified Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6029--6039},
}
Decoupled Scale-wise Autoregressive Modeling for Visual Generation: Sucheng Ren,

Yaodong Yu,

Nataniel Ruiz,

Feng Wang,

Cihang Xie; [pdf]
[bibtex]
@InProceedings{Ren_2026_CVPR,
  author    = {Ren, Sucheng and Yu, Yaodong and Ruiz, Nataniel and Wang, Feng and Xie, Cihang},
  title     = {Decoupled Scale-wise Autoregressive Modeling for Visual Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4506--4515},
}
Pioneering Perceptual Video Fluency Assessment: A Novel Task with Benchmark Dataset and Baseline: Qizhi Xie,

Kun Yuan,

Yunpeng Qu,

Ming Sun,

Chao Zhou,

Jihong Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Qizhi and Yuan, Kun and Qu, Yunpeng and Sun, Ming and Zhou, Chao and Zhu, Jihong},
  title     = {Pioneering Perceptual Video Fluency Assessment: A Novel Task with Benchmark Dataset and Baseline},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4821--4831},
}
Turning Generators into Retrievers: Unlocking MLLMs for Natural Language-Guided Geo-Localization: Yuqi Chen,

Xiaohan Zhang,

Ahmad Arrabi,

Waqas Sultani,

Chen Chen,

Safwan Wshah; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yuqi and Zhang, Xiaohan and Arrabi, Ahmad and Sultani, Waqas and Chen, Chen and Wshah, Safwan},
  title     = {Turning Generators into Retrievers: Unlocking {MLLMs} for Natural Language-Guided Geo-Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6331--6341},
}
OmniMotion-X: Versatile Multimodal Whole-Body Motion Generation: Guowei Xu,

Yuxuan Bian,

Ailing Zeng,

Zhuo Chen,

Mingyi Shi,

Shaoli Huang,

Wen Li,

Lixin Duan,

Qiang Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Guowei and Bian, Yuxuan and Zeng, Ailing and Chen, Zhuo and Shi, Mingyi and Huang, Shaoli and Li, Wen and Duan, Lixin and Xu, Qiang},
  title     = {{OmniMotion-X}: Versatile Multimodal Whole-Body Motion Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3641--3652},
}
Contact Matrix: Enhancing Dance Motion Synthesis with Precise Interaction Modeling: Xuhai Chen,

Zhi Cen,

Huaijin Pi,

Sida Peng,

Xiaowei Zhou,

Yong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Xuhai and Cen, Zhi and Pi, Huaijin and Peng, Sida and Zhou, Xiaowei and Liu, Yong},
  title     = {Contact Matrix: Enhancing Dance Motion Synthesis with Precise Interaction Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3598--3608},
}
Block Cascading: Training Free Acceleration of Block-Causal Video Models: Hmrishav Bandyopadhyay,

Nikhil Pinnaparaju,

Rahim Entezari,

Jim Scott,

Yi-Zhe Song,

Varun Jampani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bandyopadhyay_2026_CVPR,
  author    = {Bandyopadhyay, Hmrishav and Pinnaparaju, Nikhil and Entezari, Rahim and Scott, Jim and Song, Yi-Zhe and Jampani, Varun},
  title     = {Block Cascading: Training Free Acceleration of Block-Causal Video Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4078--4088},
}
Group Relative Attention Guidance for Image Editing: Xuanpu Zhang,

Xuesong Niu,

Ruidong Chen,

Dan Song,

Jianhao Zeng,

Penghui Du,

Haoxiang Cao,

Kai Wu,

An-an Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xuanpu and Niu, Xuesong and Chen, Ruidong and Song, Dan and Zeng, Jianhao and Du, Penghui and Cao, Haoxiang and Wu, Kai and Liu, An-an},
  title     = {Group Relative Attention Guidance for Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3840--3850},
}
GeneFlow: Modeling Heredity and Variation via Flow Matching Transformers for Kinship Verification: Yihang Wu,

Xianxu Hou,

Linlin Shen; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yihang and Hou, Xianxu and Shen, Linlin},
  title     = {{GeneFlow}: Modeling Heredity and Variation via Flow Matching Transformers for Kinship Verification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3688--3697},
}
EIRES: Training-free AI-Generated Image Detection via Edit-Induced Reconstruction Error Shift: Wan Jiang,

Jing Yan,

Xiaojing Chen,

Ling Shen,

Chenhao Lin,

Yunfeng Diao,

Richang Hong; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Wan and Yan, Jing and Chen, Xiaojing and Shen, Ling and Lin, Chenhao and Diao, Yunfeng and Hong, Richang},
  title     = {{EIRES}: Training-free {AI}-Generated Image Detection via Edit-Induced Reconstruction Error Shift},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6486--6495},
}
Semantic-Aware Spectral Reconstruction: A Spectral Library-Aided Unsupervised Method Based on the Diffusion Model: Keli Deng,

Yuntao Qian; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2026_CVPR,
  author    = {Deng, Keli and Qian, Yuntao},
  title     = {Semantic-Aware Spectral Reconstruction: A Spectral Library-Aided Unsupervised Method Based on the Diffusion Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4940--4949},
}
InternVL-X: Advancing and Accelerating InternVL Series with Efficient Visual Token Compression: Dongchen Lu,

Zilu Zhang,

Leping Huang,

Yuyao Sun,

Jianliang Zeng,

Mao Shu,

Huo Cao; [pdf] [arXiv]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Dongchen and Zhang, Zilu and Huang, Leping and Sun, Yuyao and Zeng, Jianliang and Shu, Mao and Cao, Huo},
  title     = {{InternVL-X}: Advancing and Accelerating {InternVL} Series with Efficient Visual Token Compression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5765--5775},
}
Earthquake-Bench: Video Generation Benchmark for Earthquake Simulation: Lei Bao,

Hao Chen,

Yuyan Chen,

Kui Wu,

Lijia Chen,

Fangwei Zhong,

Feiran Huang,

Bo Song,

Han Yang; [pdf] [supp]
[bibtex]
@InProceedings{Bao_2026_CVPR,
  author    = {Bao, Lei and Chen, Hao and Chen, Yuyan and Wu, Kui and Chen, Lijia and Zhong, Fangwei and Huang, Feiran and Song, Bo and Yang, Han},
  title     = {{Earthquake-Bench}: Video Generation Benchmark for Earthquake Simulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4055--4066},
}
Quantifying the Gap between Understanding and Generation within Unified Multimodal Models: Chenlong Wang,

Yuhang Chen,

Zhihan Hu,

Dongping Chen,

Wenhu Chen,

Sarah Wiegreffe,

Tianyi Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Chenlong and Chen, Yuhang and Hu, Zhihan and Chen, Dongping and Chen, Wenhu and Wiegreffe, Sarah and Zhou, Tianyi},
  title     = {Quantifying the Gap between Understanding and Generation within Unified Multimodal Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5936--5946},
}
Latent-Compressed Variational Autoencoder for Video Diffusion Models: Jiarui Guan,

Wenshuai Zhao,

Zhengtao Zou,

Juho Kannala,

Arno Solin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guan_2026_CVPR,
  author    = {Guan, Jiarui and Zhao, Wenshuai and Zou, Zhengtao and Kannala, Juho and Solin, Arno},
  title     = {Latent-Compressed Variational Autoencoder for Video Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3873--3883},
}
CaptAin: Caption-driven Alignment for Bridging Modality Gaps in Partially Relevant Video Retrieval: Chuanshen Chen,

Kai Zhou,

Feiqi Wang,

Yutao Ning,

Zhendong Xiong,

Yirui Li,

Zhiquan Wen,

Mingkui Tan; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Chuanshen and Zhou, Kai and Wang, Feiqi and Ning, Yutao and Xiong, Zhendong and Li, Yirui and Wen, Zhiquan and Tan, Mingkui},
  title     = {{CaptAin}: Caption-driven Alignment for Bridging Modality Gaps in Partially Relevant Video Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6208--6217},
}
FLAIR: Frequency- and Locality-Aware Implicit Neural Representations: Sukhun Ko,

Seokhyun Youn,

Dahyeon Kye,

Kyle Min,

Chanho Eom,

Jihyong Oh; [pdf] [supp]
[bibtex]
@InProceedings{Ko_2026_CVPR,
  author    = {Ko, Sukhun and Youn, Seokhyun and Kye, Dahyeon and Min, Kyle and Eom, Chanho and Oh, Jihyong},
  title     = {{FLAIR}: Frequency- and Locality-Aware Implicit Neural Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4877--4887},
}
Disentangle Once, Control All: A Unified and Efficient Framework for Disentangling Multi-Condition Control in Human Video Generation: Runqi Wang,

Chuming Wang,

Fangqiu Yi,

Yuying Zhao,

Jingyu Xu,

Yuhang Dai,

Zheng Wang,

Chi Zhang; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Runqi and Wang, Chuming and Yi, Fangqiu and Zhao, Yuying and Xu, Jingyu and Dai, Yuhang and Wang, Zheng and Zhang, Chi},
  title     = {Disentangle Once, Control All: A Unified and Efficient Framework for Disentangling Multi-Condition Control in Human Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3903--3913},
}
Stochastic Perturbations Improve Distribution-to-Distribution Generative Models: Shiye Su,

Yuhui Zhang,

Linqi Zhou,

Rajesh Ranganath,

Serena Yeung-Levy; [pdf] [supp]
[bibtex]
@InProceedings{Su_2026_CVPR,
  author    = {Su, Shiye and Zhang, Yuhui and Zhou, Linqi and Ranganath, Rajesh and Yeung-Levy, Serena},
  title     = {Stochastic Perturbations Improve Distribution-to-Distribution Generative Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3965--3974},
}
Decomposing Subject-Driven Image Generation via Intermediate Structural Prediction: Hanzhong Guo,

Yizhou Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Hanzhong and Yu, Yizhou},
  title     = {Decomposing Subject-Driven Image Generation via Intermediate Structural Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3799--3808},
}
DSAA: Dual-Stage Attribute Activation for Fine-Grained Open Vocabulary Detection: Donghong Jiang,

Endian Lin,

Hanqing Liu,

Mingjie Liu,

Luoping Cui,

Zhao Yang,

Chuang Zhu; [pdf] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Donghong and Lin, Endian and Liu, Hanqing and Liu, Mingjie and Cui, Luoping and Yang, Zhao and Zhu, Chuang},
  title     = {{DSAA}: Dual-Stage Attribute Activation for Fine-Grained Open Vocabulary Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6538--6547},
}
FusionBridge: An Efficient Fusion Via Feature Disentanglement for Multi-Modal Object Re-Identification: Yali Li,

Qianru Han,

Xinwei He,

Zhi Liu,

Jinhai Xiang; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yali and Han, Qianru and He, Xinwei and Liu, Zhi and Xiang, Jinhai},
  title     = {{FusionBridge}: An Efficient Fusion Via Feature Disentanglement for Multi-Modal Object Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5735--5744},
}
LWTformer: A Detail-Aware, Learnable Wavelet-Transformer for Ancient Chinese Character Image Restoration: Wentao Ruan,

Xinhui Li,

Zhan Cheng,

Cunhang Fan,

Libao Tian,

Zhao Lv; [pdf] [supp]
[bibtex]
@InProceedings{Ruan_2026_CVPR,
  author    = {Ruan, Wentao and Li, Xinhui and Cheng, Zhan and Fan, Cunhang and Tian, Libao and Lv, Zhao},
  title     = {{LWTformer}: A Detail-Aware, Learnable Wavelet-Transformer for Ancient {Chinese} Character Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4971--4981},
}
OmniInsert: Mask-Free Video Insertion of Any Reference via Diffusion Transformer Models: Jinshu Chen,

Xinghui Li,

Xu Bai,

Tianxiang Ma,

Pengze Zhang,

Mengtian Li,

Zhuowei Chen,

Gen Li,

Lijie Liu,

Songtao Zhao,

Bingchuan Li,

Qian He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Jinshu and Li, Xinghui and Bai, Xu and Ma, Tianxiang and Zhang, Pengze and Li, Mengtian and Chen, Zhuowei and Li, Gen and Liu, Lijie and Zhao, Songtao and Li, Bingchuan and He, Qian},
  title     = {{OmniInsert}: Mask-Free Video Insertion of Any Reference via Diffusion Transformer Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4067--4077},
}
HoliSafe: Holistic Safety Benchmarking and Modeling for Vision-Language Model: Youngwan Lee,

Kangsan Kim,

Kwanyong Park,

Ilchae Jung,

Soojin Jang,

Seanie Lee,

Yong-Ju Lee,

Sung Ju Hwang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Youngwan and Kim, Kangsan and Park, Kwanyong and Jung, Ilchae and Jang, Soojin and Lee, Seanie and Lee, Yong-Ju and Hwang, Sung Ju},
  title     = {{HoliSafe}: Holistic Safety Benchmarking and Modeling for Vision-Language Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5989--5998},
}
BitTP: The Lightweight Trajectory Prediction Model with BitLLM for Edge-Devices: Mincheol Kang,

HyunJin Lim,

Bomin Kang,

Daehee Park; [pdf] [supp]
[bibtex]
@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Mincheol and Lim, HyunJin and Kang, Bomin and Park, Daehee},
  title     = {{BitTP}: The Lightweight Trajectory Prediction Model with {BitLLM} for Edge-Devices},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3470--3480},
}
Learning by Neighbor-Aware Semantics, Deciding by Open-Form Flows: Towards Robust Zero-Shot Skeleton Action Recognition: Yang Chen,

Miaoge Li,

Zhijie Rao,

Deze Zeng,

Song Guo,

Jingcai Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yang and Li, Miaoge and Rao, Zhijie and Zeng, Deze and Guo, Song and Guo, Jingcai},
  title     = {Learning by Neighbor-Aware Semantics, Deciding by Open-Form Flows: Towards Robust Zero-Shot Skeleton Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3374--3383},
}
InstaDA: Augmenting Instance Segmentation Data with Dual-Agent System: Xianbao Hou,

Yonghao He,

Zeyd Boukhers,

John See,

Hu Su,

Wei Sui,

Cong Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hou_2026_CVPR,
  author    = {Hou, Xianbao and He, Yonghao and Boukhers, Zeyd and See, John and Su, Hu and Sui, Wei and Yang, Cong},
  title     = {{InstaDA}: Augmenting Instance Segmentation Data with Dual-Agent System},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4300--4309},
}
CP-IMoE: Collaborative Prompt-Guided Interactive Mixture-of-Experts for Incomplete Multimodal Learning: Jing Li,

Dongbo Zhang,

Yalin Zheng,

Yanda Meng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jing and Zhang, Dongbo and Zheng, Yalin and Meng, Yanda},
  title     = {{CP-IMoE}: Collaborative Prompt-Guided Interactive Mixture-of-Experts for Incomplete Multimodal Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6090--6099},
}
B-MoE: A Body-Part-Aware Mixture-of-Experts "All Parts Matter" Approach to Micro-Action Recognition: Nishit Poddar,

Aglind Reka,

Diana-Laura Borza,

Snehashis Majhi,

Michal Balazia,

Abhijit Das,

François Brémond; [pdf] [arXiv]
[bibtex]
@InProceedings{Poddar_2026_CVPR,
  author    = {Poddar, Nishit and Reka, Aglind and Borza, Diana-Laura and Majhi, Snehashis and Balazia, Michal and Das, Abhijit and Br{\'e}mond, Fran{\c{c}}ois},
  title     = {{B-MoE}: A Body-Part-Aware Mixture-of-Experts ``All Parts Matter'' Approach to Micro-Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3364--3373},
}
M^4Fuse: Lightweight State-Space MoE with a Cross-Scale Gating Bridge for Brain Tumor Segmentation: Meihua Zhou,

Xinyu Tong,

Li Yang; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Meihua and Tong, Xinyu and Yang, Li},
  title     = {{M$^4$Fuse}: Lightweight State-Space {MoE} with a Cross-Scale Gating Bridge for Brain Tumor Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5368--5377},
}
Understanding Reward Hacking in Text-to-Image Reinforcement Learning: Yunqi Hong,

Kuei-Chun Kao,

Hengguang Zhou,

Cho-Jui Hsieh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2026_CVPR,
  author    = {Hong, Yunqi and Kao, Kuei-Chun and Zhou, Hengguang and Hsieh, Cho-Jui},
  title     = {Understanding Reward Hacking in Text-to-Image Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4245--4255},
}
POS-ISP: Pipeline Optimization at the Sequence Level for Task-aware ISP: Jiyun Won,

Heemin Yang,

Woohyeok Kim,

Jungseul Ok,

Sunghyun Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Won_2026_CVPR,
  author    = {Won, Jiyun and Yang, Heemin and Kim, Woohyeok and Ok, Jungseul and Cho, Sunghyun},
  title     = {{POS-ISP}: Pipeline Optimization at the Sequence Level for Task-aware {ISP}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4931--4939},
}
DUALVISION: RGB-Infrared Multimodal Large Language Models for Robust Visual Reasoning: Abrar Majeedi,

Zhiyuan Ruan,

Ziyi Zhao,

Hongcheng Wang,

Jianglin Lu,

Yin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Majeedi_2026_CVPR,
  author    = {Majeedi, Abrar and Ruan, Zhiyuan and Zhao, Ziyi and Wang, Hongcheng and Lu, Jianglin and Li, Yin},
  title     = {{DUALVISION}: {RGB}-Infrared Multimodal Large Language Models for Robust Visual Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5786--5795},
}
S3O: Selective Spatial-Spectral Operator for Cross-Scale Fusion: Jieyuan Pei,

Wei Li,

Zhuoxuan Li,

Junwei Zhu,

Meiyi Lu,

Jiawei Jiang,

Chenyu Wang,

Jianwei Zheng; [pdf]
[bibtex]
@InProceedings{Pei_2026_CVPR,
  author    = {Pei, Jieyuan and Li, Wei and Li, Zhuoxuan and Zhu, Junwei and Lu, Meiyi and Jiang, Jiawei and Wang, Chenyu and Zheng, Jianwei},
  title     = {{S3O}: Selective Spatial-Spectral Operator for Cross-Scale Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6281--6290},
}
HiStream: Efficient High-Resolution Video Generation via Redundancy Eliminated Streaming: Haonan Qiu,

Shikun Liu,

Zijian Zhou,

Zhaochong An,

Weiming Ren,

Zhiheng Liu,

Jonas Schult,

Sen He,

Shoufa Chen,

Yuren Cong,

Tao Xiang,

Ziwei Liu,

Juan-Manuel Perez-Rua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2026_CVPR,
  author    = {Qiu, Haonan and Liu, Shikun and Zhou, Zijian and An, Zhaochong and Ren, Weiming and Liu, Zhiheng and Schult, Jonas and He, Sen and Chen, Shoufa and Cong, Yuren and Xiang, Tao and Liu, Ziwei and Perez-Rua, Juan-Manuel},
  title     = {{HiStream}: Efficient High-Resolution Video Generation via Redundancy Eliminated Streaming},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4603--4613},
}
EpiMask: Leveraging Epipolar Distance Based Masks in Cross-Attention for Satellite Image Matching: Rahul Deshmukh,

Aditya Chauhan,

Avinash Kak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deshmukh_2026_CVPR,
  author    = {Deshmukh, Rahul and Chauhan, Aditya and Kak, Avinash},
  title     = {{EpiMask}: Leveraging Epipolar Distance Based Masks in Cross-Attention for Satellite Image Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6271--6280},
}
AdaGaR: Adaptive Gabor Representation for Dynamic Scene Reconstruction: Jiewen Chan,

Zhenjun Zhao,

Yu-Lun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chan_2026_CVPR,
  author    = {Chan, Jiewen and Zhao, Zhenjun and Liu, Yu-Lun},
  title     = {{AdaGaR}: Adaptive {Gabor} Representation for Dynamic Scene Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4276--4289},
}
MeMix: Multi-Encoder Mixture Framework for Medical Report Generation: Yiming Cao,

Lizhen Cui,

Zhiqi Shen; [pdf]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Yiming and Cui, Lizhen and Shen, Zhiqi},
  title     = {{MeMix}: Multi-Encoder Mixture Framework for Medical Report Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5474--5483},
}
PaM-MIL: Proliferation and Metastasis Enhanced Localization for Multiple Instance Learning on Pathology Images: Pengyu Guo,

Jiachuan Wang,

Zhao CHEN,

Caleb Chen Cao,

Liping Wang,

Tingyi Jiang,

Lei Chen; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Pengyu and Wang, Jiachuan and Chen, Zhao and Cao, Caleb Chen and Wang, Liping and Jiang, Tingyi and Chen, Lei},
  title     = {{PaM-MIL}: Proliferation and Metastasis Enhanced Localization for Multiple Instance Learning on Pathology Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5305--5314},
}
M-PhyGs: Multi-Material Object Dynamics from Video: Norika Wada,

Kohei Yamashita,

Ryo Kawahara,

Ko Nishino; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wada_2026_CVPR,
  author    = {Wada, Norika and Yamashita, Kohei and Kawahara, Ryo and Nishino, Ko},
  title     = {{M-PhyGs}: Multi-Material Object Dynamics from Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6404--6413},
}
TokenErase: Robust Concept Erasure via Visual-Injected Token Optimization: Liangshun Zou,

Zhangkai Ni,

Hanli Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zou_2026_CVPR,
  author    = {Zou, Liangshun and Ni, Zhangkai and Wang, Hanli},
  title     = {{TokenErase}: Robust Concept Erasure via Visual-Injected Token Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4129--4139},
}
Activation-Norm Maximization to Accelerate Training in Flow-Matching Transformers: Yash Belhe,

Wesley Chang,

Tzu-Mao Li,

Ravi Ramamoorthi,

Michaël Gharbi; [pdf] [supp]
[bibtex]
@InProceedings{Belhe_2026_CVPR,
  author    = {Belhe, Yash and Chang, Wesley and Li, Tzu-Mao and Ramamoorthi, Ravi and Gharbi, Micha{\"e}l},
  title     = {Activation-Norm Maximization to Accelerate Training in Flow-Matching Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4089--4096},
}
Learning to Translate Noise for Robust Image Denoising: Inju Ha,

Donghun Ryou,

Seonguk Seo,

Bohyung Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ha_2026_CVPR,
  author    = {Ha, Inju and Ryou, Donghun and Seo, Seonguk and Han, Bohyung},
  title     = {Learning to Translate Noise for Robust Image Denoising},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5034--5043},
}
UGLMM: Towards Unified Vision Grounding with Large Multimodal Model: Xiangheng Shan,

Li Zhou,

Zenghui Sun,

Shichao Dong,

Nong Sang,

Jinsong Lan,

Xiaoyong Zhu,

Bo Zheng,

Changxin Gao,

Kaifu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Shan_2026_CVPR,
  author    = {Shan, Xiangheng and Zhou, Li and Sun, Zenghui and Dong, Shichao and Sang, Nong and Lan, Jinsong and Zhu, Xiaoyong and Zheng, Bo and Gao, Changxin and Zhang, Kaifu},
  title     = {{UGLMM}: Towards Unified Vision Grounding with Large Multimodal Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5683--5693},
}
VideoScaffold: Elastic-Scale Visual Hierarchies for Streaming Video Understanding in MLLMs: Naishan Zheng,

Qingpei Guo,

Jie Huang,

Feng Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Naishan and Guo, Qingpei and Huang, Jie and Zhao, Feng},
  title     = {{VideoScaffold}: Elastic-Scale Visual Hierarchies for Streaming Video Understanding in {MLLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5947--5957},
}
Blend-Aware Latent Diffusion: Mitigating Stitched Seams in Image Inpainting: Yunpeng Liu,

Xingzhong Hou,

Jie Wu,

Boxiao Liu,

Yi Zhang,

Guanglu Song,

Yu Liu,

Changyao Tian,

Gen Luo,

Haihang You; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yunpeng and Hou, Xingzhong and Wu, Jie and Liu, Boxiao and Zhang, Yi and Song, Guanglu and Liu, Yu and Tian, Changyao and Luo, Gen and You, Haihang},
  title     = {Blend-Aware Latent Diffusion: Mitigating Stitched Seams in Image Inpainting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4678--4687},
}
DynaMind: Reconstructing Dynamic Visual Scenes from EEG by Aligning Temporal Dynamics and Multimodal Semantics to Guided Diffusion: Junxiang Liu,

Junming Lin,

Jie Zhou,

Wei Xiong,

Jiangtong Li,

Jie Li,

Jie Zhuang,

Hongfei Ji; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Junxiang and Lin, Junming and Zhou, Jie and Xiong, Wei and Li, Jiangtong and Li, Jie and Zhuang, Jie and Ji, Hongfei},
  title     = {{DynaMind}: Reconstructing Dynamic Visual Scenes from {EEG} by Aligning Temporal Dynamics and Multimodal Semantics to Guided Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5378--5387},
}
TP-Seg: Task-Prototype Framework for Unified Medical Lesion Segmentation: Jiawei Xu,

Qiangqiang Zhou,

Dandan Zhu,

Yong Chen,

Yugen Yi,

Xiaoqi Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Jiawei and Zhou, Qiangqiang and Zhu, Dandan and Chen, Yong and Yi, Yugen and Zhao, Xiaoqi},
  title     = {{TP-Seg}: Task-Prototype Framework for Unified Medical Lesion Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5452--5462},
}
Generated Reality: Human-Centric World Simulation Using Interactive Video Generation with Hand and Camera Control: Linxi Xie,

Lisong C. Sun,

Ashley Neall,

Tong Wu,

Shengqu Cai,

Gordon Wetzstein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Linxi and Sun, Lisong C. and Neall, Ashley and Wu, Tong and Cai, Shengqu and Wetzstein, Gordon},
  title     = {Generated Reality: Human-Centric World Simulation Using Interactive Video Generation with Hand and Camera Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3998--4008},
}
Parallel In-context Learning for Large Vision Language Models: Shin'ya Yamaguchi,

Daiki Chijiwa,

Tamao Sakao,

Taku Hasegawa; [pdf] [arXiv]
[bibtex]
@InProceedings{Yamaguchi_2026_CVPR,
  author    = {Yamaguchi, Shin'ya and Chijiwa, Daiki and Sakao, Tamao and Hasegawa, Taku},
  title     = {Parallel In-context Learning for Large Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5796--5806},
}
Training-Free Cross-Modal Alignment via Anchor Profiles with Statistical Significance Testing: Kuo Yang,

Jianglin Lu,

Yun Fu; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Kuo and Lu, Jianglin and Fu, Yun},
  title     = {Training-Free Cross-Modal Alignment via Anchor Profiles with Statistical Significance Testing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5704--5713},
}
mmDiff: A Noise-Robust Differentiable Ray-Tracing Framework for mmWave Scene Calibration and Channel Prediction: Haofan Lu,

Yadi Cao,

Wanghao Yi,

Omid Abari; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Haofan and Cao, Yadi and Yi, Wanghao and Abari, Omid},
  title     = {{mmDiff}: A Noise-Robust Differentiable Ray-Tracing Framework for {mmWave} Scene Calibration and Channel Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6435--6444},
}
MARIO: Motion-Augmented Real-Time Multi-Sensor Inertial Odometry: Yiquan Li,

Taeyoung Yeon,

Chenfeng Gao,

Vasco Xu,

Xuanyou Liu,

Karan Ahuja; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yiquan and Yeon, Taeyoung and Gao, Chenfeng and Xu, Vasco and Liu, Xuanyou and Ahuja, Karan},
  title     = {{MARIO}: Motion-Augmented Real-Time Multi-Sensor Inertial Odometry},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3460--3469},
}
A Simple yet Effective Data Scaling Strategy for Semi-Supervised Medical Image Segmentation: Yajun Liu; [pdf]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yajun},
  title     = {A Simple yet Effective Data Scaling Strategy for Semi-Supervised Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5255--5264},
}
Deep Parameter Interpolation for Scalar Conditioning: Chicago Y. Park,

Michael T. McCann,

Cristina Garcia-Cardona,

Brendt Wohlberg,

Ulugbek S. Kamilov; [pdf] [supp]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Chicago Y. and McCann, Michael T. and Garcia-Cardona, Cristina and Wohlberg, Brendt and Kamilov, Ulugbek S.},
  title     = {Deep Parameter Interpolation for Scalar Conditioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3884--3892},
}
Zero-Shot Textual Explanations via Translating Decision-Critical Features: Toshinori Yamauchi,

Hiroshi Kera,

Kazuhiko Kawamoto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yamauchi_2026_CVPR,
  author    = {Yamauchi, Toshinori and Kera, Hiroshi and Kawamoto, Kazuhiko},
  title     = {Zero-Shot Textual Explanations via Translating Decision-Critical Features},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3282--3292},
}
Learning from Noisy Prompts: Saliency-Guided Prompt Distillation for Robust Segmentation with SAM: Jingxuan Kang,

Ziqi Zhang,

Shaoming Zheng,

Shuang Li,

Uday Bharat Patel,

Alexander Harry Fitzhugh,

Phillip Lung,

Yusuf Kiberu,

Nikesh Jathanna,

Shahnaz Jamil-Copley,

Bernhard Kainz,

Chen Qin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Jingxuan and Zhang, Ziqi and Zheng, Shaoming and Li, Shuang and Patel, Uday Bharat and Fitzhugh, Alexander Harry and Lung, Phillip and Kiberu, Yusuf and Jathanna, Nikesh and Jamil-Copley, Shahnaz and Kainz, Bernhard and Qin, Chen},
  title     = {Learning from Noisy Prompts: Saliency-Guided Prompt Distillation for Robust Segmentation with {SAM}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5224--5234},
}
LLM Guided Multi Style Typography and Layout Generation via Dynamic Direct Preference Optimization: Chen Fu,

Shengzhou Yi,

Ling Xiao,

Toshihiko Yamasaki; [pdf] [supp]
[bibtex]
@InProceedings{Fu_2026_CVPR,
  author    = {Fu, Chen and Yi, Shengzhou and Xiao, Ling and Yamasaki, Toshihiko},
  title     = {{LLM} Guided Multi Style Typography and Layout Generation via Dynamic Direct Preference Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5725--5734},
}
FlowSteer: Conditioning Flow Field for Consistent Image Restoration: Tharindu Wickremasinghe,

Chenyang Qi,

Harshana Weligampola,

Zhengzhong Tu,

Stanley H. Chan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wickremasinghe_2026_CVPR,
  author    = {Wickremasinghe, Tharindu and Qi, Chenyang and Weligampola, Harshana and Tu, Zhengzhong and Chan, Stanley H.},
  title     = {{FlowSteer}: Conditioning Flow Field for Consistent Image Restoration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5106--5116},
}
DraCo: Draft as CoT for Text-to-Image Preview and Rare Concept Generation: Dongzhi Jiang,

Renrui Zhang,

Haodong Li,

Zhuofan Zong,

Ziyu Guo,

Jun He,

Claire Guo,

Junyan Ye,

Rongyao Fang,

Weijia Li,

Rui Liu,

Hongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Dongzhi and Zhang, Renrui and Li, Haodong and Zong, Zhuofan and Guo, Ziyu and He, Jun and Guo, Claire and Ye, Junyan and Fang, Rongyao and Li, Weijia and Liu, Rui and Li, Hongsheng},
  title     = {{DraCo}: Draft as {CoT} for Text-to-Image Preview and Rare Concept Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5903--5914},
}
RelativeFlow: Taming Medical Image Denoising Learning with Noisy Reference: Yuxin Liu,

Yiqing Dong,

Wenxue Yu,

Zhan Wu,

Rongjun Ge,

Yang Chen,

Yuting He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Yuxin and Dong, Yiqing and Yu, Wenxue and Wu, Zhan and Ge, Rongjun and Chen, Yang and He, Yuting},
  title     = {{RelativeFlow}: Taming Medical Image Denoising Learning with Noisy Reference},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5673--5682},
}
HypHOI: Exploring Hierarchical Hyperbolic Embeddings for Human-Object Interaction Detection: Yixin Guo,

Yu Liu,

Weimin Wang,

Yanming Guo,

Qi Jia; [pdf]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Yixin and Liu, Yu and Wang, Weimin and Guo, Yanming and Jia, Qi},
  title     = {{HypHOI}: Exploring Hierarchical Hyperbolic Embeddings for Human-Object Interaction Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6517--6527},
}
Drive-Cascade: Autoregressive Occupancy to LiDAR and Video Synthesis: Shuangming Lei,

Yuehao Huang,

Yao Yi,

Yijia Xie,

Jingke Wang,

Ruoyu Wang,

Jiajun Lv,

Guanglin Xu,

AiXue Ye,

Bingbing Liu,

Siyuan Cheng,

Hongbo Zhang,

Yukai Ma,

Yong Liu; [pdf] [supp]
[bibtex]
@InProceedings{Lei_2026_CVPR,
  author    = {Lei, Shuangming and Huang, Yuehao and Yi, Yao and Xie, Yijia and Wang, Jingke and Wang, Ruoyu and Lv, Jiajun and Xu, Guanglin and Ye, AiXue and Liu, Bingbing and Cheng, Siyuan and Zhang, Hongbo and Ma, Yukai and Liu, Yong},
  title     = {{Drive-Cascade}: Autoregressive Occupancy to {LiDAR} and Video Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4552--4561},
}
Discovering Attention Head Interactions in Vision Transformers: Zhenyu Lu,

Yuheng Jia,

Wei You,

Hao Chen; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2026_CVPR,
  author    = {Lu, Zhenyu and Jia, Yuheng and You, Wei and Chen, Hao},
  title     = {Discovering Attention Head Interactions in Vision Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3332--3342},
}
SwiftPie: Lightning-fast Subject-driven Image Personalization via One step Diffusion: Huy Duong,

Trong-Tung Nguyen,

Cuong Pham,

Anh Tran,

Khoi Nguyen,

Minh Hoai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Duong_2026_CVPR,
  author    = {Duong, Huy and Nguyen, Trong-Tung and Pham, Cuong and Tran, Anh and Nguyen, Khoi and Hoai, Minh},
  title     = {{SwiftPie}: Lightning-fast Subject-driven Image Personalization via One step Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4708--4718},
}
When Models Learn to Ask Why: Adaptive Causal Reasoning for Trustworthy Medical Vision-Language Models: Jianxin Lin,

Chunzheng Zhu,

Peter J Kneuertz,

Yunfei Bai,

Yuan Xue; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Jianxin and Zhu, Chunzheng and Kneuertz, Peter J and Bai, Yunfei and Xue, Yuan},
  title     = {When Models Learn to Ask Why: Adaptive Causal Reasoning for Trustworthy Medical Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5556--5568},
}
MVSSM: Motion-aware Visual State Space Model for Efficient Video Deblurring: Chen Zhou,

Tao Wu,

Wei Liu,

Xi Wu,

Ying Fu; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2026_CVPR,
  author    = {Zhou, Chen and Wu, Tao and Liu, Wei and Wu, Xi and Fu, Ying},
  title     = {{MVSSM}: Motion-aware Visual State Space Model for Efficient Video Deblurring},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4855--4865},
}
FSMC-Pose: Frequency and Spatial Fusion with Multiscale Self-Calibration for Cattle Mounting Pose Estimation: Fangjing Li,

Zhihai Wang,

Xinxin Ding,

Haiyang Liu,

Ronghua Gao,

Rong Wang,

Yao Zhu,

Ming Jin; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Fangjing and Wang, Zhihai and Ding, Xinxin and Liu, Haiyang and Gao, Ronghua and Wang, Rong and Zhu, Yao and Jin, Ming},
  title     = {{FSMC-Pose}: Frequency and Spatial Fusion with Multiscale Self-Calibration for Cattle Mounting Pose Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3620--3629},
}
ZODS-RS -- Zero-Training Oriented Detection & Segmentation for Remote Sensing: Zuan Gu,

Tianhan Gao,

Langxu Zhao; [pdf]
[bibtex]
@InProceedings{Gu_2026_CVPR,
  author    = {Gu, Zuan and Gao, Tianhan and Zhao, Langxu},
  title     = {{ZODS-RS} -- Zero-Training Oriented Detection \& Segmentation for Remote Sensing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6322--6330},
}
Concise Geometric Description as a Bridge: Unleashing the Potential of LLM for Plane Geometric Problem Solving: Jingyun Wang,

Dian Li,

Xiaohan Wang,

Gang Liu,

Jiahong Yan,

Guoliang Kang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Jingyun and Li, Dian and Wang, Xiaohan and Liu, Gang and Yan, Jiahong and Kang, Guoliang},
  title     = {Concise Geometric Description as a Bridge: Unleashing the Potential of {LLM} for Plane Geometric Problem Solving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5958--5967},
}
Towards Metric-Aware Multi-Person Mesh Recovery by Jointly Optimizing Human Crowd in Camera Space: Kaiwen Wang,

Kaili Zheng,

Yiming Shi,

Chenyi Guo,

Ji Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Kaiwen and Zheng, Kaili and Shi, Yiming and Guo, Chenyi and Wu, Ji},
  title     = {Towards Metric-Aware Multi-Person Mesh Recovery by Jointly Optimizing Human Crowd in Camera Space},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3532--3542},
}
Meta-CDMTransNet: Cross-Domain Multi-Scale Transformer Meta-Learning Framework for Few-Shot Breast Histopathological Image Classification: Anindita Mohanta,

Sourav Dey Roy,

Priya Saha,

Niharika Nath,

Mrinal Kanti Bhowmik; [pdf]
[bibtex]
@InProceedings{Mohanta_2026_CVPR,
  author    = {Mohanta, Anindita and Roy, Sourav Dey and Saha, Priya and Nath, Niharika and Bhowmik, Mrinal Kanti},
  title     = {{Meta-CDMTransNet}: Cross-Domain Multi-Scale Transformer Meta-Learning Framework for Few-Shot Breast Histopathological Image Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5525--5534},
}
LiteEmbed: Adapting CLIP to Rare Classes: Aishwarya Agarwal,

Srikrishna Karanam,

Vineet Gandhi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Agarwal_2026_CVPR,
  author    = {Agarwal, Aishwarya and Karanam, Srikrishna and Gandhi, Vineet},
  title     = {{LiteEmbed}: Adapting {CLIP} to Rare Classes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6133--6142},
}
HAFM: A Post-Fusion Gating Module for Haze-Aware RGB-Thermal Object Detection: Juan M. Saeteros,

Nick J. Arévalo,

Boris X. Vintimilla; [pdf] [supp]
[bibtex]
@InProceedings{Saeteros_2026_CVPR,
  author    = {Saeteros, Juan M. and Ar{\'e}valo, Nick J. and Vintimilla, Boris X.},
  title     = {{HAFM}: A Post-Fusion Gating Module for Haze-Aware {RGB-Thermal} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6197--6207},
}
Inf-Dehaze: Beyond GPU Memory Constraints for Ultra-High-Resolution Image Dehazing: Xinyu Yan,

Jiuchen Chen,

Qizhi Xu; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Xinyu and Chen, Jiuchen and Xu, Qizhi},
  title     = {{Inf-Dehaze}: Beyond {GPU} Memory Constraints for Ultra-High-Resolution Image Dehazing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5086--5095},
}
UniLat3D: Geometry-Appearance Unified Latents for Single-Stage 3D Generation: Guanjun Wu,

Jiemin Fang,

Chen Yang,

Sikuang Li,

Taoran Yi,

Jia Lu,

Zanwei Zhou,

Jiazhong Cen,

Lingxi Xie,

Xiaopeng Zhang,

Wei Wei,

Wenyu Liu,

Xinggang Wang,

Qi Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Guanjun and Fang, Jiemin and Yang, Chen and Li, Sikuang and Yi, Taoran and Lu, Jia and Zhou, Zanwei and Cen, Jiazhong and Xie, Lingxi and Zhang, Xiaopeng and Wei, Wei and Liu, Wenyu and Wang, Xinggang and Tian, Qi},
  title     = {{UniLat3D}: Geometry-Appearance Unified Latents for Single-Stage {3D} Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4366--4378},
}
IM-Animation: An Implicit Motion Representation for Identity-Decoupled Character Animation: Zhufeng Xu,

Xuan Gao,

Feng-Lin Liu,

Haoxian Zhang,

Zhixue Fang,

Yu-Kun Lai,

Xiaoqiang Liu,

Pengfei Wan,

Lin Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zhufeng and Gao, Xuan and Liu, Feng-Lin and Zhang, Haoxian and Fang, Zhixue and Lai, Yu-Kun and Liu, Xiaoqiang and Wan, Pengfei and Gao, Lin},
  title     = {{IM-Animation}: An Implicit Motion Representation for Identity-Decoupled Character Animation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4635--4646},
}
Is Your Text-to-Image Model Robust to Caption Noise?: Weichen Yu,

Ziyan Yang,

Shanchuan Lin,

Qi Zhao,

Jianyi Wang,

Liangke Gui,

Matt Fredrikson,

Lu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2026_CVPR,
  author    = {Yu, Weichen and Yang, Ziyan and Lin, Shanchuan and Zhao, Qi and Wang, Jianyi and Gui, Liangke and Fredrikson, Matt and Jiang, Lu},
  title     = {Is Your Text-to-Image Model Robust to Caption Noise?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3789--3798},
}
SyntheticManga: Training-Free Manga Generation with Phased Diffusion: Xuelei Peng,

Chi-Keung Tang,

Yu-Wing Tai; [pdf] [supp]
[bibtex]
@InProceedings{Peng_2026_CVPR,
  author    = {Peng, Xuelei and Tang, Chi-Keung and Tai, Yu-Wing},
  title     = {{SyntheticManga}: Training-Free Manga Generation with Phased Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4410--4418},
}
FALCON: Fast Adaptive Lightweight Computation of Intensities and Events for Depth Estimation: Sankarshana Venugopal,

Mohammad Mostafavi,

Jonghyun Choi; [pdf] [supp]
[bibtex]
@InProceedings{Venugopal_2026_CVPR,
  author    = {Venugopal, Sankarshana and Mostafavi, Mohammad and Choi, Jonghyun},
  title     = {{FALCON}: Fast Adaptive Lightweight Computation of Intensities and Events for Depth Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5024--5033},
}
Adversarial Concept Distillation for One-Step Diffusion Personalization: Yixiong Yang,

Tao Wu,

Senmao Li,

Shiqi Yang,

Yaxing Wang,

Joost van de Weijer,

Kai Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Yixiong and Wu, Tao and Li, Senmao and Yang, Shiqi and Wang, Yaxing and van de Weijer, Joost and Wang, Kai},
  title     = {Adversarial Concept Distillation for One-Step Diffusion Personalization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4321--4333},
}
Bootstrapping Sign Language Annotations with Sign Language Models: Colin Lea,

Vasileios Baltatzis,

Connor Gillis,

Raja Kushalnagar,

Lorna Quandt,

Leah Findlater; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lea_2026_CVPR,
  author    = {Lea, Colin and Baltatzis, Vasileios and Gillis, Connor and Kushalnagar, Raja and Quandt, Lorna and Findlater, Leah},
  title     = {Bootstrapping Sign Language Annotations with Sign Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3630--3640},
}
SafetyBPO: Bidirectional Preference Optimization for Safe Text-to-Image Generation: You Wu,

Beier Zhu,

Chi Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, You and Zhu, Beier and Zhang, Chi},
  title     = {{SafetyBPO}: Bidirectional Preference Optimization for Safe Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4759--4768},
}
FastMMoE: Accelerating Multimodal Large Language Models through Dynamic Expert Activation and Routing-Aware Token Pruning: Guoyang Xia,

Yifeng Ding,

Fengfa Li,

Lei Ren,

Wei Chen,

Fangxiang Feng,

Xiaojie Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Guoyang and Ding, Yifeng and Li, Fengfa and Ren, Lei and Chen, Wei and Feng, Fangxiang and Wang, Xiaojie},
  title     = {{FastMMoE}: Accelerating Multimodal Large Language Models through Dynamic Expert Activation and Routing-Aware Token Pruning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5915--5924},
}
Vision-R1: Evolving Human-Free Alignment in Large Vision-Language Models via Vision-Guided Reinforcement Learning: Yufei Zhan,

Yousong Zhu,

Hongyin Zhao,

Fan Yang,

Shurong Zheng,

Ming Tang,

Jinqiao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhan_2026_CVPR,
  author    = {Zhan, Yufei and Zhu, Yousong and Zhao, Hongyin and Yang, Fan and Zheng, Shurong and Tang, Ming and Wang, Jinqiao},
  title     = {{Vision-R1}: Evolving Human-Free Alignment in Large Vision-Language Models via Vision-Guided Reinforcement Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5807--5817},
}
MedSAD-CLIP: Supervised CLIP with Token-Patch Cross-Attention for Medical Anomaly Detection and Segmentation: Thuy Truong Tran,

Minh Kha Do,

Phuc Nguyen Duy,

Min Hun Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tran_2026_CVPR,
  author    = {Tran, Thuy Truong and Do, Minh Kha and Duy, Phuc Nguyen and Lee, Min Hun},
  title     = {{MedSAD-CLIP}: Supervised {CLIP} with Token-Patch Cross-Attention for Medical Anomaly Detection and Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5168--5178},
}
USV: Unified Sparsification for Accelerating Video Diffusion Models: Xinjian Wu,

Hongmei Wang,

Yuan Zhou,

Qinglin Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Xinjian and Wang, Hongmei and Zhou, Yuan and Lu, Qinglin},
  title     = {{USV}: Unified Sparsification for Accelerating Video Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4180--4189},
}
CtrlISP: Rescuing Low-Light RAW Images via Controllable Neural ISP: Chi Zhang,

Yachun Li,

Hang Du,

Shicai Yang,

Di Xie,

Jiang Zhu,

Yang Yang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Chi and Li, Yachun and Du, Hang and Yang, Shicai and Xie, Di and Zhu, Jiang and Yang, Yang},
  title     = {{CtrlISP}: Rescuing Low-Light {RAW} Images via Controllable Neural {ISP}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4888--4897},
}
A Diagnostic Study of Region-Based Representations in Multimodal LLMs: Ji Li,

Shengcao Cao,

Yu-Xiong Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Ji and Cao, Shengcao and Wang, Yu-Xiong},
  title     = {A Diagnostic Study of Region-Based Representations in Multimodal {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5978--5988},
}
Towards Noise-Robust Medical Segmentation via Chebyshev-Attention-Based Asymmetric UNet: Yue Xin,

Ziyang Zheng,

Wenrui Dai,

Chenglin Li,

Junni Zou,

Hongkai Xiong; [pdf] [supp]
[bibtex]
@InProceedings{Xin_2026_CVPR,
  author    = {Xin, Yue and Zheng, Ziyang and Dai, Wenrui and Li, Chenglin and Zou, Junni and Xiong, Hongkai},
  title     = {Towards Noise-Robust Medical Segmentation via {Chebyshev-Attention-Based} Asymmetric {UNet}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5235--5244},
}
HeartcareGPT: A Unified Multimodal ECG Suite for Dual Signal-Image Modeling and Understanding: Yihan Xie,

Sijing Li,

Zhuonan Wang,

Tianwei Lin,

Chenglin Yang,

Yu Zhong,

Wenjie Yan,

Wenqiao Zhang,

Xiaogang Guo,

Jun Xiao,

Yueting Zhuang,

Beng Chin Ooi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Yihan and Li, Sijing and Wang, Zhuonan and Lin, Tianwei and Yang, Chenglin and Zhong, Yu and Yan, Wenjie and Zhang, Wenqiao and Guo, Xiaogang and Xiao, Jun and Zhuang, Yueting and Ooi, Beng Chin},
  title     = {{HeartcareGPT}: A Unified Multimodal {ECG} Suite for Dual Signal-Image Modeling and Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6228--6238},
}
Rethinking VLMs for Image Forgery Detection and Localization: Shaofeng Guo,

Jiequan Cui,

Richang Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Shaofeng and Cui, Jiequan and Hong, Richang},
  title     = {Rethinking {VLMs} for Image Forgery Detection and Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5828--5837},
}
MReactor: Offline Multiple Appropriate Facial Reaction Generation with Hierarchical Cognitive Disentanglement: Jiachen Luo,

Jiajun He,

Shuai Shen,

Lin Wang,

Huy Phan,

Joshua Reiss,

Lin Haijun,

Bjoern Schuller,

Zeyu Fu,

Siyang Song; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2026_CVPR,
  author    = {Luo, Jiachen and He, Jiajun and Shen, Shuai and Wang, Lin and Phan, Huy and Reiss, Joshua and Haijun, Lin and Schuller, Bjoern and Fu, Zeyu and Song, Siyang},
  title     = {{MReactor}: Offline Multiple Appropriate Facial Reaction Generation with Hierarchical Cognitive Disentanglement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3354--3363},
}
Towards Calibrated Gradient-based Multi-Task Learning: Linxiao Cao,

Mianzimei Yang,

Zhipeng Zhou,

Hong Xie,

Defu Lian,

Menglin Yang; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2026_CVPR,
  author    = {Cao, Linxiao and Yang, Mianzimei and Zhou, Zhipeng and Xie, Hong and Lian, Defu and Yang, Menglin},
  title     = {Towards Calibrated Gradient-based Multi-Task Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5127--5136},
}
NAKUL-Med: Spectral-Graph State Space Models with Dynamics Kernels for Medical Signals: Badri N Patro,

Vijay S Agneeswaran; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Patro_2026_CVPR,
  author    = {Patro, Badri N and Agneeswaran, Vijay S},
  title     = {{NAKUL-Med}: Spectral-Graph State Space Models with Dynamics Kernels for Medical Signals},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5399--5408},
}
C3-Diff: Super-resolving Spatial Transcriptomics via Cross-modal Cross-content Contrastive Diffusion Modelling: Xiaofei Wang,

Stephen J Price,

Chao Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xiaofei and Price, Stephen J and Li, Chao},
  title     = {{C3-Diff}: Super-resolving Spatial Transcriptomics via Cross-modal Cross-content Contrastive Diffusion Modelling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5463--5473},
}
DSA: Dynamic Step Allocation for Fast Autoregressive Video Generation: Thanh-Tung Le,

Yunhan Zhao,

Menglei Chai,

Zhengyang Shen,

Zhe Cao,

Danhang Tang,

Xiaohui Xie,

Deying Kong; [pdf] [supp]
[bibtex]
@InProceedings{Le_2026_CVPR,
  author    = {Le, Thanh-Tung and Zhao, Yunhan and Chai, Menglei and Shen, Zhengyang and Cao, Zhe and Tang, Danhang and Xie, Xiaohui and Kong, Deying},
  title     = {{DSA}: Dynamic Step Allocation for Fast Autoregressive Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4334--4344},
}
If you can describe it, they can see it: Cross-Modal Learning of Visual Concepts from Textual Descriptions: Carlo Alberto Barbano,

Luca Molinaro,

Massimiliano Ciranni,

Emanuele Aiello,

Vito Paolo Pastore,

Marco Grangetto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Barbano_2026_CVPR,
  author    = {Barbano, Carlo Alberto and Molinaro, Luca and Ciranni, Massimiliano and Aiello, Emanuele and Pastore, Vito Paolo and Grangetto, Marco},
  title     = {If you can describe it, they can see it: Cross-Modal Learning of Visual Concepts from Textual Descriptions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6122--6132},
}
HAIT: Hybrid Adversarial Iterative Training for Mitigating Object Hallucination in Large Vision-Language Models: Liangjie Zhao,

Liao Wenjie,

Ming Feng,

Xiaohui Song,

Huafei Li,

Haonan Lu; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Liangjie and Wenjie, Liao and Feng, Ming and Song, Xiaohui and Li, Huafei and Lu, Haonan},
  title     = {{HAIT}: Hybrid Adversarial Iterative Training for Mitigating Object Hallucination in Large Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6072--6079},
}
Anatomy-Aware Adaptive Feature Perturbation Framework for Semi-Supervised MRI Segmentation: Ji Lin,

Bo Peng,

Suping Li,

Qianni Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Ji and Peng, Bo and Li, Suping and Zhang, Qianni},
  title     = {Anatomy-Aware Adaptive Feature Perturbation Framework for Semi-Supervised {MRI} Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5620--5631},
}
HarmoniDiff-RS: Training-Free Diffusion Harmonization for Satellite Image Composition: Xiaoqi Zhuang,

Jefersson A Dos Santos,

Jungong Han; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhuang_2026_CVPR,
  author    = {Zhuang, Xiaoqi and Dos Santos, Jefersson A and Han, Jungong},
  title     = {{HarmoniDiff-RS}: Training-Free Diffusion Harmonization for Satellite Image Composition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6352--6360},
}
Vote-in-Context: VLMs as Explainable Zero-Shot Rank Fusers: Mohamed Eltahir,

Ali Habibullah,

Lama Ayash,

Tanveer Hussain,

Naeemullah Khan; [pdf] [supp]
[bibtex]
@InProceedings{Eltahir_2026_CVPR, author = {Eltahir, Mohamed and Habibullah, Ali and Ayash, Lama and Hussain, Tanveer and Khan, Naeemullah}, title = {Vote-in-Context: {VLMs} as Explainable Zero-Shot Rank Fusers}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {6496--6505} }
PCSTracker: Long-term Scene Flow Estimation for Point Cloud Sequences: Min Lin,

Gangwei Xu,

Xianqi Wang,

Yuyi Peng,

Xin Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR, author = {Lin, Min and Xu, Gangwei and Wang, Xianqi and Peng, Yuyi and Yang, Xin}, title = {{PCSTracker}: Long-term Scene Flow Estimation for Point Cloud Sequences}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {4920--4930} }
Vision Inference Former: Sustaining Visual Consistency in Multimodal Large Language Models: Xinpeng Dong,

Min Zhang,

Kairong Han,

Xu Tan,

Fei Wu,

Kun Kuang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2026_CVPR, author = {Dong, Xinpeng and Zhang, Min and Han, Kairong and Tan, Xu and Wu, Fei and Kuang, Kun}, title = {Vision Inference Former: Sustaining Visual Consistency in Multimodal Large Language Models}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {6100--6110} }
R2MoE: Representation and Expert Selection Dual-Regularized Mixture-of-Experts for Multimodal Clinical Data: Wajih Hassan Raza,

Mya Schiess,

Juan Martinez Lemus,

Timothy Michael Ellmore,

Charles Green,

Claudio Soto,

Xin Fu,

Renjie Hu; [pdf] [supp]
[bibtex]
@InProceedings{Raza_2026_CVPR, author = {Raza, Wajih Hassan and Schiess, Mya and Lemus, Juan Martinez and Ellmore, Timothy Michael and Green, Charles and Soto, Claudio and Fu, Xin and Hu, Renjie}, title = {{R2MoE}: Representation and Expert Selection Dual-Regularized Mixture-of-Experts for Multimodal Clinical Data}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {5776--5785} }
Adapting with an Open Mind: Leveraging Open-Vocabulary Detectors for Closed Set Source-Free Domain Adaptive Object Detection: Kaustubh R Borgavi,

Sarvesh Shashikumar,

Chetan Arora; [pdf] [supp]
[bibtex]
@InProceedings{Borgavi_2026_CVPR, author = {Borgavi, Kaustubh R and Shashikumar, Sarvesh and Arora, Chetan}, title = {Adapting with an Open Mind: Leveraging Open-Vocabulary Detectors for Closed Set Source-Free Domain Adaptive Object Detection}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {6570--6581} }
TICON: A Slide-Level Tile Contextualizer for Histopathology Representation Learning: Varun Belagali,

Saarthak Kapse,

Pierre Marza,

Srijan Das,

Zilinghan Li,

Sofiène Boutaj,

Pushpak Pati,

Srikar Yellapragada,

Tarak Nath Nandi,

Ravi K Madduri,

Joel Saltz,

Prateek Prasanna,

Stergios Christodoulidis,

Maria Vakalopoulou,

Dimitris Samaras; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Belagali_2026_CVPR, author = {Belagali, Varun and Kapse, Saarthak and Marza, Pierre and Das, Srijan and Li, Zilinghan and Boutaj, Sofi{\`e}ne and Pati, Pushpak and Yellapragada, Srikar and Nandi, Tarak Nath and Madduri, Ravi K and Saltz, Joel and Prasanna, Prateek and Christodoulidis, Stergios and Vakalopoulou, Maria and Samaras, Dimitris}, title = {{TICON}: A Slide-Level Tile Contextualizer for Histopathology Representation Learning}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {5440--5451} }
CLASH: A Benchmark for Cross-Modal Contradiction Detection: Teodora Popordanoska,

Jiameng Li,

Matthew B. Blaschko; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Popordanoska_2026_CVPR, author = {Popordanoska, Teodora and Li, Jiameng and Blaschko, Matthew B.}, title = {{CLASH}: A Benchmark for Cross-Modal Contradiction Detection}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {6051--6061} }
GeoHOI: Geometry-Enhanced Human-Object Interaction Video Generation via Hierarchical Multi-Modal Injection: Ziyi Xu,

Zejing Rao,

Juan Cao,

Xiaoqiang Liu,

Zhixue Fang,

Haoxian Zhang,

Songlin Tang,

Fan Tang; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Ziyi and Rao, Zejing and Cao, Juan and Liu, Xiaoqiang and Fang, Zhixue and Zhang, Haoxian and Tang, Songlin and Tang, Fan}, title = {{GeoHOI}: Geometry-Enhanced Human-Object Interaction Video Generation via Hierarchical Multi-Modal Injection}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {3739--3748} }
UniD-Shift: Towards Unified Semantic Segmentation via Interpretable Shared-Private Multimodal Decomposition: Shuai Zhang,

Zhecheng Shi,

Zhuoxiao Li,

Jing Ou,

Tengxi Wang,

Yuan Liu,

Wufan Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Shuai and Shi, Zhecheng and Li, Zhuoxiao and Ou, Jing and Wang, Tengxi and Liu, Yuan and Zhao, Wufan}, title = {{UniD-Shift}: Towards Unified Semantic Segmentation via Interpretable Shared-Private Multimodal Decomposition}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {6382--6393} }
Beyond Optimal Transport: Model-Aligned Coupling for Flow Matching: Yexiong Lin,

Yu Yao,

Yang Zhou,

Tongliang Liu; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2026_CVPR, author = {Lin, Yexiong and Yao, Yu and Zhou, Yang and Liu, Tongliang}, title = {Beyond Optimal Transport: Model-Aligned Coupling for Flow Matching}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {3955--3964} }
All-Age Human Mesh Recovery: Laura Bravo-Sánchez,

Matthieu Armando,

Romain Brégier,

Grégory Rogez,

Serena Yeung-Levy,

Fabien Baradel; [pdf] [supp]
[bibtex]
@InProceedings{Bravo-Sanchez_2026_CVPR, author = {Bravo-S{\'a}nchez, Laura and Armando, Matthieu and Br{\'e}gier, Romain and Rogez, Gr{\'e}gory and Yeung-Levy, Serena and Baradel, Fabien}, title = {All-Age Human Mesh Recovery}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {3677--3687} }
LlamaRG: A Multi-View Large Language Model for Radiology Report Generation: Tanuja Jayas,

Aditya Rastogi,

Pavithra Raghavan,

Gianluca Brugnara,

Kai Schlamp,

Martha Foltyn-Dumitru,

Philipp Vollmuth; [pdf] [supp]
[bibtex]
@InProceedings{Jayas_2026_CVPR, author = {Jayas, Tanuja and Rastogi, Aditya and Raghavan, Pavithra and Brugnara, Gianluca and Schlamp, Kai and Foltyn-Dumitru, Martha and Vollmuth, Philipp}, title = {{LlamaRG}: A Multi-View Large Language Model for Radiology Report Generation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {5745--5754} }
Detecting Precise Hand Touch Moments in Egocentric Video: Huy Anh Nguyen,

Feras Dayoub,

Minh Hoai; [pdf] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR, author = {Nguyen, Huy Anh and Dayoub, Feras and Hoai, Minh}, title = {Detecting Precise Hand Touch Moments in Egocentric Video}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {3565--3574} }
CETCam: Camera-Controllable Video Generation via Consistent and Extensible Tokenization: Zelin Zhao,

Xinyu Gong,

Bangya Liu,

Ziyang Song,

Jun Zhang,

Suhui Wu,

Yongxin Chen,

Hao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR, author = {Zhao, Zelin and Gong, Xinyu and Liu, Bangya and Song, Ziyang and Zhang, Jun and Wu, Suhui and Chen, Yongxin and Zhang, Hao}, title = {{CETCam}: Camera-Controllable Video Generation via Consistent and Extensible Tokenization}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {4749--4758} }
Rethinking Training Dynamics in Scale-Wise Autoregressive Generation: Gengze Zhou,

Chongjian Ge,

Hao Tan,

Feng Liu,

Yicong Hong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2026_CVPR, author = {Zhou, Gengze and Ge, Chongjian and Tan, Hao and Liu, Feng and Hong, Yicong}, title = {Rethinking Training Dynamics in Scale-Wise Autoregressive Generation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {4593--4602} }
Scene-Level Heterogeneous Physics Simulation with 3D Gaussian Splats: Xiaoyang Liu,

Shangzhe Wu,

Kai Han; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Xiaoyang and Wu, Shangzhe and Han, Kai}, title = {Scene-Level Heterogeneous Physics Simulation with {3D} {Gaussian} Splats}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {6456--6465} }
OTPrune: Distribution-Aligned Visual Token Pruning Via Optimal Transport: Xiwen Chen,

Wenhui Zhu,

Gen Li,

Xuanzhao Dong,

Yujian Xiong,

Hao Wang,

Peijie Qiu,

Qingquan Song,

Zhipeng Wang,

Shao Tang,

Yalin Wang,

Abolfazl Razi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR, author = {Chen, Xiwen and Zhu, Wenhui and Li, Gen and Dong, Xuanzhao and Xiong, Yujian and Wang, Hao and Qiu, Peijie and Song, Qingquan and Wang, Zhipeng and Tang, Shao and Wang, Yalin and Razi, Abolfazl}, title = {{OTPrune}: Distribution-Aligned Visual Token Pruning Via Optimal Transport}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {5849--5859} }
Consistent Video Editing as Flow-Driven Image-to-Video Generation: Ge Wang,

Songlin Fan,

Hangxu Liu,

Quanjian Song,

Hewei Wang,

Jinfeng Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Ge and Fan, Songlin and Liu, Hangxu and Song, Quanjian and Wang, Hewei and Xu, Jinfeng}, title = {Consistent Video Editing as Flow-Driven Image-to-Video Generation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {4625--4634} }
DMin: Scalable Training Data Influence Estimation for Diffusion Models: Huawei Lin,

Yingjie Lao,

Weijie Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR, author = {Lin, Huawei and Lao, Yingjie and Zhao, Weijie}, title = {{DMin}: Scalable Training Data Influence Estimation for Diffusion Models}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {3293--3302} }
Adapting Large VLMs with Iterative and Manual Instructions for Generative Low-light Enhancement: Xiaoran Sun,

Liyan Wang,

Yeying Jin,

Kin-man Lam,

Zhixun Su,

Yang Yang,

Jinshan Pan,

Cong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2026_CVPR, author = {Sun, Xiaoran and Wang, Liyan and Jin, Yeying and Lam, Kin-man and Su, Zhixun and Yang, Yang and Pan, Jinshan and Wang, Cong}, title = {Adapting Large {VLMs} with Iterative and Manual Instructions for Generative Low-light Enhancement}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {4832--4842} }
Evaluating Low-Light Image Enhancement Across Multiple Intensity Levels: Maria Pilligua,

David Serrano-Lozano,

Pai Peng,

Ramon Baldrich,

Michael S. Brown,

Javier Vazquez-Corral; [pdf] [supp]
[bibtex]
@InProceedings{Pilligua_2026_CVPR, author = {Pilligua, Maria and Serrano-Lozano, David and Peng, Pai and Baldrich, Ramon and Brown, Michael S. and Vazquez-Corral, Javier}, title = {Evaluating Low-Light Image Enhancement Across Multiple Intensity Levels}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {5014--5023} }
Seen-to-Scene: Keep the Seen, Generate the Unseen for Video Outpainting: Inseok Jeon,

Minhyeok Lee,

Seunghoon Lee,

Minseok Kang,

Suhwan Cho,

Sangyoun Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeon_2026_CVPR, author = {Jeon, Inseok and Lee, Minhyeok and Lee, Seunghoon and Kang, Minseok and Cho, Suhwan and Lee, Sangyoun}, title = {Seen-to-Scene: Keep the Seen, Generate the Unseen for Video Outpainting}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {4266--4275} }
ForenDeX: Unlocking Forensic Insights for Explainable AI-Generated Image Detection: Chuangchuang Tan,

Jinglu Wang,

Xiang Ming,

Renshuai Tao,

Yunchao Wei,

Yao Zhao,

Yan Lu; [pdf]
[bibtex]
@InProceedings{Tan_2026_CVPR, author = {Tan, Chuangchuang and Wang, Jinglu and Ming, Xiang and Tao, Renshuai and Wei, Yunchao and Zhao, Yao and Lu, Yan}, title = {{ForenDeX}: Unlocking Forensic Insights for Explainable {AI}-Generated Image Detection}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {6592--6601} }
Gaze into the Details: Locality-Sensitive Enhancement for OCTA Retinal Vessel Segmentation: Tuopusen Huang,

Ding Ma,

Xiangqian Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2026_CVPR, author = {Huang, Tuopusen and Ma, Ding and Wu, Xiangqian}, title = {Gaze into the Details: Locality-Sensitive Enhancement for {OCTA} Retinal Vessel Segmentation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {5409--5418} }
DELRER: Disease Evolution-Informed Longitudinal Radiology Report Generation: Kaiyu Wang,

Bing Wang,

Changchun Li,

You Lu,

Yaning Wang,

Huimao Zhang,

Ximing Li; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Kaiyu and Wang, Bing and Li, Changchun and Lu, You and Wang, Yaning and Zhang, Huimao and Li, Ximing}, title = {{DELRER}: Disease Evolution-Informed Longitudinal Radiology Report Generation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {5357--5367} }
THOM: Generating Physically Plausible Hand-Object Meshes From Text: Uyoung Jeong,

Yihalem Yimolal Tiruneh,

Hyung Jin Chang,

Seungryul Baek,

Kwang In Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2026_CVPR, author = {Jeong, Uyoung and Tiruneh, Yihalem Yimolal and Chang, Hyung Jin and Baek, Seungryul and Kim, Kwang In}, title = {{THOM}: Generating Physically Plausible Hand-Object Meshes From Text}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {3653--3664} }
A Denoising-Enhanced Multimodal Learning Framework for Robust Nasal Endoscopy Report Generation: Xinpan Yuan,

Mingzhu Huang,

Liujie Hua,

Jianuo Ju,

Xiaowei Zhao,

Lin Yuanbo Wu; [pdf]
[bibtex]
@InProceedings{Yuan_2026_CVPR, author = {Yuan, Xinpan and Huang, Mingzhu and Hua, Liujie and Ju, Jianuo and Zhao, Xiaowei and Wu, Lin Yuanbo}, title = {A Denoising-Enhanced Multimodal Learning Framework for Robust Nasal Endoscopy Report Generation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {5545--5555} }
Elicit and Enhance: Advancing Multimodal Reasoning in Medical Scenarios: Zhongzhen Huang,

Linjie Mu,

Yannian Gu,

Kangzhe Hu,

Shengyi Hua,

Xiaofan Zhang; [pdf]
[bibtex]
@InProceedings{Huang_2026_CVPR, author = {Huang, Zhongzhen and Mu, Linjie and Gu, Yannian and Hu, Kangzhe and Hua, Shengyi and Zhang, Xiaofan}, title = {Elicit and Enhance: Advancing Multimodal Reasoning in Medical Scenarios}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {5609--5619} }
PRADA: Probability-Ratio-Based Attribution and Detection of Autoregressive-Generated Images: Simon Damm,

Jonas Ricker,

Henning Petzka,

Asja Fischer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Damm_2026_CVPR, author = {Damm, Simon and Ricker, Jonas and Petzka, Henning and Fischer, Asja}, title = {{PRADA}: Probability-Ratio-Based Attribution and Detection of Autoregressive-Generated Images}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {6506--6516} }
Personalized Functional Brain Network Modeling with Adaptive Auto-Weighted Learning for Automatic Brain Disorder Diagnosis: Yan Zhang,

Kun Liu,

Min Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Yan and Liu, Kun and Li, Min}, title = {Personalized Functional Brain Network Modeling with Adaptive Auto-Weighted Learning for Automatic Brain Disorder Diagnosis}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {5505--5514} }
RealDiffusion: Physics-informed Attention for Multi-character Storybook Generation: Qi Zhao,

Jun Chen,

Ivor Tsang,

Guang Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR, author = {Zhao, Qi and Chen, Jun and Tsang, Ivor and Dai, Guang}, title = {{RealDiffusion}: Physics-informed Attention for Multi-character Storybook Generation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {4698--4707} }
BLEG: LLM Functions as Powerful fMRI Graph-Enhancer for Brain Network Analysis: Rui Dong,

Zitong Wang,

Jiaxing Li,

Weihuang Zheng,

Youyong Kong; [pdf] [arXiv]
[bibtex]
@InProceedings{Dong_2026_CVPR, author = {Dong, Rui and Wang, Zitong and Li, Jiaxing and Zheng, Weihuang and Kong, Youyong}, title = {{BLEG}: {LLM} Functions as Powerful {fMRI} Graph-Enhancer for Brain Network Analysis}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {5662--5672} }
Self-Guided Integrated Gradient Method for Attribution: Sabrina Henry,

Alice Ruget,

Stirling Scholes,

Jonathan Leach; [pdf] [supp]
[bibtex]
@InProceedings{Henry_2026_CVPR, author = {Henry, Sabrina and Ruget, Alice and Scholes, Stirling and Leach, Jonathan}, title = {Self-Guided Integrated Gradient Method for Attribution}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {3312--3321} }
VHOI: Controllable Video Generation of Human-Object Interactions from Sparse Trajectories via Motion Densification: Wanyue Zhang,

Lin Geng Foo,

Thabo Beeler,

Rishabh Dabral,

Christian Theobalt; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR, author = {Zhang, Wanyue and Foo, Lin Geng and Beeler, Thabo and Dabral, Rishabh and Theobalt, Christian}, title = {{VHOI}: Controllable Video Generation of Human-Object Interactions from Sparse Trajectories via Motion Densification}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {4009--4021} }
UnfoldIR: Rethinking Deep Unfolding Network in Illumination Degradation Image Restoration: Chunming He,

Rihan Zhang,

Fengyang Xiao,

Chengyu Fang,

Longxiang Tang,

Rui Zhang,

Sina Farsiu; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR, author = {He, Chunming and Zhang, Rihan and Xiao, Fengyang and Fang, Chengyu and Tang, Longxiang and Zhang, Rui and Farsiu, Sina}, title = {{UnfoldIR}: Rethinking Deep Unfolding Network in Illumination Degradation Image Restoration}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {5003--5013} }
UniTalking: A Unified Audio-Video Framework for Talking Portrait Generation: Hebeizi Li,

Benyuan Sun,

Yi Yang,

Zihao Liang,

Zihao Yin,

Xiao Sha,

Chenliang Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Hebeizi and Sun, Benyuan and Yang, Yi and Liang, Zihao and Yin, Zihao and Sha, Xiao and Wang, Chenliang}, title = {{UniTalking}: A Unified Audio-Video Framework for Talking Portrait Generation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {4647--4656} }
CADReasoner: Iterative Program Editing for CAD Reverse Engineering: Soslan Kabisov,

Vsevolod Kirichuk,

Andrey Volkov,

Marina Barannikov,

Gennadiy Savrasov,

Anton Konushin,

Andrey Kuznetsov,

Dmitrii Zhemchuzhnikov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kabisov_2026_CVPR, author = {Kabisov, Soslan and Kirichuk, Vsevolod and Volkov, Andrey and Barannikov, Marina and Savrasov, Gennadiy and Konushin, Anton and Kuznetsov, Andrey and Zhemchuzhnikov, Dmitrii}, title = {{CADReasoner}: Iterative Program Editing for {CAD} Reverse Engineering}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {6143--6153} }
MoVieDrive: Urban Scene Synthesis with Multi-Modal Multi-View Video Diffusion Transformer: Guile Wu,

David Huang,

Dongfeng Bai,

Bingbing Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR, author = {Wu, Guile and Huang, David and Bai, Dongfeng and Liu, Bingbing}, title = {{MoVieDrive}: Urban Scene Synthesis with Multi-Modal Multi-View Video Diffusion Transformer}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {4290--4299} }
S^2DiT: Sandwich Diffusion Transformer for Mobile Streaming Video Generation: Lin Zhao,

Yushu Wu,

Aleksei Lebedev,

Dishani Lahiri,

Meng Dong,

Arpit Sahni,

Michael Vasilkovsky,

Hao Chen,

Ju Hu,

Aliaksandr Siarohin,

Sergey Tulyakov,

Yanzhi Wang,

Anil Kag,

Yanyu Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR, author = {Zhao, Lin and Wu, Yushu and Lebedev, Aleksei and Lahiri, Dishani and Dong, Meng and Sahni, Arpit and Vasilkovsky, Michael and Chen, Hao and Hu, Ju and Siarohin, Aliaksandr and Tulyakov, Sergey and Wang, Yanzhi and Kag, Anil and Li, Yanyu}, title = {{S$^2$DiT}: Sandwich Diffusion Transformer for Mobile Streaming Video Generation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {4355--4365} }
VeCoR -- Velocity Contrastive Regularization for Flow Matching: Zong-Wei Hong,

Jing-Lun Li,

Lin-Ze Li,

Shen Zhang,

Yao Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2026_CVPR, author = {Hong, Zong-Wei and Li, Jing-Lun and Li, Lin-Ze and Zhang, Shen and Tang, Yao}, title = {{VeCoR} -- Velocity Contrastive Regularization for Flow Matching}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {4739--4748} }
Text-Driven Reasoning Video Editing via Reinforcement Learning on Digital Twin Representations: Yiqing Shen,

Chenjia Li,

Mathias Unberath; [pdf] [arXiv]
[bibtex]
@InProceedings{Shen_2026_CVPR, author = {Shen, Yiqing and Li, Chenjia and Unberath, Mathias}, title = {Text-Driven Reasoning Video Editing via Reinforcement Learning on Digital Twin Representations}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {3945--3954} }
GReD-RSITR: A Generative Re-Examined Discriminative Framework for Remote Sensing Image-Text Retrieval: Shuhuai Wang,

Songwei Pei,

Bingfeng Liu,

Yuanzhou Huang,

Qian Li,

Shangguang Wang; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR, author = {Wang, Shuhuai and Pei, Songwei and Liu, Bingfeng and Huang, Yuanzhou and Li, Qian and Wang, Shangguang}, title = {{GReD-RSITR}: A Generative Re-Examined Discriminative Framework for Remote Sensing Image-Text Retrieval}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {6312--6321} }
Bridging Day and Night: Unsupervised Cross-Domain Re-Identification with Synergistic Prompt and Prototype Learning: Jiyang Xu,

Rui Liu,

Hang Dai; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Jiyang and Liu, Rui and Dai, Hang}, title = {Bridging Day and Night: Unsupervised Cross-Domain Re-Identification with Synergistic Prompt and Prototype Learning}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {6612--6621} }
OminPSD: Layered PSD Generation with Diffusion Transformer: Cheng Liu,

Yiren Song,

Haofan Wang,

Mike Zheng Shou; [pdf]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Cheng and Song, Yiren and Wang, Haofan and Shou, Mike Zheng}, title = {{OminPSD}: Layered {PSD} Generation with Diffusion Transformer}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {4190--4201} }
Rolling and Denoising: Rethinking Dynamic Modal Fusion for Multi-Modal Object Re-Identification: Shihao Li,

Huaibo Huang,

Aihua Zheng,

Jin Tang,

Ran He; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR, author = {Li, Shihao and Huang, Huaibo and Zheng, Aihua and Tang, Jin and He, Ran}, title = {Rolling and Denoising: Rethinking Dynamic Modal Fusion for Multi-Modal Object Re-Identification}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {6560--6569} }
Less is More: Multimodal Human Pose Estimation with Selective Fusion: Yutong Xu,

Qianyi Huang,

Xu Chen; [pdf]
[bibtex]
@InProceedings{Xu_2026_CVPR, author = {Xu, Yutong and Huang, Qianyi and Chen, Xu}, title = {Less is More: Multimodal Human Pose Estimation with Selective Fusion}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {3575--3584} }
DenoiseGS: Gaussian Reconstruction Model for Burst Denoising: Yongsen Cheng,

Yuanhao Cai,

Yulun Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Cheng_2026_CVPR, author = {Cheng, Yongsen and Cai, Yuanhao and Zhang, Yulun}, title = {{DenoiseGS}: {Gaussian} Reconstruction Model for Burst Denoising}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {5096--5105} }
Video4Spatial: Towards Visuospatial Intelligence with Context-Guided Video Generation: Zeqi Xiao,

Yiwei Zhao,

Lingxiao Li,

Yushi Lan,

Ning Yu,

Rahul Garg,

Mohammad H. Taghavi,

Xingang Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2026_CVPR, author = {Xiao, Zeqi and Zhao, Yiwei and Li, Lingxiao and Lan, Yushi and Yu, Ning and Garg, Rahul and Taghavi, Mohammad H. and Pan, Xingang}, title = {{Video4Spatial}: Towards Visuospatial Intelligence with Context-Guided Video Generation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {3933--3944} }
Optical Tolerance-Compensated Diffusion Model for Image Restoration: Hongji Dong,

Huihui Gong,

Tanli Zuo,

Yu Zhao,

Jin Dai,

Jingduo Tian,

Kai Ni; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2026_CVPR, author = {Dong, Hongji and Gong, Huihui and Zuo, Tanli and Zhao, Yu and Dai, Jin and Tian, Jingduo and Ni, Kai}, title = {Optical Tolerance-Compensated Diffusion Model for Image Restoration}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {5064--5074} }
EI: Early Intervention for Multimodal Imaging Based Disease Recognition: Qijie Wei,

HaiLan Lin,

Xirong Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR, author = {Wei, Qijie and Lin, HaiLan and Li, Xirong}, title = {{EI}: Early Intervention for Multimodal Imaging Based Disease Recognition}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {5632--5640} }
Cross-Resolution Diffusion Models Via Network Pruning: Jiaxuan Ren,

Junhan Zhu,

Huan Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2026_CVPR, author = {Ren, Jiaxuan and Zhu, Junhan and Wang, Huan}, title = {Cross-Resolution Diffusion Models Via Network Pruning}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {4224--4233} }
GR-Diffusion: Graph-Guided Relational-Aware Diffusion via Attention Alignment: Xiaochen Liu,

Xiaoting Xi,

Chao Yin,

Xiaoqiang Li,

Daoguo Dong; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR, author = {Liu, Xiaochen and Xi, Xiaoting and Yin, Chao and Li, Xiaoqiang and Dong, Daoguo}, title = {{GR-Diffusion}: Graph-Guided Relational-Aware Diffusion via Attention Alignment}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {3759--3768} }
One Layer Is Enough: Adapting Pretrained Visual Encoders for Image Generation: Yuan Gao,

Chen Chen,

Jiatao Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2026_CVPR, author = {Gao, Yuan and Chen, Chen and Gu, Jiatao}, title = {One Layer Is Enough: Adapting Pretrained Visual Encoders for Image Generation}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {4688--4697} }
Eevee: Towards Close-up High-resolution Video-based Virtual Try-on: Jianhao Zeng,

Yancheng Bai,

Ruidong Chen,

Xuanpu Zhang,

Lei Sun,

Dongyang Jin,

Ryan Xu,

Nannan Zhang,

Dan Song,

Xiangxiang Chu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2026_CVPR, author = {Zeng, Jianhao and Bai, Yancheng and Chen, Ruidong and Zhang, Xuanpu and Sun, Lei and Jin, Dongyang and Xu, Ryan and Zhang, Nannan and Song, Dan and Chu, Xiangxiang}, title = {Eevee: Towards Close-up High-resolution Video-based Virtual Try-on}, booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings}, month = jun, year = {2026}, pages = {4614--4624} }
NumeriKontrol: Adding Numeric Control to Diffusion Transformers for Instruction-based Image Editing: Zhenyu Xu,

Xiaoqi Shen,

Haotian Nan,

Xinyu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2026_CVPR,
  author    = {Xu, Zhenyu and Shen, Xiaoqi and Nan, Haotian and Zhang, Xinyu},
  title     = {NumeriKontrol: Adding Numeric Control to Diffusion Transformers for Instruction-based Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {4389-4399}
}
AceMIL: Ordinal-Aware Multiple Instance Learning for Pathological Progression Analysis: Shijie Li,

Yiming Chen,

Yingyun Gong,

Hongwen Zhou,

Feng-Jung Chen,

Xieping Gao,

Zhineng Chen; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Shijie and Chen, Yiming and Gong, Yingyun and Zhou, Hongwen and Chen, Feng-Jung and Gao, Xieping and Chen, Zhineng},
  title     = {AceMIL: Ordinal-Aware Multiple Instance Learning for Pathological Progression Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {5336-5346}
}
EggHand: A Multimodal Foundation Model for Egocentric Hand Pose Forecasting: Jaeyoung Choi,

Hyeondong Kim,

Yujin Kim,

Daehee Park; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Jaeyoung and Kim, Hyeondong and Kim, Yujin and Park, Daehee},
  title     = {EggHand: A Multimodal Foundation Model for Egocentric Hand Pose Forecasting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {3521-3531}
}
VisionCreator: A Native Visual-Generation Agentic Model with Understanding, Thinking, Planning and Creation: Jinxiang Lai,

Zexin Lu,

Jiajun He,

Rongwei Quan,

Wenzhe Zhao,

Qinyu Yang,

Qi Chen,

Qin Lin,

Chuyue Li,

Tao Gao,

Yuhao Shan,

Song Guo,

Qinglin Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Lai_2026_CVPR,
  author    = {Lai, Jinxiang and Lu, Zexin and He, Jiajun and Quan, Rongwei and Zhao, Wenzhe and Yang, Qinyu and Chen, Qi and Lin, Qin and Li, Chuyue and Gao, Tao and Shan, Yuhao and Guo, Song and Lu, Qinglin},
  title     = {VisionCreator: A Native Visual-Generation Agentic Model with Understanding, Thinking, Planning and Creation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {4140-4149}
}
PEdit: Pareto-Guided Image Editing via Dynamic Latent Trajectory Control: Sooyeon Park,

Jaeil Park,

Sung-Bae Cho; [pdf] [supp]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Sooyeon and Park, Jaeil and Cho, Sung-Bae},
  title     = {PEdit: Pareto-Guided Image Editing via Dynamic Latent Trajectory Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {4800-4809}
}
QDM: Quadtree-Based Region-Adaptive Sparse Diffusion Models for Efficient Image Super-Resolution: Donglin Yang,

Paul Vicol,

Xiaojuan Qi,

Renjie Liao,

Xiaofan Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Donglin and Vicol, Paul and Qi, Xiaojuan and Liao, Renjie and Zhang, Xiaofan},
  title     = {QDM: Quadtree-Based Region-Adaptive Sparse Diffusion Models for Efficient Image Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {5044-5053}
}
Weakly Supervised Micro-Expression Spotting based on Boundary Refinement Mechanism and Cross-subject Learning Representation: Zhihua Xie,

Haolin Chang,

Guohua Miao,

Jianing Chen; [pdf]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Zhihua and Chang, Haolin and Miao, Guohua and Chen, Jianing},
  title     = {Weakly Supervised Micro-Expression Spotting based on Boundary Refinement Mechanism and Cross-subject Learning Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {3428-3437}
}
OffNadirLoc: Benchmark and Framework for Challenging UAV-to-Satellite Geo-Localization under Large Off-Nadir Views: Qian Qiao,

Wenye Liu,

Ting Liu,

Jiuhe Shu,

Peng Wang; [pdf]
[bibtex]
@InProceedings{Qiao_2026_CVPR,
  author    = {Qiao, Qian and Liu, Wenye and Liu, Ting and Shu, Jiuhe and Wang, Peng},
  title     = {OffNadirLoc: Benchmark and Framework for Challenging UAV-to-Satellite Geo-Localization under Large Off-Nadir Views},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {6394-6403}
}
PLCReg: Correlation-Aware Polar-Linear Attention for Guiding Medical Image Registration: Yedi Zhang,

Wenhui Huang,

Yuanjie Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yedi and Huang, Wenhui and Zheng, Yuanjie},
  title     = {PLCReg: Correlation-Aware Polar-Linear Attention for Guiding Medical Image Registration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {5535-5544}
}
PhySe-RPO: Physics and Semantics Guided Relative Policy Optimization for Diffusion-Based Surgical Smoke Removal: Zining Fang,

Cheng Xue,

Chunhui Liu,

Bin Xu,

Ming Chen,

Xiaowei Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2026_CVPR,
  author    = {Fang, Zining and Xue, Cheng and Liu, Chunhui and Xu, Bin and Chen, Ming and Hu, Xiaowei},
  title     = {PhySe-RPO: Physics and Semantics Guided Relative Policy Optimization for Diffusion-Based Surgical Smoke Removal},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {5347-5356}
}
Do Audio-Visual Large Language Models Really See and Hear?: Ramaneswaran Selvakumar,

Kaousheik Jayakumar,

S Sakshi,

Sreyan Ghosh,

Ruohan Gao,

Dinesh Manocha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Selvakumar_2026_CVPR,
  author    = {Selvakumar, Ramaneswaran and Jayakumar, Kaousheik and Sakshi, S and Ghosh, Sreyan and Gao, Ruohan and Manocha, Dinesh},
  title     = {Do Audio-Visual Large Language Models Really See and Hear?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {5892-5902}
}
Prompt-Guided Image Editing with Masked Logit Nudging in Visual Autoregressive Models: Amir El-Ghoussani,

Marc Hölle,

Gustavo Carneiro,

Vasileios Belagiannis; [pdf] [supp]
[bibtex]
@InProceedings{El-Ghoussani_2026_CVPR,
  author    = {El-Ghoussani, Amir and H{\"o}lle, Marc and Carneiro, Gustavo and Belagiannis, Vasileios},
  title     = {Prompt-Guided Image Editing with Masked Logit Nudging in Visual Autoregressive Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {4810-4820}
}
Beyond Pixel Loss: Video-INRs Prefer Perceptual Optimization: Junqi Shi,

Wuyang Cong,

Ming Lu,

Bowei Xu,

Zhan Ma; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Junqi and Cong, Wuyang and Lu, Ming and Xu, Bowei and Ma, Zhan},
  title     = {Beyond Pixel Loss: Video-INRs Prefer Perceptual Optimization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {4843-4854}
}
Depth Adaptive Efficient Visual Autoregressive Modeling: Chunliang Li,

Tianze Cao,

Sanyuan Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Chunliang and Cao, Tianze and Zhao, Sanyuan},
  title     = {Depth Adaptive Efficient Visual Autoregressive Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {4213-4223}
}
Jano: Adaptive Diffusion Generation with Early-Stage Convergence Awareness: Yuyang Chen,

Linqian Zeng,

Yijin Zhou,

Hengjie Li,

Jidong Zhai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Yuyang and Zeng, Linqian and Zhou, Yijin and Li, Hengjie and Zhai, Jidong},
  title     = {Jano: Adaptive Diffusion Generation with Early-Stage Convergence Awareness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {4486-4494}
}
Unified Urban Tuning: Co-Enhancing Satellite and Street View Reasoning with a Progressive Tuning Framework: Yong Li,

Weiyu Zhang,

Ling Dai,

Jian Yang,

Dacheng Yin,

Sirun Li,

Jing Lyu,

Fengyun Rao,

Fan Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yong and Zhang, Weiyu and Dai, Ling and Yang, Jian and Yin, Dacheng and Li, Sirun and Lyu, Jing and Rao, Fengyun and Zhang, Fan},
  title     = {Unified Urban Tuning: Co-Enhancing Satellite and Street View Reasoning with a Progressive Tuning Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {6302-6311}
}
Anomaly Agent: Unified Anomaly Retrieval and Synthesis Before Manufacturing: Xiangyue Li,

Xiaoyang Wang,

Siyue Yao,

Mingjie Sun,

Yupei Wu; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xiangyue and Wang, Xiaoyang and Yao, Siyue and Sun, Mingjie and Wu, Yupei},
  title     = {Anomaly Agent: Unified Anomaly Retrieval and Synthesis Before Manufacturing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {4345-4354}
}
ControlPose: High-Fidelity Pose-Controlled Image Generation with Multi-Faceted Pose Disentanglement: Zhongjing Du,

Xiao Chen,

Zhiwei Nie,

Yuxuan Chen,

Chang Liu,

Xiangyang Ji,

Jie Chen; [pdf] [supp]
[bibtex]
@InProceedings{Du_2026_CVPR,
  author    = {Du, Zhongjing and Chen, Xiao and Nie, Zhiwei and Chen, Yuxuan and Liu, Chang and Ji, Xiangyang and Chen, Jie},
  title     = {ControlPose: High-Fidelity Pose-Controlled Image Generation with Multi-Faceted Pose Disentanglement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {3851-3860}
}
P^2CS: Parallel Point Cloud Pre-Training with Semantic Consistency: Linshuang Diao,

Sensen Song,

Yuan Jia,

Yurong Qian,

Dayong Ren; [pdf]
[bibtex]
@InProceedings{Diao_2026_CVPR,
  author    = {Diao, Linshuang and Song, Sensen and Jia, Yuan and Qian, Yurong and Ren, Dayong},
  title     = {P{\textasciicircum}2CS: Parallel Point Cloud Pre-Training with Semantic Consistency},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {5117-5126}
}
PHYLOMAN: Generative Behavior Control via Fusing LLM Planning and Physics-based Control: Jusheng Zhang,

Jinzhou Tang,

Sidi Liu,

Jian Wang,

Keze Wang; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jusheng and Tang, Jinzhou and Liu, Sidi and Wang, Jian and Wang, Keze},
  title     = {PHYLOMAN: Generative Behavior Control via Fusing LLM Planning and Physics-based Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {3585-3597}
}
OmniHead: A Unified Model for Dynamic Nonverbal Facial Behaviors: Pierre Vuillecard,

Jean-Marc Odobez; [pdf] [supp]
[bibtex]
@InProceedings{Vuillecard_2026_CVPR,
  author    = {Vuillecard, Pierre and Odobez, Jean-Marc},
  title     = {OmniHead: A Unified Model for Dynamic Nonverbal Facial Behaviors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {3553-3564}
}
How far have we gone in Generative Image Restoration? A study on its capability, limitations and evaluation practices: Xiang Yin,

Jinfan Hu,

Zhiyuan You,

Kainan Yan,

Yu Tang,

Chao Dong,

Jinjin Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Xiang and Hu, Jinfan and You, Zhiyuan and Yan, Kainan and Tang, Yu and Dong, Chao and Gu, Jinjin},
  title     = {How far have we gone in Generative Image Restoration? A study on its capability, limitations and evaluation practices},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {4909-4919}
}
VoxFace: Streaming Audio-Visual Synthesis via Relay-Style Multi-Token Prediction for Interactive Conversation: Junwen Xiong,

Chuanyue Li,

Peng Zhang; [pdf]
[bibtex]
@InProceedings{Xiong_2026_CVPR,
  author    = {Xiong, Junwen and Li, Chuanyue and Zhang, Peng},
  title     = {VoxFace: Streaming Audio-Visual Synthesis via Relay-Style Multi-Token Prediction for Interactive Conversation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {3543-3552}
}
PTF-CT: Polar-Aware Temporal-Frequential Iterative Reconstruction for Sparse-View CT: Borui Kang,

Guanyi Qin,

Chuanpu Li,

Yueming Jin; [pdf] [supp]
[bibtex]
@InProceedings{Kang_2026_CVPR,
  author    = {Kang, Borui and Qin, Guanyi and Li, Chuanpu and Jin, Yueming},
  title     = {PTF-CT: Polar-Aware Temporal-Frequential Iterative Reconstruction for Sparse-View CT},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {5214-5223}
}
Two-Stage 3D Pulmonary Vessel Reconstruction via Trunk--Expansion Coupled Point Cloud Generation: Jie Zhang,

Yu Xin,

Guoqing Li; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Jie and Xin, Yu and Li, Guoqing},
  title     = {Two-Stage 3D Pulmonary Vessel Reconstruction via Trunk--Expansion Coupled Point Cloud Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {5245-5254}
}
Cross-Modal-Domain Generalization Through Semantically Aligned Discrete Representations: Souptik Sen,

Raneen Younis,

Zahra Ahmadi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sen_2026_CVPR,
  author    = {Sen, Souptik and Younis, Raneen and Ahmadi, Zahra},
  title     = {Cross-Modal-Domain Generalization Through Semantically Aligned Discrete Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {6080-6089}
}
HM-Talker: Hybrid Motion Modeling for High-Fidelity Talking Head Synthesis: Shiyu Liu,

Kui Jiang,

Junjun Jiang,

Xianming Liu,

Xiaocheng Feng,

Fei Ma,

Hongxun Yao,

Qi Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Shiyu and Jiang, Kui and Jiang, Junjun and Liu, Xianming and Feng, Xiaocheng and Ma, Fei and Yao, Hongxun and Tian, Qi},
  title     = {HM-Talker: Hybrid Motion Modeling for High-Fidelity Talking Head Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {3729-3738}
}
M^3D-BFS: a Multi-Stage Dynamic Fusion Strategy for Sample-Adaptive Multi-Modal Brain Network Analysis: Rui Dong,

Xiaotong Zhang,

Jiaxing Li,

Yueying Li,

Jiayin Wei,

Youyong Kong; [pdf]
[bibtex]
@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Rui and Zhang, Xiaotong and Li, Jiaxing and Li, Yueying and Wei, Jiayin and Kong, Youyong},
  title     = {M{\textasciicircum}3D-BFS: a Multi-Stage Dynamic Fusion Strategy for Sample-Adaptive Multi-Modal Brain Network Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {5419-5429}
}
Gated Differential Linear Attention: A Linear-Time Decoder for High-Fidelity Medical Segmentation: Hongbo Zheng,

Afshin Bozorgpour,

Dorit Merhof,

Minjia Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2026_CVPR,
  author    = {Zheng, Hongbo and Bozorgpour, Afshin and Merhof, Dorit and Zhang, Minjia},
  title     = {Gated Differential Linear Attention: A Linear-Time Decoder for High-Fidelity Medical Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {5579-5588}
}
HippoMM: Hippocampal-inspired Multimodal Memory for Long Audiovisual Event Understanding: Yueqian Lin,

Jingyang Zhang,

Qinsi Wang,

Hancheng Ye,

Yuzhe Fu,

Yudong Liu,

Hai Helen Li,

Yiran Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2026_CVPR,
  author    = {Lin, Yueqian and Zhang, Jingyang and Wang, Qinsi and Ye, Hancheng and Fu, Yuzhe and Liu, Yudong and Li, Hai Helen and Chen, Yiran},
  title     = {HippoMM: Hippocampal-inspired Multimodal Memory for Long Audiovisual Event Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {5968-5977}
}
FREE: Uncertainty-Aware Autoregression for Parallel Diffusion Transformers: Xinwan Wen,

Bowen Li,

Jiajun Luo,

Ye Li,

Zhi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2026_CVPR,
  author    = {Wen, Xinwan and Li, Bowen and Luo, Jiajun and Li, Ye and Wang, Zhi},
  title     = {FREE: Uncertainty-Aware Autoregression for Parallel Diffusion Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {4097-4107}
}
Controllable Radar Simulation with Waveform Parameter Embedding: Weiqing Xiao,

Hao Huang,

Chonghao Zhong,

Yujie Lin,

Nan Wang,

Xiaoxue Chen,

Zhaoxi Chen,

Saining Zhang,

Shuocheng Yang,

Pierre Merriaux,

Lei Lei,

Hao Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Weiqing and Huang, Hao and Zhong, Chonghao and Lin, Yujie and Wang, Nan and Chen, Xiaoxue and Chen, Zhaoxi and Zhang, Saining and Yang, Shuocheng and Merriaux, Pierre and Lei, Lei and Zhao, Hao},
  title     = {Controllable Radar Simulation with Waveform Parameter Embedding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {6424-6434}
}
DiFlowDubber: Discrete Flow Matching for Automated Video Dubbing via Cross-Modal Alignment and Synchronization: Ngoc-Son Nguyen,

Thanh V. T. Tran,

Jeongsoo Choi,

Hieu-Nghia Huynh-Nguyen,

Truong-Son Hy,

Van Nguyen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Ngoc-Son and Tran, Thanh V. T. and Choi, Jeongsoo and Huynh-Nguyen, Hieu-Nghia and Hy, Truong-Son and Nguyen, Van},
  title     = {DiFlowDubber: Discrete Flow Matching for Automated Video Dubbing via Cross-Modal Alignment and Synchronization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {5838-5848}
}
PrismNet: Semantic-Aware Image Enhancement via Vision Transformer and Zero-Cost Gating: Ruichen Zhang; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Ruichen},
  title     = {PrismNet: Semantic-Aware Image Enhancement via Vision Transformer and Zero-Cost Gating},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {4866-4876}
}
ReCliFF: Adaptive Orthogonal Decoupling for Federated Fine-tuning of Medical MLLMs: Yuncheng Jiang,

Chun-Mei Feng,

Rui Sun,

Le Zhang; [pdf]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Yuncheng and Feng, Chun-Mei and Sun, Rui and Zhang, Le},
  title     = {ReCliFF: Adaptive Orthogonal Decoupling for Federated Fine-tuning of Medical MLLMs},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {5275-5284}
}
Inference-Time Alignment of Diffusion Models with Evolutionary Algorithms: Purvish Jajal,

Nicholas John Eliopoulos,

Benjamin Shiue-Hal Chou,

George K Thiruvathukal,

James C. Davis,

Yung-Hsiang Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jajal_2026_CVPR,
  author    = {Jajal, Purvish and Eliopoulos, Nicholas John and Chou, Benjamin Shiue-Hal and Thiruvathukal, George K and Davis, James C. and Lu, Yung-Hsiang},
  title     = {Inference-Time Alignment of Diffusion Models with Evolutionary Algorithms},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {4118-4128}
}
How2Sign-Synth3D: Markerless Holistic Sign Language Performance Capture and Synthetic Data for Dense Landmark Tracking: Levente Tempfli,

Stephan Huber,

Oscar Koller,

Amanda Duarte; [pdf]
[bibtex]
@InProceedings{Tempfli_2026_CVPR,
  author    = {Tempfli, Levente and Huber, Stephan and Koller, Oscar and Duarte, Amanda},
  title     = {How2Sign-Synth3D: Markerless Holistic Sign Language Performance Capture and Synthetic Data for Dense Landmark Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {3501-3509}
}
Learning Predictive Visuomotor Coordination: Wenqi Jia,

Bolin Lai,

Xu Cao,

Miao Liu,

Danfei Xu,

James M. Rehg; [pdf] [supp]
[bibtex]
@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Wenqi and Lai, Bolin and Cao, Xu and Liu, Miao and Xu, Danfei and Rehg, James M.},
  title     = {Learning Predictive Visuomotor Coordination},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {3609-3619}
}
Generative Visual Chain-of-Thought for Image Editing: Zijin Yin,

Tiankai Hang,

Yiji Cheng,

Shiyi Zhang,

Runze He,

Yu Xu,

Chunyu Wang,

Bing Li,

Zheng Chang,

Kongming Liang,

Qinglin Lu,

Zhanyu Ma; [pdf] [arXiv]
[bibtex]
@InProceedings{Yin_2026_CVPR,
  author    = {Yin, Zijin and Hang, Tiankai and Cheng, Yiji and Zhang, Shiyi and He, Runze and Xu, Yu and Wang, Chunyu and Li, Bing and Chang, Zheng and Liang, Kongming and Lu, Qinglin and Ma, Zhanyu},
  title     = {Generative Visual Chain-of-Thought for Image Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {4657-4667}
}
Actionable Human Motion Generation via Latent Imitation and Fine-Grained Text Completion: Feiyang Xie,

Haoqi Yuan,

Zongqing Lu; [pdf]
[bibtex]
@InProceedings{Xie_2026_CVPR,
  author    = {Xie, Feiyang and Yuan, Haoqi and Lu, Zongqing},
  title     = {Actionable Human Motion Generation via Latent Imitation and Fine-Grained Text Completion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {3384-3393}
}
Deep-to-Shallow Knowledge Transfer: Multi-Scale Self-Distillation with Bidirectional Aware for 3D Brain Segmentation: Ziwei Zhang,

Dayu Tan,

Xin Peng,

Weimin Zhong; [pdf]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Ziwei and Tan, Dayu and Peng, Xin and Zhong, Weimin},
  title     = {Deep-to-Shallow Knowledge Transfer: Multi-Scale Self-Distillation with Bidirectional Aware for 3D Brain Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {5158-5167}
}
UniLayDiff: A Unified Diffusion Transformer for Content-Aware Layout Generation: Zeyang Liu,

Le Wang,

Sanping Zhou,

Yuxuan Wu,

Xiaolong Sun,

Gang Hua,

Haoxiang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Zeyang and Wang, Le and Zhou, Sanping and Wu, Yuxuan and Sun, Xiaolong and Hua, Gang and Li, Haoxiang},
  title     = {UniLayDiff: A Unified Diffusion Transformer for Content-Aware Layout Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {4668-4677}
}
AlignVAR: Towards Globally Consistent Visual Autoregression for Image Super-Resolution: Cencen Liu,

Dongyang Zhang,

Wen Yin,

Jielei Wang,

Tianyu Li,

Ji Guo,

Wenbo Jiang,

Guoqing Wang,

Guoming Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Cencen and Zhang, Dongyang and Yin, Wen and Wang, Jielei and Li, Tianyu and Guo, Ji and Jiang, Wenbo and Wang, Guoqing and Lu, Guoming},
  title     = {AlignVAR: Towards Globally Consistent Visual Autoregression for Image Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {5054-5063}
}
Deepfake-Agent: Aggregating Semantic Forgery Clues for Generalizable Detection: Xiao Guo,

Yue Zhang,

Mohit Bansal,

Xiaoming Liu; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2026_CVPR,
  author    = {Guo, Xiao and Zhang, Yue and Bansal, Mohit and Liu, Xiaoming},
  title     = {Deepfake-Agent: Aggregating Semantic Forgery Clues for Generalizable Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {4898-4908}
}
CrossWeaver: Towards Efficient Cross-Modal Interweaving and Decoupling for Weakly-Aligned Multispectral Object Detection: Haitian Yang,

Juan Fang,

Yiren Zhu,

Xudong Zhao,

Yufei Guo,

Xiaohan Zhang,

Xiaoxing Hu,

Xue Yang,

Qi Ming; [pdf]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Haitian and Fang, Juan and Zhu, Yiren and Zhao, Xudong and Guo, Yufei and Zhang, Xiaohan and Hu, Xiaoxing and Yang, Xue and Ming, Qi},
  title     = {CrossWeaver: Towards Efficient Cross-Modal Interweaving and Decoupling for Weakly-Aligned Multispectral Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {6361-6370}
}
GHOST: Fast Category-Agnostic Hand-Object Interaction Reconstruction from RGB Videos Using Gaussian Splatting: Ahmed Tawfik Aboukhadra,

Marcel Rogge,

Nadia Robertini,

Abdalla Arafa,

Jameel Malik,

Ahmed Elhayek,

Didier Stricker; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Aboukhadra_2026_CVPR,
  author    = {Aboukhadra, Ahmed Tawfik and Rogge, Marcel and Robertini, Nadia and Arafa, Abdalla and Malik, Jameel and Elhayek, Ahmed and Stricker, Didier},
  title     = {GHOST: Fast Category-Agnostic Hand-Object Interaction Reconstruction from RGB Videos Using Gaussian Splatting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {3394-3404}
}
Mitigating Batch Effects in Histopathology via Language-Mediated Robust Embedding Generation: Yishu Zhang,

Shushan Wu,

Zhenzhong Zhang,

Didong Li,

Huaxiu Yao,

Yun Li,

Iain Carmichael,

Katherine A Hoadley,

Hongtu Zhu,

Di Wu,

Daiwei Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yishu and Wu, Shushan and Zhang, Zhenzhong and Li, Didong and Yao, Huaxiu and Li, Yun and Carmichael, Iain and Hoadley, Katherine A and Zhu, Hongtu and Wu, Di and Zhang, Daiwei},
  title     = {Mitigating Batch Effects in Histopathology via Language-Mediated Robust Embedding Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {5201-5213}
}
PhyFusion: Physics-Aware Infrared and Visible Image Fusion via Modality-Specific Physical Priors: Haiyang Jiang,

Huiqin Zhang,

Yanduo Zhang,

Jiayi Ma,

Junjun Jiang,

Huabing Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Haiyang and Zhang, Huiqin and Zhang, Yanduo and Ma, Jiayi and Jiang, Junjun and Zhou, Huabing},
  title     = {PhyFusion: Physics-Aware Infrared and Visible Image Fusion via Modality-Specific Physical Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {4993-5002}
}
Anticipatory Planning for Multimodal AI Agents: Yongyuan Liang,

Shijie Zhou,

Yu Gu,

Hao Tan,

Gang Wu,

Franck Dernoncourt,

Jihyung Kil,

Ryan A. Rossi,

Ruiyi Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2026_CVPR,
  author    = {Liang, Yongyuan and Zhou, Shijie and Gu, Yu and Tan, Hao and Wu, Gang and Dernoncourt, Franck and Kil, Jihyung and Rossi, Ryan A. and Zhang, Ruiyi},
  title     = {Anticipatory Planning for Multimodal AI Agents},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {5925-5935}
}
ADAPT: Attention Driven Adaptive Prompt Scheduling and InTerpolating Orthogonal Complements for Rare Concepts Generation: Kwanyoung Lee,

Hyunwoo Oh,

SeungJu Cha,

Sungho Koh,

Dong-Jin Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Kwanyoung and Oh, Hyunwoo and Cha, SeungJu and Koh, Sungho and Kim, Dong-Jin},
  title     = {ADAPT: Attention Driven Adaptive Prompt Scheduling and InTerpolating Orthogonal Complements for Rare Concepts Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {4562-4571}
}
Long-Tailed Out-of-Distribution Detection with Refined Separate Class Learning: Shuai Feng,

Yuxin Ge,

Baoming Zhang,

Yuntao Du,

MingCai Chen,

Chongjun Wang,

Lei Feng; [pdf] [supp]
[bibtex]
@InProceedings{Feng_2026_CVPR,
  author    = {Feng, Shuai and Ge, Yuxin and Zhang, Baoming and Du, Yuntao and Chen, MingCai and Wang, Chongjun and Feng, Lei},
  title     = {Long-Tailed Out-of-Distribution Detection with Refined Separate Class Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {6602-6611}
}
Scaling Pre-training to One Hundred Billion Data for Vision Language Models: Xiao Wang,

Ibrahim Alabdulmohsin,

Daniel Salz,

Zhe Li,

Keran Rong,

Xiaohua Zhai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Xiao and Alabdulmohsin, Ibrahim and Salz, Daniel and Li, Zhe and Rong, Keran and Zhai, Xiaohua},
  title     = {Scaling Pre-training to One Hundred Billion Data for Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {6185-6196}
}
V-GRPO: Online Reinforcement Learning for Denoising Generative Models Is Easier than You Think: Bingda Tang,

Yuhui Zhang,

Xiaohan Wang,

Jiayuan Mao,

Ludwig Schmidt,

Serena Yeung-Levy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2026_CVPR,
  author    = {Tang, Bingda and Zhang, Yuhui and Wang, Xiaohan and Mao, Jiayuan and Schmidt, Ludwig and Yeung-Levy, Serena},
  title     = {V-GRPO: Online Reinforcement Learning for Denoising Generative Models Is Easier than You Think},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = {June},
  year      = {2026},
  pages     = {3769-3778}
}
SocialMirror: Reconstructing 3D Human Interaction Behaviors from Monocular Videos with Semantic and Geometric Guidance: Qi Xia,

Peishan Cong,

Ziyi Wang,

Yujing Sun,

Qin Sun,

Xinge Zhu,

Mao Ye,

Ruigang Yang,

Yuexin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Qi and Cong, Peishan and Wang, Ziyi and Sun, Yujing and Sun, Qin and Zhu, Xinge and Ye, Mao and Yang, Ruigang and Ma, Yuexin},
  title     = {{SocialMirror}: Reconstructing {3D} Human Interaction Behaviors from Monocular Videos with Semantic and Geometric Guidance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3510--3520},
}
Materialistic RIR: Material Conditioned Realistic RIR Generation: Mahnoor Fatima Saad,

Sagnik Majumder,

Kristen Grauman,

Ziad Al-Halah; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saad_2026_CVPR,
  author    = {Saad, Mahnoor Fatima and Majumder, Sagnik and Grauman, Kristen and Al-Halah, Ziad},
  title     = {Materialistic {RIR}: Material Conditioned Realistic {RIR} Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5871--5881},
}
Efficient Discrete Diffusion Model for Scalable Multi-Objective Traveling Salesman Problem: Dawei Su,

Zhanhong Fang,

Junyi Luo,

Debing Wang,

Jinbiao Chen,

Zizhen Zhang; [pdf]
[bibtex]
@InProceedings{Su_2026_CVPR,
  author    = {Su, Dawei and Fang, Zhanhong and Luo, Junyi and Wang, Debing and Chen, Jinbiao and Zhang, Zizhen},
  title     = {Efficient Discrete Diffusion Model for Scalable Multi-Objective Traveling Salesman Problem},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6261--6270},
}
Value bounds and Convergence Analysis for Averages of LRP attributions: Alexander Binder,

Nastaran Takmil-Homayouni,

Urun Dogan; [pdf] [supp]
[bibtex]
@InProceedings{Binder_2026_CVPR,
  author    = {Binder, Alexander and Takmil-Homayouni, Nastaran and Dogan, Urun},
  title     = {Value bounds and Convergence Analysis for Averages of {LRP} attributions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3343--3353},
}
DepthScopy: Decoupling Frequency for Endoscopic Depth Estimation in Sparsely-Textured Regions: Minghai Shi,

Xiaoxian Zhang,

Xiaoyue Liu,

Fan Yang,

Lei Li; [pdf]
[bibtex]
@InProceedings{Shi_2026_CVPR,
  author    = {Shi, Minghai and Zhang, Xiaoxian and Liu, Xiaoyue and Yang, Fan and Li, Lei},
  title     = {{DepthScopy}: Decoupling Frequency for Endoscopic Depth Estimation in Sparsely-Textured Regions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5265--5274},
}
SFS-DETR: Spatial-Frequency Selection for UAV Object Detection: Dingding Jia,

Jiankang Wang,

Longlong Zhang,

Zhiheng Liu,

Xuan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Jia_2026_CVPR,
  author    = {Jia, Dingding and Wang, Jiankang and Zhang, Longlong and Liu, Zhiheng and Wang, Xuan},
  title     = {{SFS-DETR}: Spatial-Frequency Selection for {UAV} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6582--6591},
}
Dynamic Full-body Motion Agent with Object Interaction via Blending Pre-trained Modular Controllers: Sanghyeok Nam,

Byoungjun Kim,

Daehyung Park,

Tae-Kyun Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nam_2026_CVPR,
  author    = {Nam, Sanghyeok and Kim, Byoungjun and Park, Daehyung and Kim, Tae-Kyun},
  title     = {Dynamic Full-body Motion Agent with Object Interaction via Blending Pre-trained Modular Controllers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3698--3708},
}
Mining Real-World Image Relations for Large-Scale Controllable Generation and Editing: Hao Shao,

Liyang Liu,

Zhengxiong Luo,

Zhuofan Zong,

Hongsheng Li; [pdf]
[bibtex]
@InProceedings{Shao_2026_CVPR,
  author    = {Shao, Hao and Liu, Liyang and Luo, Zhengxiong and Zong, Zhuofan and Li, Hongsheng},
  title     = {Mining Real-World Image Relations for Large-Scale Controllable Generation and Editing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3893--3902},
}
DA-CLIP: Mitigating Granularity Mismatch in Zero-Shot Anomaly Detection via Decoupled Text-Visual Alignment: Jianqin Liu,

Peng Wang,

Junming Huang,

Xue Zhou,

Li Yu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jianqin and Wang, Peng and Huang, Junming and Zhou, Xue and Yu, Li},
  title     = {{DA-CLIP}: Mitigating Granularity Mismatch in Zero-Shot Anomaly Detection via Decoupled Text-Visual Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6062--6071},
}
PrismPrune: Decoupling Saliency and Diversity in Attention for Efficient Visual Token Pruning in VLMs: Ziniu Liu,

Shuheng Zhou,

Mingqing Liu,

Hao Deng,

Huijia Zhu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Ziniu and Zhou, Shuheng and Liu, Mingqing and Deng, Hao and Zhu, Huijia},
  title     = {{PrismPrune}: Decoupling Saliency and Diversity in Attention for Efficient Visual Token Pruning in {VLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6174--6184},
}
From Coarse to Precise: Rethinking and Bridging Localization in Multimodal Large Language Models: Lysa Xiao,

Veronica Liesaputra,

Lech Szymanski,

Stephen Cranefield; [pdf] [supp]
[bibtex]
@InProceedings{Xiao_2026_CVPR,
  author    = {Xiao, Lysa and Liesaputra, Veronica and Szymanski, Lech and Cranefield, Stephen},
  title     = {From Coarse to Precise: Rethinking and Bridging Localization in Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5882--5891},
}
UMI-HOI: Unifying Multimodal Information with Semantic Multi-Head Attention for Human-Object Interaction Detection: Yuankai Wu,

Zhinan Li,

Constantin Patsch,

Marsil Zakour,

Driton Salihu,

Eckehard Steinbach; [pdf]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Yuankai and Li, Zhinan and Patsch, Constantin and Zakour, Marsil and Salihu, Driton and Steinbach, Eckehard},
  title     = {{UMI-HOI}: Unifying Multimodal Information with Semantic Multi-Head Attention for Human-Object Interaction Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5999--6008},
}
Mitigating Information Forgetting via Entropy-Driven Progressive Retrospection for Multimodal Long Reasoning: Yifei Gao,

Ning Xu,

Guoqing Jin,

Shenyuan Zhang,

An-An Liu; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2026_CVPR,
  author    = {Gao, Yifei and Xu, Ning and Jin, Guoqing and Zhang, Shenyuan and Liu, An-An},
  title     = {Mitigating Information Forgetting via Entropy-Driven Progressive Retrospection for Multimodal Long Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5755--5764},
}
Circuit Tracing in Vision-Language Models: Understanding the Internal Mechanisms of Multimodal Thinking: Jingcheng Yang,

Tianhu Xiong,

Shengyi Qian,

Klara Nahrstedt,

Mingyuan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Jingcheng and Xiong, Tianhu and Qian, Shengyi and Nahrstedt, Klara and Wu, Mingyuan},
  title     = {Circuit Tracing in Vision-Language Models: Understanding the Internal Mechanisms of Multimodal Thinking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3322--3331},
}
ASTRA: Enhancing Multi-Subject Generation with Retrieval-Augmented Pose Guidance and Disentangled Position Embedding: Tianze Xia,

Zijian Ning,

Zonglin Zhao,

Mingjia Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2026_CVPR,
  author    = {Xia, Tianze and Ning, Zijian and Zhao, Zonglin and Wang, Mingjia},
  title     = {{ASTRA}: Enhancing Multi-Subject Generation with Retrieval-Augmented Pose Guidance and Disentangled Position Embedding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3820--3829},
}
FA-MoE: Improving Medical Image Generation Through Frequency-Aware Mixture of Experts: Yifan Sun,

Qingjie Meng,

Tao Chen,

Huiping Chen; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2026_CVPR,
  author    = {Sun, Yifan and Meng, Qingjie and Chen, Tao and Chen, Huiping},
  title     = {{FA-MoE}: Improving Medical Image Generation Through Frequency-Aware Mixture of Experts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3988--3997},
}
Dual Anchors, Do It Better: Hierarchical Group Merging for Zero-Shot Anomaly Detection: Jimin Roh,

Dongkyu Kim,

Suk-Ju Kang; [pdf] [supp]
[bibtex]
@InProceedings{Roh_2026_CVPR,
  author    = {Roh, Jimin and Kim, Dongkyu and Kang, Suk-Ju},
  title     = {Dual Anchors, Do It Better: Hierarchical Group Merging for Zero-Shot Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6218--6227},
}
FlowC2S: Flowing from Current to Succeeding Frames for Fast and Memory-Efficient Video Continuation: Hovhannes Margaryan,

Quentin Bammey,

Christian Sandor; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Margaryan_2026_CVPR,
  author    = {Margaryan, Hovhannes and Bammey, Quentin and Sandor, Christian},
  title     = {{FlowC2S}: Flowing from Current to Succeeding Frames for Fast and Memory-Efficient Video Continuation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3861--3872},
}
Fast Kernel-Space Diffusion for Remote Sensing Pansharpening: Hancong Jin,

Zihan Cao,

Liang-Jian Deng,

Jingjing Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jin_2026_CVPR,
  author    = {Jin, Hancong and Cao, Zihan and Deng, Liang-Jian and Li, Jingjing},
  title     = {Fast Kernel-Space Diffusion for Remote Sensing Pansharpening},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6291--6301},
}
DebFilter: Eradicating Biases Stashed in Value: Seung Hyuk Lee,

Songkuk Kim; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Seung Hyuk and Kim, Songkuk},
  title     = {{DebFilter}: Eradicating Biases Stashed in Value},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4790--4799},
}
MAE-XNT: A Foundation Model for Segmenting Neuronal Tissue Volumes Generated with X-Ray Nanotomography: Alfred Laugros,

Sebastien Roig,

Alexandra Pacureanu; [pdf] [supp]
[bibtex]
@InProceedings{Laugros_2026_CVPR,
  author    = {Laugros, Alfred and Roig, Sebastien and Pacureanu, Alexandra},
  title     = {{MAE-XNT}: A Foundation Model for Segmenting Neuronal Tissue Volumes Generated with {X-Ray} Nanotomography},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5388--5398},
}
Vision-Language Models Encode Clinical Guidelines for Concept-Based Medical Reasoning: Mohamed Harmanani,

Bining Long,

Zhuoxin Guo,

Paul F.R. Wilson,

Amirhossein Sabour,

Minh Nguyen Nhat To,

Gabor Fichtinger,

Purang Abolmaesumi,

Parvin Mousavi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Harmanani_2026_CVPR,
  author    = {Harmanani, Mohamed and Long, Bining and Guo, Zhuoxin and Wilson, Paul F. R. and Sabour, Amirhossein and To, Minh Nguyen Nhat and Fichtinger, Gabor and Abolmaesumi, Purang and Mousavi, Parvin},
  title     = {Vision-Language Models Encode Clinical Guidelines for Concept-Based Medical Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5589--5598},
}
CREM: Compression-Driven Representation Enhancement for Multimodal Retrieval and Comprehension: Lihao Liu,

Biao Yang,

Yan Wang,

Da Li,

Jiangxia Cao,

Yuxiao Luo,

Xiang Chen,

Xiangyu Wu,

Wei Yuan,

Fan Yang,

Guiguang Ding,

Tingting Gao,

Guorui Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Lihao and Yang, Biao and Wang, Yan and Li, Da and Cao, Jiangxia and Luo, Yuxiao and Chen, Xiang and Wu, Xiangyu and Yuan, Wei and Yang, Fan and Ding, Guiguang and Gao, Tingting and Zhou, Guorui},
  title     = {{CREM}: Compression-Driven Representation Enhancement for Multimodal Retrieval and Comprehension},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5714--5724},
}
Open-Set Spatial Gene Expression Prediction from Histological Images via Retrieval-Augmented Generation: Chaochen Wu,

Meiyun Zuo,

Lei Xie; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Chaochen and Zuo, Meiyun and Xie, Lei},
  title     = {Open-Set Spatial Gene Expression Prediction from Histological Images via Retrieval-Augmented Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5495--5504},
}
AnatomiX, an Anatomy-Aware Grounded Multimodal Large Language Model for Chest X-Ray Interpretation: Anees Ur Rehman Hashmi,

Numan Saeed,

Christoph Lippert; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hashmi_2026_CVPR,
  author    = {Hashmi, Anees Ur Rehman and Saeed, Numan and Lippert, Christoph},
  title     = {{AnatomiX}, an Anatomy-Aware Grounded Multimodal Large Language Model for Chest {X-Ray} Interpretation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6009--6018},
}
Rethinking Conditioning in Diffusion Models: Dynamic Token Scheduling for Efficient and Aligned Text-to-Image Generation: Jia Li,

Xiaomeng Fu,

Yizhao Gao,

Jiaxu Wang,

Xi Wang,

Hayden Kwok-Hay So; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jia and Fu, Xiaomeng and Gao, Yizhao and Wang, Jiaxu and Wang, Xi and So, Hayden Kwok-Hay},
  title     = {Rethinking Conditioning in Diffusion Models: Dynamic Token Scheduling for Efficient and Aligned Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4160--4169},
}
Beyond Top-1: Forensic Analysis of Full Prediction Distributions Reveals Hidden Model Reasoning: Minhyeok Lee; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2026_CVPR,
  author    = {Lee, Minhyeok},
  title     = {Beyond Top-1: Forensic Analysis of Full Prediction Distributions Reveals Hidden Model Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3273--3281},
}
MotionDuet: Dual-Conditioned 3D Human Motion Generation with Video-Regularized Text Learning: Yi-Yang Zhang,

Tengjiao Sun,

Pengcheng Fang,

Deng-Bao Wang,

Xiaohao Cai,

Min-Ling Zhang,

Hansung Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yi-Yang and Sun, Tengjiao and Fang, Pengcheng and Wang, Deng-Bao and Cai, Xiaohao and Zhang, Min-Ling and Kim, Hansung},
  title     = {{MotionDuet}: Dual-Conditioned {3D} Human Motion Generation with Video-Regularized Text Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3709--3718},
}
Rethinking Whole-Body CT Image Interpretation: An Abnormality-Centric Approach: Ziheng Zhao,

Lisong Dai,

Ya Zhang,

Weidi Xie,

Yanfeng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Ziheng and Dai, Lisong and Zhang, Ya and Xie, Weidi and Wang, Yanfeng},
  title     = {Rethinking Whole-Body {CT} Image Interpretation: An Abnormality-Centric Approach},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5179--5189},
}
PaLMR: Towards Faithful Visual Reasoning via Multimodal Process Alignment: Yantao Li,

Chenyang Yan,

Qiang Hui,

Fang Zhao,

Kanzhi Cheng,

Chao Tan,

Huanlin Gao,

Jianbing Zhang,

Kai Wang,

Xinyu Dai,

Shiguo Lian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Yantao and Yan, Chenyang and Hui, Qiang and Zhao, Fang and Cheng, Kanzhi and Tan, Chao and Gao, Huanlin and Zhang, Jianbing and Wang, Kai and Dai, Xinyu and Lian, Shiguo},
  title     = {{PaLMR}: Towards Faithful Visual Reasoning via Multimodal Process Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6111--6121},
}
TinySR: Shallow Diffusion Transformers for Real-World Image Super-Resolution: Linwei Dong,

Qingnan Fan,

Yuhang Yu,

Qi Zhang,

Jinwei Chen,

Yawei Luo,

Changqing Zou; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2026_CVPR,
  author    = {Dong, Linwei and Fan, Qingnan and Yu, Yuhang and Zhang, Qi and Chen, Jinwei and Luo, Yawei and Zou, Changqing},
  title     = {{TinySR}: Shallow Diffusion Transformers for Real-World Image Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5075--5085},
}
FedErase: Personalized Federated Unlearning for Text-to-Image Diffusion Models: Tianyu Geng,

Wenfei Liang,

Sijie Wang,

Rui She,

Wee Peng Tay; [pdf] [supp]
[bibtex]
@InProceedings{Geng_2026_CVPR,
  author    = {Geng, Tianyu and Liang, Wenfei and Wang, Sijie and She, Rui and Tay, Wee Peng},
  title     = {{FedErase}: Personalized Federated Unlearning for Text-to-Image Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4035--4044},
}
SCAIL: Towards Studio-Grade Character Animation via In-Context Learning of 3D-Consistent Pose Representations: Wenhao Yan,

Sheng Ye,

Zhuoyi Yang,

Jiayan Teng,

ZhenHui Dong,

Kairui Wen,

Xiaotao Gu,

Yong-Jin Liu,

Jie Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2026_CVPR,
  author    = {Yan, Wenhao and Ye, Sheng and Yang, Zhuoyi and Teng, Jiayan and Dong, ZhenHui and Wen, Kairui and Gu, Xiaotao and Liu, Yong-Jin and Tang, Jie},
  title     = {{SCAIL}: Towards Studio-Grade Character Animation via In-Context Learning of {3D-Consistent} Pose Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4450--4460},
}
Zero4D: Training-Free 4D Video Generation From Single Video Using Off-the-Shelf Video Diffusion Models: Jangho Park,

Taesung Kwon,

Jong Chul Ye; [pdf] [supp]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Jangho and Kwon, Taesung and Ye, Jong Chul},
  title     = {{Zero4D}: Training-Free {4D} Video Generation From Single Video Using Off-the-Shelf Video Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4045--4054},
}
TAUE: Training-free Noise Transplant and Cultivation Diffusion Model: Daichi Nagai,

Ryugo Morita,

Shunsuke Kitada,

Hitoshi Iyatomi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nagai_2026_CVPR,
  author    = {Nagai, Daichi and Morita, Ryugo and Kitada, Shunsuke and Iyatomi, Hitoshi},
  title     = {{TAUE}: Training-free Noise Transplant and Cultivation Diffusion Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3749--3758},
}
Fast Autoregressive Video Generation with Diagonal Decoding: Yang Ye,

Junliang Guo,

Haoyu Wu,

Tianyu He,

Tim Pearce,

Tabish Rashid,

Katja Hofmann,

Jiang Bian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Yang and Guo, Junliang and Wu, Haoyu and He, Tianyu and Pearce, Tim and Rashid, Tabish and Hofmann, Katja and Bian, Jiang},
  title     = {Fast Autoregressive Video Generation with Diagonal Decoding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4419--4428},
}
HAM: A Training-Free Style Transfer Approach via Heterogeneous Attention Modulation for Diffusion Models: Yeqi He,

Liang Li,

Zhiwen Yang,

Xichun Sheng,

Zhidong Zhao,

Chenggang Yan; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Yeqi and Li, Liang and Yang, Zhiwen and Sheng, Xichun and Zhao, Zhidong and Yan, Chenggang},
  title     = {{HAM}: A Training-Free Style Transfer Approach via Heterogeneous Attention Modulation for Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3914--3923},
}
PGDM: Physics-Guided Noise-Free Diffusion Model Based on Point Spread Function for Light-Scattering Removal in Unpaired Biomedical Images: Jinze Zhao,

Keyi Han,

Qiushi Huang,

Jie Tian,

Zhenhua Hu; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2026_CVPR,
  author    = {Zhao, Jinze and Han, Keyi and Huang, Qiushi and Tian, Jie and Hu, Zhenhua},
  title     = {{PGDM}: Physics-Guided Noise-Free Diffusion Model Based on Point Spread Function for Light-Scattering Removal in Unpaired Biomedical Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5599--5608},
}
FrameDiT: Diffusion Transformer with Matrix Attention for Efficient Video Generation: Minh Khoa Le,

Kien Do,

Duc Thanh Nguyen,

Truyen Tran; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Le_2026_CVPR,
  author    = {Le, Minh Khoa and Do, Kien and Nguyen, Duc Thanh and Tran, Truyen},
  title     = {{FrameDiT}: Diffusion Transformer with Matrix Attention for Efficient Video Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4234--4244},
}
FREESTYLE: An Anchor-Free Mechanism for Training-Free Style-Aligned Image Generation: Minseok Oh,

Jihun Park,

Jongmin Gim,

Minwoo Choi,

Kyoungmin Lee,

Ferdinando Fioretto,

Sunghoon Im; [pdf] [supp]
[bibtex]
@InProceedings{Oh_2026_CVPR,
  author    = {Oh, Minseok and Park, Jihun and Gim, Jongmin and Choi, Minwoo and Lee, Kyoungmin and Fioretto, Ferdinando and Im, Sunghoon},
  title     = {{FREESTYLE}: An Anchor-Free Mechanism for Training-Free Style-Aligned Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3779--3788},
}
SAT: Selective Aggregation Transformer for Image Super-Resolution: Dinh Phu Tran,

Thao Do,

Saad Wazir,

Seongah Kim,

Seon Kwon Kim,

Daeyoung Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tran_2026_CVPR,
  author    = {Tran, Dinh Phu and Do, Thao and Wazir, Saad and Kim, Seongah and Kim, Seon Kwon and Kim, Daeyoung},
  title     = {{SAT}: Selective Aggregation Transformer for Image Super-Resolution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4982--4992},
}
Video2LoRA: Unified Semantic-Controlled Video Generation via Per-Reference-Video LoRA: Zexi Wu,

Baolu Li,

Jing Dai,

Yiming Zhang,

Yue Ma,

Qinghe Wang,

Xu Jia,

Hongming Xu; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Zexi and Li, Baolu and Dai, Jing and Zhang, Yiming and Ma, Yue and Wang, Qinghe and Jia, Xu and Xu, Hongming},
  title     = {{Video2LoRA}: Unified Semantic-Controlled Video Generation via Per-Reference-Video {LoRA}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4202--4212},
}
Brain-Inspired Multimodal Spike Neural Network for Image-Text Retrieval: Xintao Zong,

Wenxuan Liu,

Jianhao Ding,

Zhaofei Yu,

Xian Zhong,

Tiejun Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zong_2026_CVPR,
  author    = {Zong, Xintao and Liu, Wenxuan and Ding, Jianhao and Yu, Zhaofei and Zhong, Xian and Huang, Tiejun},
  title     = {Brain-Inspired Multimodal Spike Neural Network for Image-Text Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5137--5146},
}
WHOLE: World-Grounded Hand-Object Lifted from Egocentric Videos: Yufei Ye,

Jiaman Li,

Ryan Rong,

C. Karen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Yufei and Li, Jiaman and Rong, Ryan and Liu, C. Karen},
  title     = {{WHOLE}: World-Grounded Hand-Object Lifted from Egocentric Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3481--3491},
}
E-GRPO: High Entropy Steps Drive Effective Reinforcement Learning for Flow Models: Shengjun Zhang,

Zhang Zhang,

Chensheng Dai,

Yueqi Duan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Shengjun and Zhang, Zhang and Dai, Chensheng and Duan, Yueqi},
  title     = {{E-GRPO}: High Entropy Steps Drive Effective Reinforcement Learning for Flow Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4429--4439},
}
Video Generation Models are Good Latent Reward Models: Xiaoyue Mi,

Wenqing Yu,

Jiesong Lian,

Shibo Jie,

Ruizhe Zhong,

Zijun Liu,

Guozhen Zhang,

Zixiang Zhou,

Zhiyong Xu,

Yuan Zhou,

Qinglin Lu,

Fan Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mi_2026_CVPR,
  author    = {Mi, Xiaoyue and Yu, Wenqing and Lian, Jiesong and Jie, Shibo and Zhong, Ruizhe and Liu, Zijun and Zhang, Guozhen and Zhou, Zixiang and Xu, Zhiyong and Zhou, Yuan and Lu, Qinglin and Tang, Fan},
  title     = {Video Generation Models are Good Latent Reward Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4719--4728},
}
From Adaptation to Generalization: Adaptive Visual Prompting for Medical Image Segmentation: Evren Çetinkaya,

Sangmin Lee,

Jung Uk Kim,

Hong Joo Lee,

Nassir Navab; [pdf] [supp]
[bibtex]
@InProceedings{Cetinkaya_2026_CVPR,
  author    = {{\c{C}}etinkaya, Evren and Lee, Sangmin and Kim, Jung Uk and Lee, Hong Joo and Navab, Nassir},
  title     = {From Adaptation to Generalization: Adaptive Visual Prompting for Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5325--5335},
}
StereoSpace: Depth-Free Synthesis of Stereo Geometry via End-to-End Diffusion in a Canonical Space: Tjark Behrens,

Anton Obukhov,

Bingxin Ke,

Fabio Tosi,

Matteo Poggi,

Konrad Schindler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Behrens_2026_CVPR,
  author    = {Behrens, Tjark and Obukhov, Anton and Ke, Bingxin and Tosi, Fabio and Poggi, Matteo and Schindler, Konrad},
  title     = {{StereoSpace}: Depth-Free Synthesis of Stereo Geometry via End-to-End Diffusion in a Canonical Space},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3975--3987},
}
Do Vision Models Perceive Illusory Motion in Static Images Like Humans?: Isabella E. Rosario,

Fan L. Cheng,

Zitang Sun,

Nikolaus Kriegeskorte; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rosario_2026_CVPR,
  author    = {Rosario, Isabella E. and Cheng, Fan L. and Sun, Zitang and Kriegeskorte, Nikolaus},
  title     = {Do Vision Models Perceive Illusory Motion in Static Images Like Humans?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5515--5524},
}
Volumetrically Consistent Implicit Atlas Learning via Neural Diffeomorphic Flow for Placenta MRI: Athena Taymourtash,

S Mazdak Abulnaga,

Esra Abaci-Turk,

P Ellen Grant,

Polina Golland; [pdf] [supp]
[bibtex]
@InProceedings{Taymourtash_2026_CVPR,
  author    = {Taymourtash, Athena and Abulnaga, S Mazdak and Abaci-Turk, Esra and Grant, P Ellen and Golland, Polina},
  title     = {Volumetrically Consistent Implicit Atlas Learning via Neural Diffeomorphic Flow for Placenta {MRI}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5285--5294},
}
FUSION: Full-body Unified Motion Prior for Body and Hands Via Diffusion: Enes Duran,

Nikos Athanasiou,

Muhammed Kocabas,

Michael J. Black,

Omid Taheri; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Duran_2026_CVPR,
  author    = {Duran, Enes and Athanasiou, Nikos and Kocabas, Muhammed and Black, Michael J. and Taheri, Omid},
  title     = {{FUSION}: Full-body Unified Motion Prior for Body and Hands Via Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3438--3448},
}
Fast-HOI: Fast Human-Object Interaction Synthesis via Distilled Interaction Prior and Physical Constrains: Xiaokang Pan,

Zhizhong Zhang,

Yangyuan Liu,

Zhuoran Chen,

Zhiwei Zhang,

Bin Ji,

Mingang Chen,

Yong Xie,

Jingyu Gong,

Xuhong Wang,

Xin Tan,

Yuan Xie; [pdf] [supp]
[bibtex]
@InProceedings{Pan_2026_CVPR,
  author    = {Pan, Xiaokang and Zhang, Zhizhong and Liu, Yangyuan and Chen, Zhuoran and Zhang, Zhiwei and Ji, Bin and Chen, Mingang and Xie, Yong and Gong, Jingyu and Wang, Xuhong and Tan, Xin and Xie, Yuan},
  title     = {{Fast-HOI}: Fast Human-Object Interaction Synthesis via Distilled Interaction Prior and Physical Constrains},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3719--3728},
}
PBSBench: A Multi-Level Vision-Language Framework and Benchmark for Hematopathology Whole Slide Image Interpretation: Yuanlong Wang,

Weichi Chen,

Adrian Rajab,

Wenfang Liu,

Yulan Jin,

Andrew Srisuwananukorn,

Ping Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yuanlong and Chen, Weichi and Rajab, Adrian and Liu, Wenfang and Jin, Yulan and Srisuwananukorn, Andrew and Zhang, Ping},
  title     = {{PBSBench}: A Multi-Level Vision-Language Framework and Benchmark for Hematopathology Whole Slide Image Interpretation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5569--5578},
}
Conformal Cross-Modal Active Learning: Huy Hoang Nguyen,

Cédric Jung,

Shirin Salehi,

Tobias Glück,

Anke Schmeink,

Andreas Kugi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2026_CVPR,
  author    = {Nguyen, Huy Hoang and Jung, C{\'e}dric and Salehi, Shirin and Gl{\"u}ck, Tobias and Schmeink, Anke and Kugi, Andreas},
  title     = {Conformal Cross-Modal Active Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5147--5157},
}
HazeMatching: Dehazing Light Microscopy Images with Guided Conditional Flow Matching: Anirban Ray,

Ashesh Ashesh,

Florian Jug; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ray_2026_CVPR,
  author    = {Ray, Anirban and Ashesh, Ashesh and Jug, Florian},
  title     = {{HazeMatching}: Dehazing Light Microscopy Images with Guided Conditional Flow Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5652--5661},
}
Gaussian Shannon: High-Precision Diffusion Model Watermarking Based on Communication: Yi Zhang,

Hongbo Huang,

Liang-Jie Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Yi and Huang, Hongbo and Zhang, Liang-Jie},
  title     = {{Gaussian Shannon}: High-Precision Diffusion Model Watermarking Based on Communication},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3924--3932},
}
Prototype and Sample Level Semantic Alignment for Incomplete Multi-View Clustering: Zhengzhong Zhu,

Pei Zhou,

Lanxi Bai,

Jia Nie,

Li Cheng,

Shiquan Min,

Jiangping Zhu; [pdf]
[bibtex]
@InProceedings{Zhu_2026_CVPR,
  author    = {Zhu, Zhengzhong and Zhou, Pei and Bai, Lanxi and Nie, Jia and Cheng, Li and Min, Shiquan and Zhu, Jiangping},
  title     = {Prototype and Sample Level Semantic Alignment for Incomplete Multi-View Clustering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5818--5827},
}
Diffusion^2: Turning 3D Environments into Radio Frequency Heatmaps: Kyoungjun Park,

Yifan Yang,

Changhan Ge,

Lili Qiu,

Shiqi Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2026_CVPR,
  author    = {Park, Kyoungjun and Yang, Yifan and Ge, Changhan and Qiu, Lili and Jiang, Shiqi},
  title     = {{Diffusion{\textasciicircum}2}: Turning {3D} Environments into Radio Frequency Heatmaps},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6414--6423},
}
Objects in Generated Videos Are Slower Than They Appear: Models Suffer Sub-Earth Gravity and Don't Know Galileo's Principle...for now: Varun Varma Thozhiyoor,

Shivam Tripathi,

Venkatesh Babu Radhakrishnan,

Anand Bhattad; [pdf] [supp]
[bibtex]
@InProceedings{Thozhiyoor_2026_CVPR,
  author    = {Thozhiyoor, Varun Varma and Tripathi, Shivam and Radhakrishnan, Venkatesh Babu and Bhattad, Anand},
  title     = {Objects in Generated Videos Are Slower Than They Appear: Models Suffer {Sub-Earth} Gravity and Don't Know {Galileo's} Principle...for now},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3830--3839},
}
One Model for All: Unified Try-On and Try-Off in Any Pose via LLM-Inspired Bidirectional Tweedie Diffusion: Jinxi Liu,

Zijian He,

Guangrun Wang,

Guanbin Li,

Liang Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Jinxi and He, Zijian and Wang, Guangrun and Li, Guanbin and Lin, Liang},
  title     = {One Model for All: Unified Try-On and Try-Off in Any Pose via {LLM}-Inspired Bidirectional {Tweedie} Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4310--4320},
}
Surgical Procedural Planning as 3D World Modelling: Towards Automated Pulmonary Resection: Zhen Zhang,

Zhaorong Dong,

Xiao Yang,

Liqin Huang,

Qiang Wu,

Taidui Zeng,

Hanyu Zheng,

Mingjing Yang,

Shaohua Zheng,

Wangbin Ding,

Lin Pan; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Zhen and Dong, Zhaorong and Yang, Xiao and Huang, Liqin and Wu, Qiang and Zeng, Taidui and Zheng, Hanyu and Yang, Mingjing and Zheng, Shaohua and Ding, Wangbin and Pan, Lin},
  title     = {Surgical Procedural Planning as {3D} World Modelling: Towards Automated Pulmonary Resection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5315--5324},
}
A Framework for Evaluating Zero-Shot Image Generation in Concept-Based Explainability: Giacomo Astolfi,

Matteo Bianchi,

Riccardo Campi,

Antonio De Santis,

Marco Brambilla; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Astolfi_2026_CVPR,
  author    = {Astolfi, Giacomo and Bianchi, Matteo and Campi, Riccardo and De Santis, Antonio and Brambilla, Marco},
  title     = {A Framework for Evaluating Zero-Shot Image Generation in Concept-Based Explainability},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3303--3311},
}
Visual2Echo Compositional Contrastive Learning (V2E-CCL): Binaural Knowledge Distilled Network for Depth Prediction: Nazrul Ismail,

Owais Ahmed Malik,

Ong Wee Hong; [pdf]
[bibtex]
@InProceedings{Ismail_2026_CVPR,
  author    = {Ismail, Nazrul and Malik, Owais Ahmed and Hong, Ong Wee},
  title     = {{Visual2Echo} Compositional Contrastive Learning ({V2E-CCL}): Binaural Knowledge Distilled Network for Depth Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6019--6028},
}
A Low-Rank Learning Framework Integrating Detection, Masking, and Recovery for Occluded Facial Expression Recognition: Yanzhong Wang,

Daming Shi; [pdf]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Yanzhong and Shi, Daming},
  title     = {A Low-Rank Learning Framework Integrating Detection, Masking, and Recovery for Occluded Facial Expression Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6528--6537},
}
No Cache Left Idle: Accelerating diffusion model via Extreme-Slimming Caching: Tingyan Wen,

Haoyu Li,

Yihuang Chen,

Xing Zhou,

Lifei Zhu,

XueQian Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2026_CVPR,
  author    = {Wen, Tingyan and Li, Haoyu and Chen, Yihuang and Zhou, Xing and Zhu, Lifei and Wang, XueQian},
  title     = {No Cache Left Idle: Accelerating diffusion model via Extreme-Slimming Caching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4108--4117},
}
LoViC: Efficient Long Video Generation with Context Compression: Jiaxiu Jiang,

Wenbo Li,

Jingjing Ren,

Yuping Qiu,

Renjing Pei,

Fenglong Song,

Yong Guo,

Xiaogang Xu,

Han Wu,

Wangmeng Zuo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2026_CVPR,
  author    = {Jiang, Jiaxiu and Li, Wenbo and Ren, Jingjing and Qiu, Yuping and Pei, Renjing and Song, Fenglong and Guo, Yong and Xu, Xiaogang and Wu, Han and Zuo, Wangmeng},
  title     = {{LoViC}: Efficient Long Video Generation with Context Compression},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4022--4034},
}
TalkVid: A Large-Scale Diversified Dataset for Audio-Driven Talking Head Synthesis: Shunian Chen,

Hejin Huang,

Yexin Liu,

Zihan Ye,

Pengcheng Chen,

Chenghao Zhu,

Michael Guan,

Rongsheng Wang,

Junying Chen,

Jianye Hou,

Bo Li,

Guanbin Li,

Ser-Nam Lim,

Harry Yang,

Benyou Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2026_CVPR,
  author    = {Chen, Shunian and Huang, Hejin and Liu, Yexin and Ye, Zihan and Chen, Pengcheng and Zhu, Chenghao and Guan, Michael and Wang, Rongsheng and Chen, Junying and Hou, Jianye and Li, Bo and Li, Guanbin and Lim, Ser-Nam and Yang, Harry and Wang, Benyou},
  title     = {{TalkVid}: A Large-Scale Diversified Dataset for Audio-Driven Talking Head Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3492--3500},
}
Loom: Diffusion-Transformer for Interleaved Generation: Mingcheng Ye,

Jiaming Liu,

Yiren Song; [pdf] [arXiv]
[bibtex]
@InProceedings{Ye_2026_CVPR,
  author    = {Ye, Mingcheng and Liu, Jiaming and Song, Yiren},
  title     = {Loom: Diffusion-Transformer for Interleaved Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4582--4592},
}
Concept Erasure via Attention Redirection: Amit Schechter,

Rinon Gal,

Ofir Kedem,

Gal Chechik,

Daniel Cohen-Or; [pdf] [supp]
[bibtex]
@InProceedings{Schechter_2026_CVPR,
  author    = {Schechter, Amit and Gal, Rinon and Kedem, Ofir and Chechik, Gal and Cohen-Or, Daniel},
  title     = {Concept Erasure via Attention Redirection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4572--4581},
}
FIRE-CIR: Fine-grained Reasoning for Composed Fashion Image Retrieval: François Gardères,

Camille-Sovanneary Gauthier,

Jean Ponce,

Shizhe Chen; [pdf] [supp]
[bibtex]
@InProceedings{Garderes_2026_CVPR,
  author    = {Gard{\`e}res, Fran{\c{c}}ois and Gauthier, Camille-Sovanneary and Ponce, Jean and Chen, Shizhe},
  title     = {{FIRE-CIR}: Fine-grained Reasoning for Composed Fashion Image Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5694--5703},
}
BridgeDiffusion: Latent Space Optimization for Independent Body-Part Generation with Motion Consistency Bridges in Interactive Dance: Yufei Huo,

Ao Li,

Wenxun Dai,

Songli Wu,

Yansong Tang; [pdf] [supp]
[bibtex]
@InProceedings{Huo_2026_CVPR,
  author    = {Huo, Yufei and Li, Ao and Dai, Wenxun and Wu, Songli and Tang, Yansong},
  title     = {{BridgeDiffusion}: Latent Space Optimization for Independent Body-Part Generation with Motion Consistency Bridges in Interactive Dance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3449--3459},
}
VideoCanvas: Unified Video Completion from Arbitrary Spatiotemporal Patches via In-Context Conditioning: Minghong Cai,

Qiulin Wang,

Zongli Ye,

Wenze Liu,

Quande Liu,

Weicai Ye,

Xintao Wang,

Pengfei Wan,

Kun Gai,

Xiangyu Yue; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2026_CVPR,
  author    = {Cai, Minghong and Wang, Qiulin and Ye, Zongli and Liu, Wenze and Liu, Quande and Ye, Weicai and Wang, Xintao and Wan, Pengfei and Gai, Kun and Yue, Xiangyu},
  title     = {{VideoCanvas}: Unified Video Completion from Arbitrary Spatiotemporal Patches via In-Context Conditioning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4475--4485},
}
Fashion130K: An E-commerce Fashion Dataset for Outfit Generation with Unified Multi-modal Condition: Yu He,

Ting Zhu,

Yichun Liu,

Lichen Ma,

Xinyuan Shan,

Jingling Fu,

Yu Shi,

Junshi Huang,

Yan Li; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Yu and Zhu, Ting and Liu, Yichun and Ma, Lichen and Shan, Xinyuan and Fu, Jingling and Shi, Yu and Huang, Junshi and Li, Yan},
  title     = {{Fashion130K}: An E-commerce Fashion Dataset for Outfit Generation with Unified Multi-modal Condition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4769--4779},
}
PoseGen: In-Context LoRA Finetuning for Pose-Controllable Long Human Video Generation: Jingxuan He,

Busheng Su,

Finn Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2026_CVPR,
  author    = {He, Jingxuan and Su, Busheng and Wong, Finn},
  title     = {{PoseGen}: In-Context {LoRA} Finetuning for Pose-Controllable Long Human Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4780--4789},
}
Perturb and Recover: Fine-Tuning for Effective Backdoor Removal from CLIP: Naman Deep Singh,

Francesco Croce,

Matthias Hein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Singh_2026_CVPR,
  author    = {Singh, Naman Deep and Croce, Francesco and Hein, Matthias},
  title     = {Perturb and Recover: Fine-Tuning for Effective Backdoor Removal from {CLIP}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6164--6173},
}
Vision-Language Models for Automated 3D PET/CT Report Generation: Wenpei Jiao,

Ke Yan,

Jiajin Zhang,

Dakai Jin,

Zhaoheng Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiao_2026_CVPR,
  author    = {Jiao, Wenpei and Yan, Ke and Zhang, Jiajin and Jin, Dakai and Xie, Zhaoheng},
  title     = {Vision-Language Models for Automated {3D} {PET/CT} Report Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5295--5304},
}
Animated-ART: Multi-Layer Transparent Video Generation: Ziqiang Li,

Yunnan Wang,

Dong Chen,

Yue Dong,

Ji Li,

Yuhui Yuan,

Xin Jin; [pdf]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Ziqiang and Wang, Yunnan and Chen, Dong and Dong, Yue and Li, Ji and Yuan, Yuhui and Jin, Xin},
  title     = {{Animated-ART}: Multi-Layer Transparent Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4150--4159},
}
GLOW: Global Illumination-Aware Inverse Rendering of Indoor Scenes Captured with Dynamic Co-Located Light & Camera: Jiaye Wu,

Saeed Hadadan,

Geng Lin,

Peihan Tu,

Matthias Zwicker,

David Jacobs,

Roni Sengupta; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Jiaye and Hadadan, Saeed and Lin, Geng and Tu, Peihan and Zwicker, Matthias and Jacobs, David and Sengupta, Roni},
  title     = {{GLOW}: Global Illumination-Aware Inverse Rendering of Indoor Scenes Captured with Dynamic Co-Located Light \& Camera},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6445--6455},
}
PEDRA: Evaluating the Realism of Pedestrian Dynamics in Video Generation: Aaron Appelle,

Jerome P. Lynch; [pdf] [supp]
[bibtex]
@InProceedings{Appelle_2026_CVPR,
  author    = {Appelle, Aaron and Lynch, Jerome P.},
  title     = {{PEDRA}: Evaluating the Realism of Pedestrian Dynamics in Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4461--4474},
}
Attention-Guided Energy Optimization for Label-Aligned Anomaly Generation: Zhibin Wan,

Zhiqiang Gao,

Mingjie Sun,

Yupei Wu,

Guohong Fu,

Ran Yi; [pdf]
[bibtex]
@InProceedings{Wan_2026_CVPR,
  author    = {Wan, Zhibin and Gao, Zhiqiang and Sun, Mingjie and Wu, Yupei and Fu, Guohong and Yi, Ran},
  title     = {Attention-Guided Energy Optimization for Label-Aligned Anomaly Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4170--4179},
}
ConSel: Concept-Aware Self-supervised Learning for Regression Beyond Ordinal Tasks: Abdullah Tariq,

Bisma Saleem,

R Muhammad Atif Azad,

Martin Masek,

Syed Zulqarnain Gilani; [pdf] [supp]
[bibtex]
@InProceedings{Tariq_2026_CVPR,
  author    = {Tariq, Abdullah and Saleem, Bisma and Azad, R Muhammad Atif and Masek, Martin and Gilani, Syed Zulqarnain},
  title     = {{ConSel}: Concept-Aware Self-supervised Learning for Regression Beyond Ordinal Tasks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6548--6559},
}
ColorMam: Color-Aware State Space Model for Image Color Style Transfer: Jian Li,

Jiaxin Peng,

Yuchen Li,

Siwang Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Jian and Peng, Jiaxin and Li, Yuchen and Zhou, Siwang},
  title     = {{ColorMam}: Color-Aware State Space Model for Image Color Style Transfer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4379--4388},
}
Bind-Your-Avatar: Multi-Character-Talking Video Generation with Dynamic 3D-mask-based Embedding Router: Yubo Huang,

Weiqiang Wang,

Sirui Zhao,

Tong Xu,

Lin Liu,

Enhong Chen; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2026_CVPR,
  author    = {Huang, Yubo and Wang, Weiqiang and Zhao, Sirui and Xu, Tong and Liu, Lin and Chen, Enhong},
  title     = {{Bind-Your-Avatar}: Multi-Character-Talking Video Generation with Dynamic {3D}-mask-based Embedding Router},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4440--4449},
}
Hoi3DGen: Generating High-Quality Human-Object-Interactions in 3D: Agniv Sharma,

Xianghui Xie,

Tom Fischer,

Eddy Ilg,

Gerard Pons-Moll; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sharma_2026_CVPR,
  author    = {Sharma, Agniv and Xie, Xianghui and Fischer, Tom and Ilg, Eddy and Pons-Moll, Gerard},
  title     = {{Hoi3DGen}: Generating High-Quality Human-Object-Interactions in {3D}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3405--3416},
}
Unbiased Dynamic Multimodal Fusion: Shicai Wei,

Kaijie Zhang,

Luyi Chen,

Tao He,

Guiduo Duan; [pdf] [arXiv]
[bibtex]
@InProceedings{Wei_2026_CVPR,
  author    = {Wei, Shicai and Zhang, Kaijie and Chen, Luyi and He, Tao and Duan, Guiduo},
  title     = {Unbiased Dynamic Multimodal Fusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6239--6249},
}
Future Optical Flow Prediction Improves Robot Control and Video Generation: Kanchana Ranasinghe,

Honglu Zhou,

Yu Fang,

Luyu Yang,

Le Xue,

Ran Xu,

Caiming Xiong,

Silvio Savarese,

Michael S Ryoo,

Juan Carlos Niebles; [pdf] [supp]
[bibtex]
@InProceedings{Ranasinghe_2026_CVPR,
  author    = {Ranasinghe, Kanchana and Zhou, Honglu and Fang, Yu and Yang, Luyu and Xue, Le and Xu, Ran and Xiong, Caiming and Savarese, Silvio and Ryoo, Michael S and Niebles, Juan Carlos},
  title     = {Future Optical Flow Prediction Improves Robot Control and Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4528--4540},
}
ExposeAnyone: Personalized Audio-to-Expression Diffusion Models Are Robust Zero-Shot Face Forgery Detectors: Kaede Shiohara,

Toshihiko Yamasaki,

Vladislav Golyanik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shiohara_2026_CVPR,
  author    = {Shiohara, Kaede and Yamasaki, Toshihiko and Golyanik, Vladislav},
  title     = {{ExposeAnyone}: Personalized Audio-to-Expression Diffusion Models Are Robust Zero-Shot Face Forgery Detectors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3665--3676},
}
OminiControl2: Efficient Conditioning for Diffusion Transformers: Zhenxiong Tan,

Qiaochu Xue,

Xingyi Yang,

Songhua Liu,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2026_CVPR,
  author    = {Tan, Zhenxiong and Xue, Qiaochu and Yang, Xingyi and Liu, Songhua and Wang, Xinchao},
  title     = {{OminiControl2}: Efficient Conditioning for Diffusion Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4256--4265},
}
ProSM: Progressive Soft Masking for Fine-Grained Remote Image Segmentation: Bingkun Nian,

Fenghe Tang,

Zhiwei Ning,

Dongsheng Jiang,

Yin Li,

JIE Yang,

Rong Xiao,

Shaohua Kevin Zhou,

Wei Liu; [pdf]
[bibtex]
@InProceedings{Nian_2026_CVPR,
  author    = {Nian, Bingkun and Tang, Fenghe and Ning, Zhiwei and Jiang, Dongsheng and Li, Yin and Yang, JIE and Xiao, Rong and Zhou, Shaohua Kevin and Liu, Wei},
  title     = {{ProSM}: Progressive Soft Masking for Fine-Grained Remote Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6371--6381},
}
Low-Bitrate Video Compression through Semantic-Conditioned Diffusion: Lingdong Wang,

Guan-Ming Su,

Divya Kothandaraman,

Tsung-Wei Huang,

Mohammad Hajiesmaili,

Ramesh K. Sitaraman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Lingdong and Su, Guan-Ming and Kothandaraman, Divya and Huang, Tsung-Wei and Hajiesmaili, Mohammad and Sitaraman, Ramesh K.},
  title     = {Low-Bitrate Video Compression through Semantic-Conditioned Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4495--4505},
}
Rethinking Medical High-Modality Learning Under Missingness -- A Long-Tailed Distribution Perspective: Chenwei Wu,

Zitao Shuai,

Liyue Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Chenwei and Shuai, Zitao and Shen, Liyue},
  title     = {Rethinking Medical High-Modality Learning Under Missingness -- A Long-Tailed Distribution Perspective},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5641--5651},
}
Uncertainty-Aware Cross-Modal Opinion Interaction: A General Framework for Visible-Infrared Vehicle and Person Re-Identification: Shihao Shan,

Hongying Liu,

Fanhua Shang,

Qian Wang,

Yang Song; [pdf]
[bibtex]
@InProceedings{Shan_2026_CVPR,
  author    = {Shan, Shihao and Liu, Hongying and Shang, Fanhua and Wang, Qian and Song, Yang},
  title     = {Uncertainty-Aware Cross-Modal Opinion Interaction: A General Framework for Visible-Infrared Vehicle and Person Re-Identification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6476--6485},
}
Harnessing Layered Graphic Designs with Real Intentions for Text-to-Design Generation: Xinya Song,

Bo Yang,

Ying Cao; [pdf] [supp]
[bibtex]
@InProceedings{Song_2026_CVPR,
  author    = {Song, Xinya and Yang, Bo and Cao, Ying},
  title     = {Harnessing Layered Graphic Designs with Real Intentions for Text-to-Design Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4729--4738},
}
COSTA: Collaborative Open-Set Test-Time Adaptation Through Robust Prototype Learning: Can Zhang,

Ruirui Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Can and Li, Ruirui},
  title     = {{COSTA}: Collaborative Open-Set Test-Time Adaptation Through Robust Prototype Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6154--6163},
}
Generative Vision-Language Multiple Instance Learning for Weakly Supervised Neonatal Fundus Screening and Reporting: Xiao Zhang,

Guangshuang Tan,

Jie Hu,

Shichao Kan,

Bing Jiang,

Yixiong Liang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2026_CVPR,
  author    = {Zhang, Xiao and Tan, Guangshuang and Hu, Jie and Kan, Shichao and Jiang, Bing and Liang, Yixiong},
  title     = {Generative Vision-Language Multiple Instance Learning for Weakly Supervised Neonatal Fundus Screening and Reporting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5190--5200},
}
CoherentHand: Temporally Consistent 3D Hand Trajectory Synthesis with Semantic Motion Priors: Bikram Boote,

Junho Kim,

Ozgur Kara,

Sangmin Lee,

James M Rehg; [pdf] [supp]
[bibtex]
@InProceedings{Boote_2026_CVPR,
  author    = {Boote, Bikram and Kim, Junho and Kara, Ozgur and Lee, Sangmin and Rehg, James M},
  title     = {{CoherentHand}: Temporally Consistent {3D} Hand Trajectory Synthesis with Semantic Motion Priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3417--3427},
}
Learning Spatial-Preserving Hierarchical Representations for Digital Pathology: Weiyi Wu,

Xingjian Diao,

Chunhui Zhang,

Chongyang Gao,

Xinwen Xu,

Siting Li,

Jiang Gui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2026_CVPR,
  author    = {Wu, Weiyi and Diao, Xingjian and Zhang, Chunhui and Gao, Chongyang and Xu, Xinwen and Li, Siting and Gui, Jiang},
  title     = {Learning Spatial-Preserving Hierarchical Representations for Digital Pathology},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {5484--5494},
}
Learning to Walk the Right Paths: Task-Responsive Graph Reasoning for Multimodal Inference: Xuecheng Li,

Weikuan Jia,

Yuanjie Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Li_2026_CVPR,
  author    = {Li, Xuecheng and Jia, Weikuan and Zheng, Yuanjie},
  title     = {Learning to Walk the Right Paths: Task-Responsive Graph Reasoning for Multimodal Inference},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6040--6050},
}
RectifiedHR: Enable Efficient High-Resolution Synthesis via Energy Rectification: Zhen Yang,

Guibao Shen,

Minyang Li,

Liang Hou,

Mushui Liu,

Luozhou Wang,

Xin Tao,

Ying-Cong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2026_CVPR,
  author    = {Yang, Zhen and Shen, Guibao and Li, Minyang and Hou, Liang and Liu, Mushui and Wang, Luozhou and Tao, Xin and Chen, Ying-Cong},
  title     = {{RectifiedHR}: Enable Efficient High-Resolution Synthesis via Energy Rectification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {3809--3819},
}
Video Reasoning Without Training: Deepak Sridhar,

Kartikeya Bhardwaj,

Jeya Pradha Jeyaraj,

Nuno Vasconcelos,

Ankita Nayak,

Harris Teague; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sridhar_2026_CVPR,
  author    = {Sridhar, Deepak and Bhardwaj, Kartikeya and Jeyaraj, Jeya Pradha and Vasconcelos, Nuno and Nayak, Ankita and Teague, Harris},
  title     = {Video Reasoning Without Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {6250--6260},
}
Towards Source-Aware Object Swapping with Initial Noise Perturbation: Jiahui Zhan,

Xianbing Sun,

Xiangnan Zhu,

Yikun Ji,

Ruitong Liu,

Liqing Zhang,

Jianfu Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhan_2026_CVPR,
  author    = {Zhan, Jiahui and Sun, Xianbing and Zhu, Xiangnan and Ji, Yikun and Liu, Ruitong and Zhang, Liqing and Zhang, Jianfu},
  title     = {Towards Source-Aware Object Swapping with Initial Noise Perturbation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4400--4409},
}
TalkVerse: Democratizing Minute-Long Audio-Driven Video Generation: Zhenzhi Wang,

Jian Wang,

Ke Ma,

Dahua Lin,

Bing Zhou; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2026_CVPR,
  author    = {Wang, Zhenzhi and Wang, Jian and Ma, Ke and Lin, Dahua and Zhou, Bing},
  title     = {{TalkVerse}: Democratizing Minute-Long Audio-Driven Video Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4516--4527},
}
Linear Recurrent Unit with Semantic Modulation for Image Super-Resolution: Mingyu Choi,

Woo Kyoung Han,

Sunghoon Im,

Kyong Hwan Jin; [pdf] [supp]
[bibtex]
@InProceedings{Choi_2026_CVPR,
  author    = {Choi, Mingyu and Han, Woo Kyoung and Im, Sunghoon and Jin, Kyong Hwan},
  title     = {Linear Recurrent Unit with Semantic Modulation for Image Super-Resolution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4950--4960},
}
RodNet: Visual Pathway-Inspired Adaptive Sparse Network for Efficient Low-Light Image Enhancement: Boheng Liu,

Ziyu Li,

Zhong Zhang,

Mengrui Xu,

Chenghua Duan,

Dehao Liu,

Qing Li,

Xia Wu; [pdf]
[bibtex]
@InProceedings{Liu_2026_CVPR,
  author    = {Liu, Boheng and Li, Ziyu and Zhang, Zhong and Xu, Mengrui and Duan, Chenghua and Liu, Dehao and Li, Qing and Wu, Xia},
  title     = {{RodNet}: Visual Pathway-Inspired Adaptive Sparse Network for Efficient Low-Light Image Enhancement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR}) Findings},
  month     = jun,
  year      = {2026},
  pages     = {4961--4970},
}; Back