Papers

Back

Seeing the World through Your Eyes
Hadi Alzayer,
Kevin Zhang,
Brandon Feng,
Christopher A. Metzler,
Jia-Bin Huang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Alzayer_2024_CVPR,
  author    = {Alzayer, Hadi and Zhang, Kevin and Feng, Brandon and Metzler, Christopher A. and Huang, Jia-Bin},
  title     = {Seeing the World through Your Eyes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4864--4873},
}

Ungeneralizable Examples
Jingwen Ye,
Xinchao Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2024_CVPR,
  author    = {Ye, Jingwen and Wang, Xinchao},
  title     = {Ungeneralizable Examples},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11944--11953},
}

LaneCPP: Continuous 3D Lane Detection using Physical Priors
Maximilian Pittner,
Joel Janai,
Alexandru P. Condurache
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pittner_2024_CVPR,
  author    = {Pittner, Maximilian and Janai, Joel and Condurache, Alexandru P.},
  title     = {{LaneCPP}: Continuous {3D} Lane Detection using Physical Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10639--10648},
}

CityDreamer: Compositional Generative Model of Unbounded 3D Cities
Haozhe Xie,
Zhaoxi Chen,
Fangzhou Hong,
Ziwei Liu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Haozhe and Chen, Zhaoxi and Hong, Fangzhou and Liu, Ziwei},
  title     = {{CityDreamer}: Compositional Generative Model of Unbounded {3D} Cities},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9666--9675},
}

Action Detection via an Image Diffusion Process
Lin Geng Foo,
Tianjiao Li,
Hossein Rahmani,
Jun Liu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Foo_2024_CVPR,
  author    = {Foo, Lin Geng and Li, Tianjiao and Rahmani, Hossein and Liu, Jun},
  title     = {Action Detection via an Image Diffusion Process},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18351--18361},
}

ConTex-Human: Free-View Rendering of Human from a Single Image with Texture-Consistent Synthesis
Xiangjun Gao,
Xiaoyu Li,
Chaopeng Zhang,
Qi Zhang,
Yanpei Cao,
Ying Shan,
Long Quan
[pdf] [supp]
[bibtex]
@InProceedings{Gao_2024_CVPR,
  author    = {Gao, Xiangjun and Li, Xiaoyu and Zhang, Chaopeng and Zhang, Qi and Cao, Yanpei and Shan, Ying and Quan, Long},
  title     = {{ConTex-Human}: Free-View Rendering of Human from a Single Image with Texture-Consistent Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10084--10094},
}

Streaming Dense Video Captioning
Xingyi Zhou,
Anurag Arnab,
Shyamal Buch,
Shen Yan,
Austin Myers,
Xuehan Xiong,
Arsha Nagrani,
Cordelia Schmid
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Xingyi and Arnab, Anurag and Buch, Shyamal and Yan, Shen and Myers, Austin and Xiong, Xuehan and Nagrani, Arsha and Schmid, Cordelia},
  title     = {Streaming Dense Video Captioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18243--18252},
}

Rethinking Inductive Biases for Surface Normal Estimation
Gwangbin Bae,
Andrew J. Davison
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bae_2024_CVPR,
  author    = {Bae, Gwangbin and Davison, Andrew J.},
  title     = {Rethinking Inductive Biases for Surface Normal Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9535--9545},
}

Fair Federated Learning under Domain Skew with Local Consistency and Domain Diversity
Yuhang Chen,
Wenke Huang,
Mang Ye
[pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Yuhang and Huang, Wenke and Ye, Mang},
  title     = {Fair Federated Learning under Domain Skew with Local Consistency and Domain Diversity},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12077--12086},
}

HIG: Hierarchical Interlacement Graph Approach to Scene Graph Generation in Video Understanding
Trong-Thuan Nguyen,
Pha Nguyen,
Khoa Luu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2024_CVPR,
  author    = {Nguyen, Trong-Thuan and Nguyen, Pha and Luu, Khoa},
  title     = {{HIG}: Hierarchical Interlacement Graph Approach to Scene Graph Generation in Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18384--18394},
}

OOSTraj: Out-of-Sight Trajectory Prediction With Vision-Positioning Denoising
Haichao Zhang,
Yi Xu,
Hongsheng Lu,
Takayuki Shimizu,
Yun Fu
[pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Haichao and Xu, Yi and Lu, Hongsheng and Shimizu, Takayuki and Fu, Yun},
  title     = {{OOSTraj}: Out-of-Sight Trajectory Prediction With Vision-Positioning Denoising},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14802--14811},
}

FADES: Fair Disentanglement with Sensitive Relevance
Taeuk Jang,
Xiaoqian Wang
[pdf] [supp]
[bibtex]
@InProceedings{Jang_2024_CVPR,
  author    = {Jang, Taeuk and Wang, Xiaoqian},
  title     = {{FADES}: Fair Disentanglement with Sensitive Relevance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12067--12076},
}

Self-Supervised Class-Agnostic Motion Prediction with Spatial and Temporal Consistency Regularizations
Kewei Wang,
Yizheng Wu,
Jun Cen,
Zhiyu Pan,
Xingyi Li,
Zhe Wang,
Zhiguo Cao,
Guosheng Lin
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Kewei and Wu, Yizheng and Cen, Jun and Pan, Zhiyu and Li, Xingyi and Wang, Zhe and Cao, Zhiguo and Lin, Guosheng},
  title     = {Self-Supervised Class-Agnostic Motion Prediction with Spatial and Temporal Consistency Regularizations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14638--14647},
}

CAT: Exploiting Inter-Class Dynamics for Domain Adaptive Object Detection
Mikhail Kennerley,
Jian-Gang Wang,
Bharadwaj Veeravalli,
Robby T. Tan
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kennerley_2024_CVPR,
  author    = {Kennerley, Mikhail and Wang, Jian-Gang and Veeravalli, Bharadwaj and Tan, Robby T.},
  title     = {{CAT}: Exploiting Inter-Class Dynamics for Domain Adaptive Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16541--16550},
}

An Empirical Study of Scaling Law for Scene Text Recognition
Miao Rang,
Zhenni Bi,
Chuanjian Liu,
Yunhe Wang,
Kai Han
[pdf] [supp]
[bibtex]
@InProceedings{Rang_2024_CVPR,
  author    = {Rang, Miao and Bi, Zhenni and Liu, Chuanjian and Wang, Yunhe and Han, Kai},
  title     = {An Empirical Study of Scaling Law for Scene Text Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15619--15629},
}

Text2Loc: 3D Point Cloud Localization from Natural Language
Yan Xia,
Letian Shi,
Zifeng Ding,
Joao F. Henriques,
Daniel Cremers
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2024_CVPR,
  author    = {Xia, Yan and Shi, Letian and Ding, Zifeng and Henriques, Joao F. and Cremers, Daniel},
  title     = {{Text2Loc}: {3D} Point Cloud Localization from Natural Language},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14958--14967},
}

Decomposing Disease Descriptions for Enhanced Pathology Detection: A Multi-Aspect Vision-Language Pre-training Framework
Vu Minh Hieu Phan,
Yutong Xie,
Yuankai Qi,
Lingqiao Liu,
Liyang Liu,
Bowen Zhang,
Zhibin Liao,
Qi Wu,
Minh-Son To,
Johan W. Verjans
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Phan_2024_CVPR,
  author    = {Phan, Vu Minh Hieu and Xie, Yutong and Qi, Yuankai and Liu, Lingqiao and Liu, Liyang and Zhang, Bowen and Liao, Zhibin and Wu, Qi and To, Minh-Son and Verjans, Johan W.},
  title     = {Decomposing Disease Descriptions for Enhanced Pathology Detection: A Multi-Aspect Vision-Language Pre-training Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11492--11501},
}

Fusing Personal and Environmental Cues for Identification and Segmentation of First-Person Camera Wearers in Third-Person Views
Ziwei Zhao,
Yuchen Wang,
Chuhua Wang
[pdf] [supp]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Ziwei and Wang, Yuchen and Wang, Chuhua},
  title     = {Fusing Personal and Environmental Cues for Identification and Segmentation of First-Person Camera Wearers in Third-Person Views},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16477--16487},
}

Desigen: A Pipeline for Controllable Design Template Generation
Haohan Weng,
Danqing Huang,
Yu Qiao,
Zheng Hu,
Chin-Yew Lin,
Tong Zhang,
C. L. Philip Chen
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Weng_2024_CVPR,
  author    = {Weng, Haohan and Huang, Danqing and Qiao, Yu and Hu, Zheng and Lin, Chin-Yew and Zhang, Tong and Chen, C. L. Philip},
  title     = {Desigen: A Pipeline for Controllable Design Template Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12721--12732},
}

Multi-criteria Token Fusion with One-step-ahead Attention for Efficient Vision Transformers
Sanghyeok Lee,
Joonmyung Choi,
Hyunwoo J. Kim
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Sanghyeok and Choi, Joonmyung and Kim, Hyunwoo J.},
  title     = {Multi-criteria Token Fusion with One-step-ahead Attention for Efficient Vision Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15741--15750},
}

ViewFusion: Towards Multi-View Consistency via Interpolated Denoising
Xianghui Yang,
Yan Zuo,
Sameera Ramasinghe,
Loris Bazzani,
Gil Avraham,
Anton van den Hengel
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Xianghui and Zuo, Yan and Ramasinghe, Sameera and Bazzani, Loris and Avraham, Gil and van den Hengel, Anton},
  title     = {{ViewFusion}: Towards Multi-View Consistency via Interpolated Denoising},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9870--9880},
}

SketchINR: A First Look into Sketches as Implicit Neural Representations
Hmrishav Bandyopadhyay,
Ayan Kumar Bhunia,
Pinaki Nath Chowdhury,
Aneeshan Sain,
Tao Xiang,
Timothy Hospedales,
Yi-Zhe Song
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bandyopadhyay_2024_CVPR,
  author    = {Bandyopadhyay, Hmrishav and Bhunia, Ayan Kumar and Chowdhury, Pinaki Nath and Sain, Aneeshan and Xiang, Tao and Hospedales, Timothy and Song, Yi-Zhe},
  title     = {{SketchINR}: A First Look into Sketches as Implicit Neural Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12565--12574},
}

MatchU: Matching Unseen Objects for 6D Pose Estimation from RGB-D Images
Junwen Huang,
Hao Yu,
Kuan-Ting Yu,
Nassir Navab,
Slobodan Ilic,
Benjamin Busam
[pdf] [supp]
[bibtex]
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Junwen and Yu, Hao and Yu, Kuan-Ting and Navab, Nassir and Ilic, Slobodan and Busam, Benjamin},
  title     = {{MatchU}: Matching Unseen Objects for {6D} Pose Estimation from {RGB-D} Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10095--10105},
}

Towards High-fidelity Artistic Image Vectorization via Texture-Encapsulated Shape Parameterization
Ye Chen,
Bingbing Ni,
Jinfan Liu,
Xiaoyang Huang,
Xuanhong Chen
[pdf]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Ye and Ni, Bingbing and Liu, Jinfan and Huang, Xiaoyang and Chen, Xuanhong},
  title     = {Towards High-fidelity Artistic Image Vectorization via Texture-Encapsulated Shape Parameterization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15877--15886},
}

EfficientSAM: Leveraged Masked Image Pretraining for Efficient Segment Anything
Yunyang Xiong,
Bala Varadarajan,
Lemeng Wu,
Xiaoyu Xiang,
Fanyi Xiao,
Chenchen Zhu,
Xiaoliang Dai,
Dilin Wang,
Fei Sun,
Forrest Iandola,
Raghuraman Krishnamoorthi,
Vikas Chandra
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiong_2024_CVPR,
  author    = {Xiong, Yunyang and Varadarajan, Bala and Wu, Lemeng and Xiang, Xiaoyu and Xiao, Fanyi and Zhu, Chenchen and Dai, Xiaoliang and Wang, Dilin and Sun, Fei and Iandola, Forrest and Krishnamoorthi, Raghuraman and Chandra, Vikas},
  title     = {{EfficientSAM}: Leveraged Masked Image Pretraining for Efficient Segment Anything},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16111--16121},
}

ChatScene: Knowledge-Enabled Safety-Critical Scenario Generation for Autonomous Vehicles
Jiawei Zhang,
Chejian Xu,
Bo Li
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Jiawei and Xu, Chejian and Li, Bo},
  title     = {{ChatScene}: Knowledge-Enabled Safety-Critical Scenario Generation for Autonomous Vehicles},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15459--15469},
}

Teeth-SEG: An Efficient Instance Segmentation Framework for Orthodontic Treatment based on Multi-Scale Aggregation and Anthropic Prior Knowledge
Bo Zou,
Shaofeng Wang,
Hao Liu,
Gaoyue Sun,
Yajie Wang,
FeiFei Zuo,
Chengbin Quan,
Youjian Zhao
[pdf] [supp]
[bibtex]
@InProceedings{Zou_2024_CVPR,
  author    = {Zou, Bo and Wang, Shaofeng and Liu, Hao and Sun, Gaoyue and Wang, Yajie and Zuo, FeiFei and Quan, Chengbin and Zhao, Youjian},
  title     = {{Teeth-SEG}: An Efficient Instance Segmentation Framework for Orthodontic Treatment based on Multi-Scale Aggregation and Anthropic Prior Knowledge},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11601--11610},
}

Bayesian Diffusion Models for 3D Shape Reconstruction
Haiyang Xu,
Yu Lei,
Zeyuan Chen,
Xiang Zhang,
Yue Zhao,
Yilin Wang,
Zhuowen Tu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Haiyang and Lei, Yu and Chen, Zeyuan and Zhang, Xiang and Zhao, Yue and Wang, Yilin and Tu, Zhuowen},
  title     = {{Bayesian} Diffusion Models for {3D} Shape Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10628--10638},
}

CrossKD: Cross-Head Knowledge Distillation for Object Detection
Jiabao Wang,
Yuming Chen,
Zhaohui Zheng,
Xiang Li,
Ming-Ming Cheng,
Qibin Hou
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Jiabao and Chen, Yuming and Zheng, Zhaohui and Li, Xiang and Cheng, Ming-Ming and Hou, Qibin},
  title     = {{CrossKD}: Cross-Head Knowledge Distillation for Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16520--16530},
}

Bi-level Learning of Task-Specific Decoders for Joint Registration and One-Shot Medical Image Segmentation
Xin Fan,
Xiaolin Wang,
Jiaxin Gao,
Jia Wang,
Zhongxuan Luo,
Risheng Liu
[pdf] [supp]
[bibtex]
@InProceedings{Fan_2024_CVPR,
  author    = {Fan, Xin and Wang, Xiaolin and Gao, Jiaxin and Wang, Jia and Luo, Zhongxuan and Liu, Risheng},
  title     = {Bi-level Learning of Task-Specific Decoders for Joint Registration and One-Shot Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11726--11735},
}

EscherNet: A Generative Model for Scalable View Synthesis
Xin Kong,
Shikun Liu,
Xiaoyang Lyu,
Marwan Taher,
Xiaojuan Qi,
Andrew J. Davison
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2024_CVPR,
  author    = {Kong, Xin and Liu, Shikun and Lyu, Xiaoyang and Taher, Marwan and Qi, Xiaojuan and Davison, Andrew J.},
  title     = {{EscherNet}: A Generative Model for Scalable View Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9503--9513},
}

MeaCap: Memory-Augmented Zero-shot Image Captioning
Zequn Zeng,
Yan Xie,
Hao Zhang,
Chiyu Chen,
Bo Chen,
Zhengjue Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2024_CVPR,
  author    = {Zeng, Zequn and Xie, Yan and Zhang, Hao and Chen, Chiyu and Chen, Bo and Wang, Zhengjue},
  title     = {{MeaCap}: Memory-Augmented Zero-shot Image Captioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14100--14110},
}

Elite360D: Towards Efficient 360 Depth Estimation via Semantic- and Distance-Aware Bi-Projection Fusion
Hao Ai,
Lin Wang
[pdf] [supp]
[bibtex]
@InProceedings{Ai_2024_CVPR,
  author    = {Ai, Hao and Wang, Lin},
  title     = {{Elite360D}: Towards Efficient 360 Depth Estimation via Semantic- and Distance-Aware Bi-Projection Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9926--9935},
}

Curriculum Point Prompting for Weakly-Supervised Referring Image Segmentation
Qiyuan Dai,
Sibei Yang
[pdf] [arXiv]
[bibtex]
@InProceedings{Dai_2024_CVPR,
  author    = {Dai, Qiyuan and Yang, Sibei},
  title     = {Curriculum Point Prompting for Weakly-Supervised Referring Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13711--13722},
}

EventDance: Unsupervised Source-free Cross-modal Adaptation for Event-based Object Recognition
Xu Zheng,
Lin Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2024_CVPR,
  author    = {Zheng, Xu and Wang, Lin},
  title     = {{EventDance}: Unsupervised Source-free Cross-modal Adaptation for Event-based Object Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17448--17458},
}

CycleINR: Cycle Implicit Neural Representation for Arbitrary-Scale Volumetric Super-Resolution of Medical Data
Wei Fang,
Yuxing Tang,
Heng Guo,
Mingze Yuan,
Tony C. W. Mok,
Ke Yan,
Jiawen Yao,
Xin Chen,
Zaiyi Liu,
Le Lu,
Ling Zhang,
Minfeng Xu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2024_CVPR,
  author    = {Fang, Wei and Tang, Yuxing and Guo, Heng and Yuan, Mingze and Mok, Tony C. W. and Yan, Ke and Yao, Jiawen and Chen, Xin and Liu, Zaiyi and Lu, Le and Zhang, Ling and Xu, Minfeng},
  title     = {{CycleINR}: Cycle Implicit Neural Representation for Arbitrary-Scale Volumetric Super-Resolution of Medical Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11631--11641},
}

Holistic Autonomous Driving Understanding by Bird's-Eye-View Injected Multi-Modal Large Models
Xinpeng Ding,
Jianhua Han,
Hang Xu,
Xiaodan Liang,
Wei Zhang,
Xiaomeng Li
[pdf] [supp]
[bibtex]
@InProceedings{Ding_2024_CVPR,
  author    = {Ding, Xinpeng and Han, Jianhua and Xu, Hang and Liang, Xiaodan and Zhang, Wei and Li, Xiaomeng},
  title     = {Holistic Autonomous Driving Understanding by Bird's-Eye-View Injected Multi-Modal Large Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13668--13677},
}

Extreme Point Supervised Instance Segmentation
Hyeonjun Lee,
Sehyun Hwang,
Suha Kwak
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Hyeonjun and Hwang, Sehyun and Kwak, Suha},
  title     = {Extreme Point Supervised Instance Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17212--17222},
}

MedM2G: Unifying Medical Multi-Modal Generation via Cross-Guided Diffusion with Visual Invariant
Chenlu Zhan,
Yu Lin,
Gaoang Wang,
Hongwei Wang,
Jian Wu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhan_2024_CVPR,
  author    = {Zhan, Chenlu and Lin, Yu and Wang, Gaoang and Wang, Hongwei and Wu, Jian},
  title     = {{MedM2G}: Unifying Medical Multi-Modal Generation via Cross-Guided Diffusion with Visual Invariant},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11502--11512},
}

Neural Parametric Gaussians for Monocular Non-Rigid Object Reconstruction
Devikalyan Das,
Christopher Wewer,
Raza Yunus,
Eddy Ilg,
Jan Eric Lenssen
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Das_2024_CVPR,
  author    = {Das, Devikalyan and Wewer, Christopher and Yunus, Raza and Ilg, Eddy and Lenssen, Jan Eric},
  title     = {Neural Parametric {Gaussians} for Monocular Non-Rigid Object Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10715--10725},
}

PH-Net: Semi-Supervised Breast Lesion Segmentation via Patch-wise Hardness
Siyao Jiang,
Huisi Wu,
Junyang Chen,
Qin Zhang,
Jing Qin
[pdf] [supp]
[bibtex]
@InProceedings{Jiang_2024_CVPR,
  author    = {Jiang, Siyao and Wu, Huisi and Chen, Junyang and Zhang, Qin and Qin, Jing},
  title     = {{PH-Net}: Semi-Supervised Breast Lesion Segmentation via Patch-wise Hardness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11418--11427},
}

ExACT: Language-guided Conceptual Reasoning and Uncertainty Estimation for Event-based Action Recognition and More
Jiazhou Zhou,
Xu Zheng,
Yuanhuiyi Lyu,
Lin Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Jiazhou and Zheng, Xu and Lyu, Yuanhuiyi and Wang, Lin},
  title     = {{ExACT}: Language-guided Conceptual Reasoning and Uncertainty Estimation for Event-based Action Recognition and More},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18633--18643},
}

Improving Visual Recognition with Hyperbolical Visual Hierarchy Mapping
Hyeongjun Kwon,
Jinhyun Jang,
Jin Kim,
Kwonyoung Kim,
Kwanghoon Sohn
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kwon_2024_CVPR,
  author    = {Kwon, Hyeongjun and Jang, Jinhyun and Kim, Jin and Kim, Kwonyoung and Sohn, Kwanghoon},
  title     = {Improving Visual Recognition with Hyperbolical Visual Hierarchy Mapping},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17364--17374},
}

ParameterNet: Parameters Are All You Need for Large-scale Visual Pretraining of Mobile Networks
Kai Han,
Yunhe Wang,
Jianyuan Guo,
Enhua Wu
[pdf]
[bibtex]
@InProceedings{Han_2024_CVPR,
  author    = {Han, Kai and Wang, Yunhe and Guo, Jianyuan and Wu, Enhua},
  title     = {{ParameterNet}: Parameters Are All You Need for Large-scale Visual Pretraining of Mobile Networks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15751--15761},
}

Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation
Bingxin Ke,
Anton Obukhov,
Shengyu Huang,
Nando Metzger,
Rodrigo Caye Daudt,
Konrad Schindler
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ke_2024_CVPR,
  author    = {Ke, Bingxin and Obukhov, Anton and Huang, Shengyu and Metzger, Nando and Daudt, Rodrigo Caye and Schindler, Konrad},
  title     = {Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9492--9502},
}

LLMs are Good Sign Language Translators
Jia Gong,
Lin Geng Foo,
Yixuan He,
Hossein Rahmani,
Jun Liu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gong_2024_CVPR,
  author    = {Gong, Jia and Foo, Lin Geng and He, Yixuan and Rahmani, Hossein and Liu, Jun},
  title     = {{LLMs} are Good Sign Language Translators},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18362--18372},
}

Revisiting the Domain Shift and Sample Uncertainty in Multi-source Active Domain Transfer
Wenqiao Zhang,
Zheqi Lv,
Hao Zhou,
Jia-Wei Liu,
Juncheng Li,
Mengze Li,
Yunfei Li,
Dongping Zhang,
Yueting Zhuang,
Siliang Tang
[pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Wenqiao and Lv, Zheqi and Zhou, Hao and Liu, Jia-Wei and Li, Juncheng and Li, Mengze and Li, Yunfei and Zhang, Dongping and Zhuang, Yueting and Tang, Siliang},
  title     = {Revisiting the Domain Shift and Sample Uncertainty in Multi-source Active Domain Transfer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16751--16761},
}

Learning Continual Compatible Representation for Re-indexing Free Lifelong Person Re-identification
Zhenyu Cui,
Jiahuan Zhou,
Xun Wang,
Manyu Zhu,
Yuxin Peng
[pdf] [supp]
[bibtex]
@InProceedings{Cui_2024_CVPR,
  author    = {Cui, Zhenyu and Zhou, Jiahuan and Wang, Xun and Zhu, Manyu and Peng, Yuxin},
  title     = {Learning Continual Compatible Representation for Re-indexing Free Lifelong Person Re-identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16614--16623},
}

CORES: Convolutional Response-based Score for Out-of-distribution Detection
Keke Tang,
Chao Hou,
Weilong Peng,
Runnan Chen,
Peican Zhu,
Wenping Wang,
Zhihong Tian
[pdf]
[bibtex]
@InProceedings{Tang_2024_CVPR,
  author    = {Tang, Keke and Hou, Chao and Peng, Weilong and Chen, Runnan and Zhu, Peican and Wang, Wenping and Tian, Zhihong},
  title     = {{CORES}: Convolutional Response-based Score for Out-of-distribution Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10916--10925},
}

Accurate Spatial Gene Expression Prediction by Integrating Multi-Resolution Features
Youngmin Chung,
Ji Hun Ha,
Kyeong Chan Im,
Joo Sang Lee
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chung_2024_CVPR,
  author    = {Chung, Youngmin and Ha, Ji Hun and Im, Kyeong Chan and Lee, Joo Sang},
  title     = {Accurate Spatial Gene Expression Prediction by Integrating Multi-Resolution Features},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11591--11600},
}

Behind the Veil: Enhanced Indoor 3D Scene Reconstruction with Occluded Surfaces Completion
Su Sun,
Cheng Zhao,
Yuliang Guo,
Ruoyu Wang,
Xinyu Huang,
Yingjie Victor Chen,
Liu Ren
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Su and Zhao, Cheng and Guo, Yuliang and Wang, Ruoyu and Huang, Xinyu and Chen, Yingjie Victor and Ren, Liu},
  title     = {Behind the Veil: Enhanced Indoor {3D} Scene Reconstruction with Occluded Surfaces Completion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12744--12753},
}

VideoGrounding-DINO: Towards Open-Vocabulary Spatio-Temporal Video Grounding
Syed Talal Wasim,
Muzammal Naseer,
Salman Khan,
Ming-Hsuan Yang,
Fahad Shahbaz Khan
[pdf]
[bibtex]
@InProceedings{Wasim_2024_CVPR,
  author    = {Wasim, Syed Talal and Naseer, Muzammal and Khan, Salman and Yang, Ming-Hsuan and Khan, Fahad Shahbaz},
  title     = {{VideoGrounding-DINO}: Towards Open-Vocabulary Spatio-Temporal Video Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18909--18918},
}

Think Twice Before Selection: Federated Evidential Active Learning for Medical Image Analysis with Domain Shifts
Jiayi Chen,
Benteng Ma,
Hengfei Cui,
Yong Xia
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Jiayi and Ma, Benteng and Cui, Hengfei and Xia, Yong},
  title     = {Think Twice Before Selection: Federated Evidential Active Learning for Medical Image Analysis with Domain Shifts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11439--11449},
}

ViTamin: Designing Scalable Vision Models in the Vision-Language Era
Jieneng Chen,
Qihang Yu,
Xiaohui Shen,
Alan Yuille,
Liang-Chieh Chen
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Jieneng and Yu, Qihang and Shen, Xiaohui and Yuille, Alan and Chen, Liang-Chieh},
  title     = {{ViTamin}: Designing Scalable Vision Models in the Vision-Language Era},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12954--12966},
}

Seeing the Unseen: Visual Common Sense for Semantic Placement
Ram Ramrakhya,
Aniruddha Kembhavi,
Dhruv Batra,
Zsolt Kira,
Kuo-Hao Zeng,
Luca Weihs
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ramrakhya_2024_CVPR,
  author    = {Ramrakhya, Ram and Kembhavi, Aniruddha and Batra, Dhruv and Kira, Zsolt and Zeng, Kuo-Hao and Weihs, Luca},
  title     = {Seeing the Unseen: Visual Common Sense for Semantic Placement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16273--16283},
}

LLaMA-Excitor: General Instruction Tuning via Indirect Feature Interaction
Bo Zou,
Chao Yang,
Yu Qiao,
Chengbin Quan,
Youjian Zhao
[pdf] [supp]
[bibtex]
@InProceedings{Zou_2024_CVPR,
  author    = {Zou, Bo and Yang, Chao and Qiao, Yu and Quan, Chengbin and Zhao, Youjian},
  title     = {{LLaMA-Excitor}: General Instruction Tuning via Indirect Feature Interaction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14089--14099},
}

Steerers: A Framework for Rotation Equivariant Keypoint Descriptors
Georg Bökman,
Johan Edstedt,
Michael Felsberg,
Fredrik Kahl
[pdf] [supp]
[bibtex]
@InProceedings{Bokman_2024_CVPR,
  author    = {B{\"o}kman, Georg and Edstedt, Johan and Felsberg, Michael and Kahl, Fredrik},
  title     = {Steerers: A Framework for Rotation Equivariant Keypoint Descriptors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4885--4895},
}

Efficient Dataset Distillation via Minimax Diffusion
Jianyang Gu,
Saeed Vahidian,
Vyacheslav Kungurtsev,
Haonan Wang,
Wei Jiang,
Yang You,
Yiran Chen
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2024_CVPR,
  author    = {Gu, Jianyang and Vahidian, Saeed and Kungurtsev, Vyacheslav and Wang, Haonan and Jiang, Wei and You, Yang and Chen, Yiran},
  title     = {Efficient Dataset Distillation via Minimax Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15793--15803}
}

Posterior Distillation Sampling
Juil Koo,
Chanho Park,
Minhyuk Sung
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Koo_2024_CVPR,
  author    = {Koo, Juil and Park, Chanho and Sung, Minhyuk},
  title     = {Posterior Distillation Sampling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13352--13361}
}

HOISDF: Constraining 3D Hand-Object Pose Estimation with Global Signed Distance Fields
Haozhe Qi,
Chen Zhao,
Mathieu Salzmann,
Alexander Mathis
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qi_2024_CVPR,
  author    = {Qi, Haozhe and Zhao, Chen and Salzmann, Mathieu and Mathis, Alexander},
  title     = {{HOISDF}: Constraining {3D} Hand-Object Pose Estimation with Global Signed Distance Fields},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10392--10402}
}

DiffPortrait3D: Controllable Diffusion for Zero-Shot Portrait View Synthesis
Yuming Gu,
Hongyi Xu,
You Xie,
Guoxian Song,
Yichun Shi,
Di Chang,
Jing Yang,
Linjie Luo
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2024_CVPR,
  author    = {Gu, Yuming and Xu, Hongyi and Xie, You and Song, Guoxian and Shi, Yichun and Chang, Di and Yang, Jing and Luo, Linjie},
  title     = {{DiffPortrait3D}: Controllable Diffusion for Zero-Shot Portrait View Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10456--10465}
}

H-ViT: A Hierarchical Vision Transformer for Deformable Image Registration
Morteza Ghahremani,
Mohammad Khateri,
Bailiang Jian,
Benedikt Wiestler,
Ehsan Adeli,
Christian Wachinger
[pdf] [supp]
[bibtex]
@InProceedings{Ghahremani_2024_CVPR,
  author    = {Ghahremani, Morteza and Khateri, Mohammad and Jian, Bailiang and Wiestler, Benedikt and Adeli, Ehsan and Wachinger, Christian},
  title     = {{H-ViT}: A Hierarchical Vision Transformer for Deformable Image Registration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11513--11523}
}

VideoLLM-online: Online Video Large Language Model for Streaming Video
Joya Chen,
Zhaoyang Lv,
Shiwei Wu,
Kevin Qinghong Lin,
Chenan Song,
Difei Gao,
Jia-Wei Liu,
Ziteng Gao,
Dongxing Mao,
Mike Zheng Shou
[pdf] [supp]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Joya and Lv, Zhaoyang and Wu, Shiwei and Lin, Kevin Qinghong and Song, Chenan and Gao, Difei and Liu, Jia-Wei and Gao, Ziteng and Mao, Dongxing and Shou, Mike Zheng},
  title     = {{VideoLLM-online}: Online Video Large Language Model for Streaming Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18407--18418}
}

Towards Better Vision-Inspired Vision-Language Models
Yun-Hao Cao,
Kaixiang Ji,
Ziyuan Huang,
Chuanyang Zheng,
Jiajia Liu,
Jian Wang,
Jingdong Chen,
Ming Yang
[pdf]
[bibtex]
@InProceedings{Cao_2024_CVPR,
  author    = {Cao, Yun-Hao and Ji, Kaixiang and Huang, Ziyuan and Zheng, Chuanyang and Liu, Jiajia and Wang, Jian and Chen, Jingdong and Yang, Ming},
  title     = {Towards Better Vision-Inspired Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13537--13547}
}

VSRD: Instance-Aware Volumetric Silhouette Rendering for Weakly Supervised 3D Object Detection
Zihua Liu,
Hiroki Sakuma,
Masatoshi Okutomi
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Zihua and Sakuma, Hiroki and Okutomi, Masatoshi},
  title     = {{VSRD}: Instance-Aware Volumetric Silhouette Rendering for Weakly Supervised {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17354--17363}
}

RILA: Reflective and Imaginative Language Agent for Zero-Shot Semantic Audio-Visual Navigation
Zeyuan Yang,
Jiageng Liu,
Peihao Chen,
Anoop Cherian,
Tim K. Marks,
Jonathan Le Roux,
Chuang Gan
[pdf] [supp]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Zeyuan and Liu, Jiageng and Chen, Peihao and Cherian, Anoop and Marks, Tim K. and Le Roux, Jonathan and Gan, Chuang},
  title     = {{RILA}: Reflective and Imaginative Language Agent for Zero-Shot Semantic Audio-Visual Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16251--16261}
}

Endow SAM with Keen Eyes: Temporal-spatial Prompt Learning for Video Camouflaged Object Detection
Wenjun Hui,
Zhenfeng Zhu,
Shuai Zheng,
Yao Zhao
[pdf]
[bibtex]
@InProceedings{Hui_2024_CVPR,
  author    = {Hui, Wenjun and Zhu, Zhenfeng and Zheng, Shuai and Zhao, Yao},
  title     = {Endow {SAM} with Keen Eyes: Temporal-spatial Prompt Learning for Video Camouflaged Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19058--19067}
}

Forgery-aware Adaptive Transformer for Generalizable Synthetic Image Detection
Huan Liu,
Zichang Tan,
Chuangchuang Tan,
Yunchao Wei,
Jingdong Wang,
Yao Zhao
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Huan and Tan, Zichang and Tan, Chuangchuang and Wei, Yunchao and Wang, Jingdong and Zhao, Yao},
  title     = {Forgery-aware Adaptive Transformer for Generalizable Synthetic Image Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10770--10780}
}

PostureHMR: Posture Transformation for 3D Human Mesh Recovery
Yu-Pei Song,
Xiao Wu,
Zhaoquan Yuan,
Jian-Jun Qiao,
Qiang Peng
[pdf] [supp]
[bibtex]
@InProceedings{Song_2024_CVPR,
  author    = {Song, Yu-Pei and Wu, Xiao and Yuan, Zhaoquan and Qiao, Jian-Jun and Peng, Qiang},
  title     = {{PostureHMR}: Posture Transformation for {3D} Human Mesh Recovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9732--9741}
}

Dynamic Adapter Meets Prompt Tuning: Parameter-Efficient Transfer Learning for Point Cloud Analysis
Xin Zhou,
Dingkang Liang,
Wei Xu,
Xingkui Zhu,
Yihan Xu,
Zhikang Zou,
Xiang Bai
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Xin and Liang, Dingkang and Xu, Wei and Zhu, Xingkui and Xu, Yihan and Zou, Zhikang and Bai, Xiang},
  title     = {Dynamic Adapter Meets Prompt Tuning: Parameter-Efficient Transfer Learning for Point Cloud Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14707--14717}
}

Wonder3D: Single Image to 3D using Cross-Domain Diffusion
Xiaoxiao Long,
Yuan-Chen Guo,
Cheng Lin,
Yuan Liu,
Zhiyang Dou,
Lingjie Liu,
Yuexin Ma,
Song-Hai Zhang,
Marc Habermann,
Christian Theobalt,
Wenping Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Long_2024_CVPR,
  author    = {Long, Xiaoxiao and Guo, Yuan-Chen and Lin, Cheng and Liu, Yuan and Dou, Zhiyang and Liu, Lingjie and Ma, Yuexin and Zhang, Song-Hai and Habermann, Marc and Theobalt, Christian and Wang, Wenping},
  title     = {{Wonder3D}: Single Image to {3D} using Cross-Domain Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9970--9980}
}

RichDreamer: A Generalizable Normal-Depth Diffusion Model for Detail Richness in Text-to-3D
Lingteng Qiu,
Guanying Chen,
Xiaodong Gu,
Qi Zuo,
Mutian Xu,
Yushuang Wu,
Weihao Yuan,
Zilong Dong,
Liefeng Bo,
Xiaoguang Han
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2024_CVPR,
  author    = {Qiu, Lingteng and Chen, Guanying and Gu, Xiaodong and Zuo, Qi and Xu, Mutian and Wu, Yushuang and Yuan, Weihao and Dong, Zilong and Bo, Liefeng and Han, Xiaoguang},
  title     = {{RichDreamer}: A Generalizable Normal-Depth Diffusion Model for Detail Richness in {Text-to-3D}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9914--9925}
}

Zero-shot Referring Expression Comprehension via Structural Similarity Between Images and Captions
Zeyu Han,
Fangrui Zhu,
Qianru Lao,
Huaizu Jiang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2024_CVPR,
  author    = {Han, Zeyu and Zhu, Fangrui and Lao, Qianru and Jiang, Huaizu},
  title     = {Zero-shot Referring Expression Comprehension via Structural Similarity Between Images and Captions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14364--14374}
}

Triplane Meets Gaussian Splatting: Fast and Generalizable Single-View 3D Reconstruction with Transformers
Zi-Xin Zou,
Zhipeng Yu,
Yuan-Chen Guo,
Yangguang Li,
Ding Liang,
Yan-Pei Cao,
Song-Hai Zhang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zou_2024_CVPR,
  author    = {Zou, Zi-Xin and Yu, Zhipeng and Guo, Yuan-Chen and Li, Yangguang and Liang, Ding and Cao, Yan-Pei and Zhang, Song-Hai},
  title     = {Triplane Meets {Gaussian} Splatting: Fast and Generalizable Single-View {3D} Reconstruction with Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10324--10335}
}

WateRF: Robust Watermarks in Radiance Fields for Protection of Copyrights
Youngdong Jang,
Dong In Lee,
MinHyuk Jang,
Jong Wook Kim,
Feng Yang,
Sangpil Kim
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2024_CVPR,
  author    = {Jang, Youngdong and Lee, Dong In and Jang, MinHyuk and Kim, Jong Wook and Yang, Feng and Kim, Sangpil},
  title     = {{WateRF}: Robust Watermarks in Radiance Fields for Protection of Copyrights},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12087--12097}
}

Instance-aware Contrastive Learning for Occluded Human Mesh Reconstruction
Mi-Gyeong Gwon,
Gi-Mun Um,
Won-Sik Cheong,
Wonjun Kim
[pdf] [supp]
[bibtex]
@InProceedings{Gwon_2024_CVPR,
  author    = {Gwon, Mi-Gyeong and Um, Gi-Mun and Cheong, Won-Sik and Kim, Wonjun},
  title     = {Instance-aware Contrastive Learning for Occluded Human Mesh Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10553--10562}
}

Robust Noisy Correspondence Learning with Equivariant Similarity Consistency
Yuchen Yang,
Likai Wang,
Erkun Yang,
Cheng Deng
[pdf] [supp]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Yuchen and Wang, Likai and Yang, Erkun and Deng, Cheng},
  title     = {Robust Noisy Correspondence Learning with Equivariant Similarity Consistency},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17700--17709}
}

Compositional Video Understanding with Spatiotemporal Structure-based Transformers
Hoyeoung Yun,
Jinwoo Ahn,
Minseo Kim,
Eun-Sol Kim
[pdf] [supp]
[bibtex]
@InProceedings{Yun_2024_CVPR,
  author    = {Yun, Hoyeoung and Ahn, Jinwoo and Kim, Minseo and Kim, Eun-Sol},
  title     = {Compositional Video Understanding with Spatiotemporal Structure-based Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18751--18760}
}

3D LiDAR Mapping in Dynamic Environments using a 4D Implicit Neural Representation
Xingguang Zhong,
Yue Pan,
Cyrill Stachniss,
Jens Behley
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2024_CVPR,
  author    = {Zhong, Xingguang and Pan, Yue and Stachniss, Cyrill and Behley, Jens},
  title     = {{3D} {LiDAR} Mapping in Dynamic Environments using a {4D} Implicit Neural Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15417--15427}
}

What When and Where? Self-Supervised Spatio-Temporal Grounding in Untrimmed Multi-Action Videos from Narrated Instructions
Brian Chen,
Nina Shvetsova,
Andrew Rouditchenko,
Daniel Kondermann,
Samuel Thomas,
Shih-Fu Chang,
Rogerio Feris,
James Glass,
Hilde Kuehne
[pdf] [supp]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Brian and Shvetsova, Nina and Rouditchenko, Andrew and Kondermann, Daniel and Thomas, Samuel and Chang, Shih-Fu and Feris, Rogerio and Glass, James and Kuehne, Hilde},
  title     = {What When and Where? Self-Supervised Spatio-Temporal Grounding in Untrimmed Multi-Action Videos from Narrated Instructions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18419--18429}
}

FoundationPose: Unified 6D Pose Estimation and Tracking of Novel Objects
Bowen Wen,
Wei Yang,
Jan Kautz,
Stan Birchfield
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2024_CVPR,
  author    = {Wen, Bowen and Yang, Wei and Kautz, Jan and Birchfield, Stan},
  title     = {{FoundationPose}: Unified {6D} Pose Estimation and Tracking of Novel Objects},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17868--17879}
}

Hyperbolic Anomaly Detection
Huimin Li,
Zhentao Chen,
Yunhao Xu,
Junlin Hu
[pdf]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Huimin and Chen, Zhentao and Xu, Yunhao and Hu, Junlin},
  title     = {Hyperbolic Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17511--17520}
}

VLP: Vision Language Planning for Autonomous Driving
Chenbin Pan,
Burhaneddin Yaman,
Tommaso Nesti,
Abhirup Mallik,
Alessandro G Allievi,
Senem Velipasalar,
Liu Ren
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2024_CVPR,
  author    = {Pan, Chenbin and Yaman, Burhaneddin and Nesti, Tommaso and Mallik, Abhirup and Allievi, Alessandro G and Velipasalar, Senem and Ren, Liu},
  title     = {{VLP}: Vision Language Planning for Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14760--14769}
}

ProMark: Proactive Diffusion Watermarking for Causal Attribution
Vishal Asnani,
John Collomosse,
Tu Bui,
Xiaoming Liu,
Shruti Agarwal
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Asnani_2024_CVPR,
  author    = {Asnani, Vishal and Collomosse, John and Bui, Tu and Liu, Xiaoming and Agarwal, Shruti},
  title     = {{ProMark}: Proactive Diffusion Watermarking for Causal Attribution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10802--10811}
}

Consistency and Uncertainty: Identifying Unreliable Responses From Black-Box Vision-Language Models for Selective Visual Question Answering
Zaid Khan,
Yun Fu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Khan_2024_CVPR,
  author    = {Khan, Zaid and Fu, Yun},
  title     = {Consistency and Uncertainty: Identifying Unreliable Responses From Black-Box Vision-Language Models for Selective Visual Question Answering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10854--10863}
}

Implicit Motion Function
Yue Gao,
Jiahao Li,
Lei Chu,
Yan Lu
[pdf] [supp]
[bibtex]
@InProceedings{Gao_2024_CVPR,
  author    = {Gao, Yue and Li, Jiahao and Chu, Lei and Lu, Yan},
  title     = {Implicit Motion Function},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19278--19289}
}

MultiDiff: Consistent Novel View Synthesis from a Single Image
Norman Müller,
Katja Schwarz,
Barbara Rössle,
Lorenzo Porzi,
Samuel Rota Bulò,
Matthias Nießner,
Peter Kontschieder
[pdf] [supp]
[bibtex]
@InProceedings{Muller_2024_CVPR,
  author    = {M{\"u}ller, Norman and Schwarz, Katja and R{\"o}ssle, Barbara and Porzi, Lorenzo and Rota Bul{\`o}, Samuel and Nie{\ss}ner, Matthias and Kontschieder, Peter},
  title     = {{MultiDiff}: Consistent Novel View Synthesis from a Single Image},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10258--10268}
}

Atom-Level Optical Chemical Structure Recognition with Limited Supervision
Martijn Oldenhof,
Edward De Brouwer,
Adam Arany,
Yves Moreau
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Oldenhof_2024_CVPR,
  author    = {Oldenhof, Martijn and De Brouwer, Edward and Arany, Adam and Moreau, Yves},
  title     = {Atom-Level Optical Chemical Structure Recognition with Limited Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17669--17678}
}

LiDAR-based Person Re-identification
Wenxuan Guo,
Zhiyu Pan,
Yingping Liang,
Ziheng Xi,
Zhicheng Zhong,
Jianjiang Feng,
Jie Zhou
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2024_CVPR,
  author    = {Guo, Wenxuan and Pan, Zhiyu and Liang, Yingping and Xi, Ziheng and Zhong, Zhicheng and Feng, Jianjiang and Zhou, Jie},
  title     = {{LiDAR}-based Person Re-identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17437--17447}
}

Model Adaptation for Time Constrained Embodied Control
Jaehyun Song,
Minjong Yoo,
Honguk Woo
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2024_CVPR,
  author    = {Song, Jaehyun and Yoo, Minjong and Woo, Honguk},
  title     = {Model Adaptation for Time Constrained Embodied Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16499--16508}
}

ActiveDC: Distribution Calibration for Active Finetuning
Wenshuai Xu,
Zhenghui Hu,
Yu Lu,
Jinzhou Meng,
Qingjie Liu,
Yunhong Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Wenshuai and Hu, Zhenghui and Lu, Yu and Meng, Jinzhou and Liu, Qingjie and Wang, Yunhong},
  title     = {{ActiveDC}: Distribution Calibration for Active Finetuning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16996--17005}
}

Seeing Unseen: Discover Novel Biomedical Concepts via Geometry-Constrained Probabilistic Modeling
Jianan Fan,
Dongnan Liu,
Hang Chang,
Heng Huang,
Mei Chen,
Weidong Cai
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2024_CVPR,
  author    = {Fan, Jianan and Liu, Dongnan and Chang, Hang and Huang, Heng and Chen, Mei and Cai, Weidong},
  title     = {Seeing Unseen: Discover Novel Biomedical Concepts via Geometry-Constrained Probabilistic Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11524--11534}
}

Communication-Efficient Federated Learning with Accelerated Client Gradient
Geeho Kim,
Jinkyu Kim,
Bohyung Han
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Geeho and Kim, Jinkyu and Han, Bohyung},
  title     = {Communication-Efficient Federated Learning with Accelerated Client Gradient},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12385--12394}
}

LLMs are Good Action Recognizers
Haoxuan Qu,
Yujun Cai,
Jun Liu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2024_CVPR,
  author    = {Qu, Haoxuan and Cai, Yujun and Liu, Jun},
  title     = {{LLMs} are Good Action Recognizers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18395--18406}
}

Interactive Continual Learning: Fast and Slow Thinking
Biqing Qi,
Xinquan Chen,
Junqi Gao,
Dong Li,
Jianxing Liu,
Ligang Wu,
Bowen Zhou
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qi_2024_CVPR,
  author    = {Qi, Biqing and Chen, Xinquan and Gao, Junqi and Li, Dong and Liu, Jianxing and Wu, Ligang and Zhou, Bowen},
  title     = {Interactive Continual Learning: Fast and Slow Thinking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12882--12892}
}

Towards Learning a Generalist Model for Embodied Navigation
Duo Zheng,
Shijia Huang,
Lin Zhao,
Yiwu Zhong,
Liwei Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2024_CVPR,
  author    = {Zheng, Duo and Huang, Shijia and Zhao, Lin and Zhong, Yiwu and Wang, Liwei},
  title     = {Towards Learning a Generalist Model for Embodied Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13624--13634}
}

Splatter Image: Ultra-Fast Single-View 3D Reconstruction
Stanislaw Szymanowicz,
Christian Rupprecht,
Andrea Vedaldi
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Szymanowicz_2024_CVPR,
  author    = {Szymanowicz, Stanislaw and Rupprecht, Christian and Vedaldi, Andrea},
  title     = {Splatter Image: Ultra-Fast Single-View {3D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10208--10217}
}

Modeling Collaborator: Enabling Subjective Vision Classification With Minimal Human Effort via LLM Tool-Use
Imad Eddine Toubal,
Aditya Avinash,
Neil Gordon Alldrin,
Jan Dlabal,
Wenlei Zhou,
Enming Luo,
Otilia Stretcu,
Hao Xiong,
Chun-Ta Lu,
Howard Zhou,
Ranjay Krishna,
Ariel Fuxman,
Tom Duerig
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Toubal_2024_CVPR,
  author    = {Toubal, Imad Eddine and Avinash, Aditya and Alldrin, Neil Gordon and Dlabal, Jan and Zhou, Wenlei and Luo, Enming and Stretcu, Otilia and Xiong, Hao and Lu, Chun-Ta and Zhou, Howard and Krishna, Ranjay and Fuxman, Ariel and Duerig, Tom},
  title     = {Modeling Collaborator: Enabling Subjective Vision Classification With Minimal Human Effort via {LLM} Tool-Use},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17553--17563}
}

GeoReF: Geometric Alignment Across Shape Variation for Category-level Object Pose Refinement
Linfang Zheng,
Tze Ho Elden Tse,
Chen Wang,
Yinghan Sun,
Hua Chen,
Ales Leonardis,
Wei Zhang,
Hyung Jin Chang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2024_CVPR,
  author    = {Zheng, Linfang and Tse, Tze Ho Elden and Wang, Chen and Sun, Yinghan and Chen, Hua and Leonardis, Ales and Zhang, Wei and Chang, Hyung Jin},
  title     = {{GeoReF}: Geometric Alignment Across Shape Variation for Category-level Object Pose Refinement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10693--10703}
}

Learning Group Activity Features Through Person Attribute Prediction
Chihiro Nakatani,
Hiroaki Kawashima,
Norimichi Ukita
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nakatani_2024_CVPR,
  author    = {Nakatani, Chihiro and Kawashima, Hiroaki and Ukita, Norimichi},
  title     = {Learning Group Activity Features Through Person Attribute Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18233--18242}
}

Plug-and-Play Diffusion Distillation
Yi-Ting Hsiao,
Siavash Khodadadeh,
Kevin Duarte,
Wei-An Lin,
Hui Qu,
Mingi Kwon,
Ratheesh Kalarot
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hsiao_2024_CVPR,
  author    = {Hsiao, Yi-Ting and Khodadadeh, Siavash and Duarte, Kevin and Lin, Wei-An and Qu, Hui and Kwon, Mingi and Kalarot, Ratheesh},
  title     = {Plug-and-Play Diffusion Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13743--13752}
}

MindBridge: A Cross-Subject Brain Decoding Framework
Shizun Wang,
Songhua Liu,
Zhenxiong Tan,
Xinchao Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Shizun and Liu, Songhua and Tan, Zhenxiong and Wang, Xinchao},
  title     = {{MindBridge}: A Cross-Subject Brain Decoding Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11333--11342}
}

MM-Narrator: Narrating Long-form Videos with Multimodal In-Context Learning
Chaoyi Zhang,
Kevin Lin,
Zhengyuan Yang,
Jianfeng Wang,
Linjie Li,
Chung-Ching Lin,
Zicheng Liu,
Lijuan Wang
[pdf] [supp]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Chaoyi and Lin, Kevin and Yang, Zhengyuan and Wang, Jianfeng and Li, Linjie and Lin, Chung-Ching and Liu, Zicheng and Wang, Lijuan},
  title     = {{MM-Narrator}: Narrating Long-form Videos with Multimodal In-Context Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13647--13657}
}

Morphable Diffusion: 3D-Consistent Diffusion for Single-image Avatar Creation
Xiyi Chen,
Marko Mihajlovic,
Shaofei Wang,
Sergey Prokudin,
Siyu Tang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Xiyi and Mihajlovic, Marko and Wang, Shaofei and Prokudin, Sergey and Tang, Siyu},
  title     = {Morphable Diffusion: {3D}-Consistent Diffusion for Single-image Avatar Creation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10359--10370}
}

Fully Convolutional Slice-to-Volume Reconstruction for Single-Stack MRI
Sean I. Young,
Yael Balbastre,
Bruce Fischl,
Polina Golland,
Juan Eugenio Iglesias
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Young_2024_CVPR,
  author    = {Young, Sean I. and Balbastre, Yael and Fischl, Bruce and Golland, Polina and Iglesias, Juan Eugenio},
  title     = {Fully Convolutional Slice-to-Volume Reconstruction for Single-Stack {MRI}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11535--11545}
}

Enhance Image Classification via Inter-Class Image Mixup with Diffusion Model
Zhicai Wang,
Longhui Wei,
Tan Wang,
Heyu Chen,
Yanbin Hao,
Xiang Wang,
Xiangnan He,
Qi Tian
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Zhicai and Wei, Longhui and Wang, Tan and Chen, Heyu and Hao, Yanbin and Wang, Xiang and He, Xiangnan and Tian, Qi},
  title     = {Enhance Image Classification via Inter-Class Image Mixup with Diffusion Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17223--17233}
}

Alpha-CLIP: A CLIP Model Focusing on Wherever You Want
Zeyi Sun,
Ye Fang,
Tong Wu,
Pan Zhang,
Yuhang Zang,
Shu Kong,
Yuanjun Xiong,
Dahua Lin,
Jiaqi Wang
[pdf] [supp]
[bibtex]
@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Zeyi and Fang, Ye and Wu, Tong and Zhang, Pan and Zang, Yuhang and Kong, Shu and Xiong, Yuanjun and Lin, Dahua and Wang, Jiaqi},
  title     = {{Alpha-CLIP}: A {CLIP} Model Focusing on Wherever You Want},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13019--13029}
}

ADA-Track: End-to-End Multi-Camera 3D Multi-Object Tracking with Alternating Detection and Association
Shuxiao Ding,
Lukas Schneider,
Marius Cordts,
Juergen Gall
[pdf] [supp]
[bibtex]
@InProceedings{Ding_2024_CVPR,
  author    = {Ding, Shuxiao and Schneider, Lukas and Cordts, Marius and Gall, Juergen},
  title     = {{ADA-Track}: End-to-End Multi-Camera {3D} Multi-Object Tracking with Alternating Detection and Association},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15184--15194}
}

Mind The Edge: Refining Depth Edges in Sparsely-Supervised Monocular Depth Estimation
Lior Talker,
Aviad Cohen,
Erez Yosef,
Alexandra Dana,
Michael Dinerstein
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Talker_2024_CVPR,
  author    = {Talker, Lior and Cohen, Aviad and Yosef, Erez and Dana, Alexandra and Dinerstein, Michael},
  title     = {Mind The Edge: Refining Depth Edges in Sparsely-Supervised Monocular Depth Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10606--10616}
}

Attention-Driven Training-Free Efficiency Enhancement of Diffusion Models
Hongjie Wang,
Difan Liu,
Yan Kang,
Yijun Li,
Zhe Lin,
Niraj K. Jha,
Yuchen Liu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Hongjie and Liu, Difan and Kang, Yan and Li, Yijun and Lin, Zhe and Jha, Niraj K. and Liu, Yuchen},
  title     = {Attention-Driven Training-Free Efficiency Enhancement of Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16080--16089}
}

CPR: Retrieval Augmented Generation for Copyright Protection
Aditya Golatkar,
Alessandro Achille,
Luca Zancato,
Yu-Xiang Wang,
Ashwin Swaminathan,
Stefano Soatto
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Golatkar_2024_CVPR,
  author    = {Golatkar, Aditya and Achille, Alessandro and Zancato, Luca and Wang, Yu-Xiang and Swaminathan, Ashwin and Soatto, Stefano},
  title     = {{CPR}: Retrieval Augmented Generation for Copyright Protection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12374--12384}
}

Vision-and-Language Navigation via Causal Learning
Liuyi Wang,
Zongtao He,
Ronghao Dang,
Mengjiao Shen,
Chengju Liu,
Qijun Chen
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Liuyi and He, Zongtao and Dang, Ronghao and Shen, Mengjiao and Liu, Chengju and Chen, Qijun},
  title     = {Vision-and-Language Navigation via Causal Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13139--13150}
}

Unveiling Parts Beyond Objects: Towards Finer-Granularity Referring Expression Segmentation
Wenxuan Wang,
Tongtian Yue,
Yisi Zhang,
Longteng Guo,
Xingjian He,
Xinlong Wang,
Jing Liu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Wenxuan and Yue, Tongtian and Zhang, Yisi and Guo, Longteng and He, Xingjian and Wang, Xinlong and Liu, Jing},
  title     = {Unveiling Parts Beyond Objects: Towards Finer-Granularity Referring Expression Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12998--13008}
}

Differentiable Display Photometric Stereo
Seokjun Choi,
Seungwoo Yoon,
Giljoo Nam,
Seungyong Lee,
Seung-Hwan Baek
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2024_CVPR,
  author    = {Choi, Seokjun and Yoon, Seungwoo and Nam, Giljoo and Lee, Seungyong and Baek, Seung-Hwan},
  title     = {Differentiable Display Photometric Stereo},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11831--11840}
}

In-distribution Public Data Synthesis with Diffusion Models for Differentially Private Image Classification
Jinseong Park,
Yujin Choi,
Jaewook Lee
[pdf] [supp]
[bibtex]
@InProceedings{Park_2024_CVPR,
    author    = {Park, Jinseong and Choi, Yujin and Lee, Jaewook},
    title     = {In-distribution Public Data Synthesis with Diffusion Models for Differentially Private Image Classification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {12236--12246}
}

LSK3DNet: Towards Effective and Efficient 3D Perception with Large Sparse Kernels
Tuo Feng,
Wenguan Wang,
Fan Ma,
Yi Yang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2024_CVPR,
    author    = {Feng, Tuo and Wang, Wenguan and Ma, Fan and Yang, Yi},
    title     = {{LSK3DNet}: Towards Effective and Efficient {3D} Perception with Large Sparse Kernels},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {14916--14927}
}

Diversified and Personalized Multi-rater Medical Image Segmentation
Yicheng Wu,
Xiangde Luo,
Zhe Xu,
Xiaoqing Guo,
Lie Ju,
Zongyuan Ge,
Wenjun Liao,
Jianfei Cai
[pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
    author    = {Wu, Yicheng and Luo, Xiangde and Xu, Zhe and Guo, Xiaoqing and Ju, Lie and Ge, Zongyuan and Liao, Wenjun and Cai, Jianfei},
    title     = {Diversified and Personalized Multi-rater Medical Image Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {11470--11479}
}

Discover and Mitigate Multiple Biased Subgroups in Image Classifiers
Zeliang Zhang,
Mingqian Feng,
Zhiheng Li,
Chenliang Xu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Zeliang and Feng, Mingqian and Li, Zhiheng and Xu, Chenliang},
    title     = {Discover and Mitigate Multiple Biased Subgroups in Image Classifiers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {10906--10915}
}

ExMap: Leveraging Explainability Heatmaps for Unsupervised Group Robustness to Spurious Correlations
Rwiddhi Chakraborty,
Adrian Sletten,
Michael C. Kampffmeyer
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chakraborty_2024_CVPR,
    author    = {Chakraborty, Rwiddhi and Sletten, Adrian and Kampffmeyer, Michael C.},
    title     = {{ExMap}: Leveraging Explainability Heatmaps for Unsupervised Group Robustness to Spurious Correlations},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {12017--12026}
}

Learning to Segment Referred Objects from Narrated Egocentric Videos
Yuhan Shen,
Huiyu Wang,
Xitong Yang,
Matt Feiszli,
Ehsan Elhamifar,
Lorenzo Torresani,
Effrosyni Mavroudi
[pdf] [supp]
[bibtex]
@InProceedings{Shen_2024_CVPR,
    author    = {Shen, Yuhan and Wang, Huiyu and Yang, Xitong and Feiszli, Matt and Elhamifar, Ehsan and Torresani, Lorenzo and Mavroudi, Effrosyni},
    title     = {Learning to Segment Referred Objects from Narrated Egocentric Videos},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {14510--14520}
}

Adapting Visual-Language Models for Generalizable Anomaly Detection in Medical Images
Chaoqin Huang,
Aofan Jiang,
Jinghao Feng,
Ya Zhang,
Xinchao Wang,
Yanfeng Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2024_CVPR,
    author    = {Huang, Chaoqin and Jiang, Aofan and Feng, Jinghao and Zhang, Ya and Wang, Xinchao and Wang, Yanfeng},
    title     = {Adapting Visual-Language Models for Generalizable Anomaly Detection in Medical Images},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {11375--11385}
}

Depth-aware Test-Time Training for Zero-shot Video Object Segmentation
Weihuang Liu,
Xi Shen,
Haolun Li,
Xiuli Bi,
Bo Liu,
Chi-Man Pun,
Xiaodong Cun
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
    author    = {Liu, Weihuang and Shen, Xi and Li, Haolun and Bi, Xiuli and Liu, Bo and Pun, Chi-Man and Cun, Xiaodong},
    title     = {Depth-aware Test-Time Training for Zero-shot Video Object Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {19218--19227}
}

RMem: Restricted Memory Banks Improve Video Object Segmentation
Junbao Zhou,
Ziqi Pang,
Yu-Xiong Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2024_CVPR,
    author    = {Zhou, Junbao and Pang, Ziqi and Wang, Yu-Xiong},
    title     = {{RMem}: Restricted Memory Banks Improve Video Object Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {18602--18611}
}

Zero-TPrune: Zero-Shot Token Pruning through Leveraging of the Attention Graph in Pre-Trained Transformers
Hongjie Wang,
Bhishma Dedhia,
Niraj K. Jha
[pdf] [supp]
[bibtex]
@InProceedings{Wang_2024_CVPR,
    author    = {Wang, Hongjie and Dedhia, Bhishma and Jha, Niraj K.},
    title     = {{Zero-TPrune}: Zero-Shot Token Pruning through Leveraging of the Attention Graph in Pre-Trained Transformers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {16070--16079}
}

DIBS: Enhancing Dense Video Captioning with Unlabeled Videos via Pseudo Boundary Enrichment and Online Refinement
Hao Wu,
Huabin Liu,
Yu Qiao,
Xiao Sun
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
    author    = {Wu, Hao and Liu, Huabin and Qiao, Yu and Sun, Xiao},
    title     = {{DIBS}: Enhancing Dense Video Captioning with Unlabeled Videos via Pseudo Boundary Enrichment and Online Refinement},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {18699--18708}
}

SOK-Bench: A Situated Video Reasoning Benchmark with Aligned Open-World Knowledge
Andong Wang,
Bo Wu,
Sunli Chen,
Zhenfang Chen,
Haotian Guan,
Wei-Ning Lee,
Li Erran Li,
Chuang Gan
[pdf] [supp]
[bibtex]
@InProceedings{Wang_2024_CVPR,
    author    = {Wang, Andong and Wu, Bo and Chen, Sunli and Chen, Zhenfang and Guan, Haotian and Lee, Wei-Ning and Li, Li Erran and Gan, Chuang},
    title     = {{SOK-Bench}: A Situated Video Reasoning Benchmark with Aligned Open-World Knowledge},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {13384--13394}
}

LORS: Low-rank Residual Structure for Parameter-Efficient Network Stacking
Jialin Li,
Qiang Nie,
Weifu Fu,
Yuhuan Lin,
Guangpin Tao,
Yong Liu,
Chengjie Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
    author    = {Li, Jialin and Nie, Qiang and Fu, Weifu and Lin, Yuhuan and Tao, Guangpin and Liu, Yong and Wang, Chengjie},
    title     = {{LORS}: Low-rank Residual Structure for Parameter-Efficient Network Stacking},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {15866--15876}
}

Multi-modal In-Context Learning Makes an Ego-evolving Scene Text Recognizer
Zhen Zhao,
Jingqun Tang,
Chunhui Lin,
Binghong Wu,
Can Huang,
Hao Liu,
Xin Tan,
Zhizhong Zhang,
Yuan Xie
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
    author    = {Zhao, Zhen and Tang, Jingqun and Lin, Chunhui and Wu, Binghong and Huang, Can and Liu, Hao and Tan, Xin and Zhang, Zhizhong and Xie, Yuan},
    title     = {Multi-modal In-Context Learning Makes an Ego-evolving Scene Text Recognizer},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {15567--15576}
}

Training Diffusion Models Towards Diverse Image Generation with Reinforcement Learning
Zichen Miao,
Jiang Wang,
Ze Wang,
Zhengyuan Yang,
Lijuan Wang,
Qiang Qiu,
Zicheng Liu
[pdf] [supp]
[bibtex]
@InProceedings{Miao_2024_CVPR,
    author    = {Miao, Zichen and Wang, Jiang and Wang, Ze and Yang, Zhengyuan and Wang, Lijuan and Qiu, Qiang and Liu, Zicheng},
    title     = {Training Diffusion Models Towards Diverse Image Generation with Reinforcement Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {10844--10853}
}

LASIL: Learner-Aware Supervised Imitation Learning For Long-term Microscopic Traffic Simulation
Ke Guo,
Zhenwei Miao,
Wei Jing,
Weiwei Liu,
Weizi Li,
Dayang Hao,
Jia Pan
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2024_CVPR,
    author    = {Guo, Ke and Miao, Zhenwei and Jing, Wei and Liu, Weiwei and Li, Weizi and Hao, Dayang and Pan, Jia},
    title     = {{LASIL}: Learner-Aware Supervised Imitation Learning For Long-term Microscopic Traffic Simulation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {15386--15395}
}

SeaBird: Segmentation in Bird's View with Dice Loss Improves Monocular 3D Detection of Large Objects
Abhinav Kumar,
Yuliang Guo,
Xinyu Huang,
Liu Ren,
Xiaoming Liu
[pdf] [supp]
[bibtex]
@InProceedings{Kumar_2024_CVPR,
    author    = {Kumar, Abhinav and Guo, Yuliang and Huang, Xinyu and Ren, Liu and Liu, Xiaoming},
    title     = {{SeaBird}: Segmentation in Bird's View with {Dice} Loss Improves Monocular {3D} Detection of Large Objects},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {10269--10280}
}

NOPE: Novel Object Pose Estimation from a Single Image
Van Nguyen Nguyen,
Thibault Groueix,
Georgy Ponimatkin,
Yinlin Hu,
Renaud Marlet,
Mathieu Salzmann,
Vincent Lepetit
[pdf] [supp]
[bibtex]
@InProceedings{Nguyen_2024_CVPR,
    author    = {Nguyen, Van Nguyen and Groueix, Thibault and Ponimatkin, Georgy and Hu, Yinlin and Marlet, Renaud and Salzmann, Mathieu and Lepetit, Vincent},
    title     = {{NOPE}: Novel Object Pose Estimation from a Single Image},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17923--17932}
}

Dual-View Visual Contextualization for Web Navigation
Jihyung Kil,
Chan Hee Song,
Boyuan Zheng,
Xiang Deng,
Yu Su,
Wei-Lun Chao
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kil_2024_CVPR,
    author    = {Kil, Jihyung and Song, Chan Hee and Zheng, Boyuan and Deng, Xiang and Su, Yu and Chao, Wei-Lun},
    title     = {Dual-View Visual Contextualization for Web Navigation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {14445--14454}
}

Language-driven Grasp Detection
An Dinh Vuong,
Minh Nhat Vu,
Baoru Huang,
Nghia Nguyen,
Hieu Le,
Thieu Vo,
Anh Nguyen
[pdf] [supp]
[bibtex]
@InProceedings{Vuong_2024_CVPR,
    author    = {Vuong, An Dinh and Vu, Minh Nhat and Huang, Baoru and Nguyen, Nghia and Le, Hieu and Vo, Thieu and Nguyen, Anh},
    title     = {Language-driven Grasp Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17902--17912}
}

Towards Modern Image Manipulation Localization: A Large-Scale Dataset and Novel Methods
Chenfan Qu,
Yiwu Zhong,
Chongyu Liu,
Guitao Xu,
Dezhi Peng,
Fengjun Guo,
Lianwen Jin
[pdf] [supp]
[bibtex]
@InProceedings{Qu_2024_CVPR,
    author    = {Qu, Chenfan and Zhong, Yiwu and Liu, Chongyu and Xu, Guitao and Peng, Dezhi and Guo, Fengjun and Jin, Lianwen},
    title     = {Towards Modern Image Manipulation Localization: A Large-Scale Dataset and Novel Methods},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {10781--10790}
}

Object Recognition as Next Token Prediction
Kaiyu Yue,
Bor-Chun Chen,
Jonas Geiping,
Hengduo Li,
Tom Goldstein,
Ser-Nam Lim
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yue_2024_CVPR,
    author    = {Yue, Kaiyu and Chen, Bor-Chun and Geiping, Jonas and Li, Hengduo and Goldstein, Tom and Lim, Ser-Nam},
    title     = {Object Recognition as Next Token Prediction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {16645--16656}
}

Transcriptomics-guided Slide Representation Learning in Computational Pathology
Guillaume Jaume,
Lukas Oldenburg,
Anurag Vaidya,
Richard J. Chen,
Drew F.K. Williamson,
Thomas Peeters,
Andrew H. Song,
Faisal Mahmood
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jaume_2024_CVPR,
    author    = {Jaume, Guillaume and Oldenburg, Lukas and Vaidya, Anurag and Chen, Richard J. and Williamson, Drew F. K. and Peeters, Thomas and Song, Andrew H. and Mahmood, Faisal},
    title     = {Transcriptomics-guided Slide Representation Learning in Computational Pathology},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {9632--9644}
}

CLIP-BEVFormer: Enhancing Multi-View Image-Based BEV Detector with Ground Truth Flow
Chenbin Pan,
Burhaneddin Yaman,
Senem Velipasalar,
Liu Ren
[pdf] [supp]
[bibtex]
@InProceedings{Pan_2024_CVPR,
    author    = {Pan, Chenbin and Yaman, Burhaneddin and Velipasalar, Senem and Ren, Liu},
    title     = {{CLIP-BEVFormer}: Enhancing Multi-View Image-Based {BEV} Detector with Ground Truth Flow},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {15216--15225}
}

CLOVA: A Closed-LOop Visual Assistant with Tool Usage and Update
Zhi Gao,
Yuntao Du,
Xintong Zhang,
Xiaojian Ma,
Wenjuan Han,
Song-Chun Zhu,
Qing Li
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2024_CVPR,
    author    = {Gao, Zhi and Du, Yuntao and Zhang, Xintong and Ma, Xiaojian and Han, Wenjuan and Zhu, Song-Chun and Li, Qing},
    title     = {{CLOVA}: A {Closed-LOop} Visual Assistant with Tool Usage and Update},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {13258--13268}
}

Depth Prompting for Sensor-Agnostic Depth Estimation
Jin-Hwi Park,
Chanhwi Jeong,
Junoh Lee,
Hae-Gon Jeon
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2024_CVPR,
    author    = {Park, Jin-Hwi and Jeong, Chanhwi and Lee, Junoh and Jeon, Hae-Gon},
    title     = {Depth Prompting for Sensor-Agnostic Depth Estimation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {9859--9869}
}

G3DR: Generative 3D Reconstruction in ImageNet
Pradyumna Reddy,
Ismail Elezi,
Jiankang Deng
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Reddy_2024_CVPR,
    author    = {Reddy, Pradyumna and Elezi, Ismail and Deng, Jiankang},
    title     = {{G3DR}: Generative {3D} Reconstruction in {ImageNet}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {9655--9665}
}

Hyperspherical Classification with Dynamic Label-to-Prototype Assignment
Mohammad Saeed Ebrahimi Saadabadi,
Ali Dabouei,
Sahar Rahimi Malakshan,
Nasser M. Nasrabadi
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saadabadi_2024_CVPR,
    author    = {Saadabadi, Mohammad Saeed Ebrahimi and Dabouei, Ali and Malakshan, Sahar Rahimi and Nasrabadi, Nasser M.},
    title     = {Hyperspherical Classification with Dynamic Label-to-Prototype Assignment},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17333--17342}
}

VTimeLLM: Empower LLM to Grasp Video Moments
Bin Huang,
Xin Wang,
Hong Chen,
Zihan Song,
Wenwu Zhu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2024_CVPR,
    author    = {Huang, Bin and Wang, Xin and Chen, Hong and Song, Zihan and Zhu, Wenwu},
    title     = {{VTimeLLM}: Empower {LLM} to Grasp Video Moments},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {14271--14280}
}

FLHetBench: Benchmarking Device and State Heterogeneity in Federated Learning
Junyuan Zhang,
Shuang Zeng,
Miao Zhang,
Runxi Wang,
Feifei Wang,
Yuyin Zhou,
Paul Pu Liang,
Liangqiong Qu
[pdf] [supp]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Junyuan and Zeng, Shuang and Zhang, Miao and Wang, Runxi and Wang, Feifei and Zhou, Yuyin and Liang, Paul Pu and Qu, Liangqiong},
    title     = {{FLHetBench}: Benchmarking Device and State Heterogeneity in Federated Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {12098--12108}
}

Privacy-Preserving Optics for Enhancing Protection in Face De-Identification
Jhon Lopez,
Carlos Hinojosa,
Henry Arguello,
Bernard Ghanem
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lopez_2024_CVPR,
    author    = {Lopez, Jhon and Hinojosa, Carlos and Arguello, Henry and Ghanem, Bernard},
    title     = {Privacy-Preserving Optics for Enhancing Protection in Face De-Identification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {12120--12129}
}

SmartRefine: A Scenario-Adaptive Refinement Framework for Efficient Motion Prediction
Yang Zhou,
Hao Shao,
Letian Wang,
Steven L. Waslander,
Hongsheng Li,
Yu Liu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2024_CVPR,
    author    = {Zhou, Yang and Shao, Hao and Wang, Letian and Waslander, Steven L. and Li, Hongsheng and Liu, Yu},
    title     = {{SmartRefine}: A Scenario-Adaptive Refinement Framework for Efficient Motion Prediction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {15281--15290}
}

Multi-Scale Video Anomaly Detection by Multi-Grained Spatio-Temporal Representation Learning
Menghao Zhang,
Jingyu Wang,
Qi Qi,
Haifeng Sun,
Zirui Zhuang,
Pengfei Ren,
Ruilong Ma,
Jianxin Liao
[pdf] [supp]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Menghao and Wang, Jingyu and Qi, Qi and Sun, Haifeng and Zhuang, Zirui and Ren, Pengfei and Ma, Ruilong and Liao, Jianxin},
    title     = {Multi-Scale Video Anomaly Detection by Multi-Grained Spatio-Temporal Representation Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17385--17394}
}

Generative Multimodal Models are In-Context Learners
Quan Sun,
Yufeng Cui,
Xiaosong Zhang,
Fan Zhang,
Qiying Yu,
Yueze Wang,
Yongming Rao,
Jingjing Liu,
Tiejun Huang,
Xinlong Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2024_CVPR,
    author    = {Sun, Quan and Cui, Yufeng and Zhang, Xiaosong and Zhang, Fan and Yu, Qiying and Wang, Yueze and Rao, Yongming and Liu, Jingjing and Huang, Tiejun and Wang, Xinlong},
    title     = {Generative Multimodal Models are In-Context Learners},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {14398--14409}
}

Feature Re-Embedding: Towards Foundation Model-Level Performance in Computational Pathology
Wenhao Tang,
Fengtao Zhou,
Sheng Huang,
Xiang Zhu,
Yi Zhang,
Bo Liu
[pdf] [supp]
[bibtex]
@InProceedings{Tang_2024_CVPR,
    author    = {Tang, Wenhao and Zhou, Fengtao and Huang, Sheng and Zhu, Xiang and Zhang, Yi and Liu, Bo},
    title     = {Feature Re-Embedding: Towards Foundation Model-Level Performance in Computational Pathology},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {11343--11352}
}

Text Prompt with Normality Guidance for Weakly Supervised Video Anomaly Detection
Zhiwei Yang,
Jing Liu,
Peng Wu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR,
    author    = {Yang, Zhiwei and Liu, Jing and Wu, Peng},
    title     = {Text Prompt with Normality Guidance for Weakly Supervised Video Anomaly Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {18899--18908}
}

SparseOcc: Rethinking Sparse Latent Representation for Vision-Based Semantic Occupancy Prediction
Pin Tang,
Zhongdao Wang,
Guoqing Wang,
Jilai Zheng,
Xiangxuan Ren,
Bailan Feng,
Chao Ma
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2024_CVPR,
    author    = {Tang, Pin and Wang, Zhongdao and Wang, Guoqing and Zheng, Jilai and Ren, Xiangxuan and Feng, Bailan and Ma, Chao},
    title     = {{SparseOcc}: Rethinking Sparse Latent Representation for Vision-Based Semantic Occupancy Prediction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {15035--15044}
}

Frequency Decoupling for Motion Magnification via Multi-Level Isomorphic Architecture
Fei Wang,
Dan Guo,
Kun Li,
Zhun Zhong,
Meng Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
    author    = {Wang, Fei and Guo, Dan and Li, Kun and Zhong, Zhun and Wang, Meng},
    title     = {Frequency Decoupling for Motion Magnification via Multi-Level Isomorphic Architecture},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {18984--18994}
}

Hyperbolic Learning with Synthetic Captions for Open-World Detection
Fanjie Kong,
Yanbei Chen,
Jiarui Cai,
Davide Modolo
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2024_CVPR,
    author    = {Kong, Fanjie and Chen, Yanbei and Cai, Jiarui and Modolo, Davide},
    title     = {Hyperbolic Learning with Synthetic Captions for Open-World Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {16762--16771}
}

Interpretable Measures of Conceptual Similarity by Complexity-Constrained Descriptive Auto-Encoding
Alessandro Achille,
Greg Ver Steeg,
Tian Yu Liu,
Matthew Trager,
Carson Klingenberg,
Stefano Soatto
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Achille_2024_CVPR,
    author    = {Achille, Alessandro and Ver Steeg, Greg and Liu, Tian Yu and Trager, Matthew and Klingenberg, Carson and Soatto, Stefano},
    title     = {Interpretable Measures of Conceptual Similarity by Complexity-Constrained Descriptive Auto-Encoding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {11062--11071}
}

3D Feature Tracking via Event Camera
Siqi Li,
Zhikuan Zhou,
Zhou Xue,
Yipeng Li,
Shaoyi Du,
Yue Gao
[pdf] [supp]
[bibtex]
@InProceedings{Li_2024_CVPR,
    author    = {Li, Siqi and Zhou, Zhikuan and Xue, Zhou and Li, Yipeng and Du, Shaoyi and Gao, Yue},
    title     = {{3D} Feature Tracking via Event Camera},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {18974--18983}
}

MaxQ: Multi-Axis Query for N:M Sparsity Network
Jingyang Xiang,
Siqi Li,
Junhao Chen,
Zhuangzhi Chen,
Tianxin Huang,
Linpeng Peng,
Yong Liu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2024_CVPR,
    author    = {Xiang, Jingyang and Li, Siqi and Chen, Junhao and Chen, Zhuangzhi and Huang, Tianxin and Peng, Linpeng and Liu, Yong},
    title     = {{MaxQ}: Multi-Axis Query for {N:M} Sparsity Network},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {15845--15854}
}

Part-aware Unified Representation of Language and Skeleton for Zero-shot Action Recognition
Anqi Zhu,
Qiuhong Ke,
Mingming Gong,
James Bailey
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2024_CVPR,
    author    = {Zhu, Anqi and Ke, Qiuhong and Gong, Mingming and Bailey, James},
    title     = {Part-aware Unified Representation of Language and Skeleton for Zero-shot Action Recognition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {18761--18770}
}

Composing Object Relations and Attributes for Image-Text Matching
Khoi Pham,
Chuong Huynh,
Ser-Nam Lim,
Abhinav Shrivastava
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pham_2024_CVPR,
    author    = {Pham, Khoi and Huynh, Chuong and Lim, Ser-Nam and Shrivastava, Abhinav},
    title     = {Composing Object Relations and Attributes for Image-Text Matching},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {14354--14363}
}

Previously on ... From Recaps to Story Summarization
Aditya Kumar Singh,
Dhruv Srivastava,
Makarand Tapaswi
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Singh_2024_CVPR,
    author    = {Singh, Aditya Kumar and Srivastava, Dhruv and Tapaswi, Makarand},
    title     = {Previously on ... From Recaps to Story Summarization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {13635--13646}
}

mPLUG-Owl2: Revolutionizing Multi-modal Large Language Model with Modality Collaboration
Qinghao Ye,
Haiyang Xu,
Jiabo Ye,
Ming Yan,
Anwen Hu,
Haowei Liu,
Qi Qian,
Ji Zhang,
Fei Huang
[pdf] [supp]
[bibtex]
@InProceedings{Ye_2024_CVPR,
    author    = {Ye, Qinghao and Xu, Haiyang and Ye, Jiabo and Yan, Ming and Hu, Anwen and Liu, Haowei and Qian, Qi and Zhang, Ji and Huang, Fei},
    title     = {{mPLUG-Owl2}: Revolutionizing Multi-modal Large Language Model with Modality Collaboration},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {13040--13051}
}

Learning by Correction: Efficient Tuning Task for Zero-Shot Generative Vision-Language Reasoning
Rongjie Li,
Yu Wu,
Xuming He
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
    author    = {Li, Rongjie and Wu, Yu and He, Xuming},
    title     = {Learning by Correction: Efficient Tuning Task for Zero-Shot Generative Vision-Language Reasoning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {13428--13437}
}

Supervised Anomaly Detection for Complex Industrial Images
Aimira Baitieva,
David Hurych,
Victor Besnier,
Olivier Bernard
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Baitieva_2024_CVPR,
    author    = {Baitieva, Aimira and Hurych, David and Besnier, Victor and Bernard, Olivier},
    title     = {Supervised Anomaly Detection for Complex Industrial Images},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17754--17762}
}

Open3DSG: Open-Vocabulary 3D Scene Graphs from Point Clouds with Queryable Objects and Open-Set Relationships
Sebastian Koch,
Narunas Vaskevicius,
Mirco Colosi,
Pedro Hermosilla,
Timo Ropinski
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Koch_2024_CVPR,
    author    = {Koch, Sebastian and Vaskevicius, Narunas and Colosi, Mirco and Hermosilla, Pedro and Ropinski, Timo},
    title     = {{Open3DSG}: Open-Vocabulary {3D} Scene Graphs from Point Clouds with Queryable Objects and Open-Set Relationships},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {14183--14193}
}

SURE: SUrvey REcipes for building reliable and robust deep networks
Yuting Li,
Yingyi Chen,
Xuanlong Yu,
Dexiong Chen,
Xi Shen
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
    author    = {Li, Yuting and Chen, Yingyi and Yu, Xuanlong and Chen, Dexiong and Shen, Xi},
    title     = {{SURE}: {SUrvey} {REcipes} for building reliable and robust deep networks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17500--17510}
}

PolarRec: Improving Radio Interferometric Data Reconstruction Using Polar Coordinates
Ruoqi Wang,
Zhuoyang Chen,
Jiayi Zhu,
Qiong Luo,
Feng Wang
[pdf] [supp]
[bibtex]
@InProceedings{Wang_2024_CVPR,
    author    = {Wang, Ruoqi and Chen, Zhuoyang and Zhu, Jiayi and Luo, Qiong and Wang, Feng},
    title     = {{PolarRec}: Improving Radio Interferometric Data Reconstruction Using Polar Coordinates},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {12841--12850}
}

Summarize the Past to Predict the Future: Natural Language Descriptions of Context Boost Multimodal Object Interaction Anticipation
Razvan-George Pasca,
Alexey Gavryushin,
Muhammad Hamza,
Yen-Ling Kuo,
Kaichun Mo,
Luc Van Gool,
Otmar Hilliges,
Xi Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pasca_2024_CVPR,
    author    = {Pasca, Razvan-George and Gavryushin, Alexey and Hamza, Muhammad and Kuo, Yen-Ling and Mo, Kaichun and Van Gool, Luc and Hilliges, Otmar and Wang, Xi},
    title     = {Summarize the Past to Predict the Future: Natural Language Descriptions of Context Boost Multimodal Object Interaction Anticipation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {18286--18296}
}

Towards CLIP-driven Language-free 3D Visual Grounding via 2D-3D Relational Enhancement and Consistency
Yuqi Zhang,
Han Luo,
Yinjie Lei
[pdf]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Yuqi and Luo, Han and Lei, Yinjie},
    title     = {Towards {CLIP}-driven Language-free {3D} Visual Grounding via {2D-3D} Relational Enhancement and Consistency},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {13063--13072}
}

Optimal Transport Aggregation for Visual Place Recognition
Sergio Izquierdo,
Javier Civera
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Izquierdo_2024_CVPR,
    author    = {Izquierdo, Sergio and Civera, Javier},
    title     = {Optimal Transport Aggregation for Visual Place Recognition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17658--17668}
}

Aligning and Prompting Everything All at Once for Universal Visual Perception
Yunhang Shen,
Chaoyou Fu,
Peixian Chen,
Mengdan Zhang,
Ke Li,
Xing Sun,
Yunsheng Wu,
Shaohui Lin,
Rongrong Ji
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2024_CVPR,
    author    = {Shen, Yunhang and Fu, Chaoyou and Chen, Peixian and Zhang, Mengdan and Li, Ke and Sun, Xing and Wu, Yunsheng and Lin, Shaohui and Ji, Rongrong},
    title     = {Aligning and Prompting Everything All at Once for Universal Visual Perception},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {13193--13203}
}

Correlation-Decoupled Knowledge Distillation for Multimodal Sentiment Analysis with Incomplete Modalities
Mingcheng Li,
Dingkang Yang,
Xiao Zhao,
Shuaibing Wang,
Yan Wang,
Kun Yang,
Mingyang Sun,
Dongliang Kou,
Ziyun Qian,
Lihua Zhang
[pdf] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
    author    = {Li, Mingcheng and Yang, Dingkang and Zhao, Xiao and Wang, Shuaibing and Wang, Yan and Yang, Kun and Sun, Mingyang and Kou, Dongliang and Qian, Ziyun and Zhang, Lihua},
    title     = {Correlation-Decoupled Knowledge Distillation for Multimodal Sentiment Analysis with Incomplete Modalities},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {12458--12468}
}

LoSh: Long-Short Text Joint Prediction Network for Referring Video Object Segmentation
Linfeng Yuan,
Miaojing Shi,
Zijie Yue,
Qijun Chen
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2024_CVPR,
    author    = {Yuan, Linfeng and Shi, Miaojing and Yue, Zijie and Chen, Qijun},
    title     = {{LoSh}: Long-Short Text Joint Prediction Network for Referring Video Object Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {14001--14010}
}

Dual Prototype Attention for Unsupervised Video Object Segmentation
Suhwan Cho,
Minhyeok Lee,
Seunghoon Lee,
Dogyoon Lee,
Heeseung Choi,
Ig-Jae Kim,
Sangyoun Lee
[pdf] [arXiv]
[bibtex]
@InProceedings{Cho_2024_CVPR,
    author    = {Cho, Suhwan and Lee, Minhyeok and Lee, Seunghoon and Lee, Dogyoon and Choi, Heeseung and Kim, Ig-Jae and Lee, Sangyoun},
    title     = {Dual Prototype Attention for Unsupervised Video Object Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {19238--19247}
}

Navigate Beyond Shortcuts: Debiased Learning Through the Lens of Neural Collapse
Yining Wang,
Junjie Sun,
Chenyue Wang,
Mi Zhang,
Min Yang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
    author    = {Wang, Yining and Sun, Junjie and Wang, Chenyue and Zhang, Mi and Yang, Min},
    title     = {Navigate Beyond Shortcuts: Debiased Learning Through the Lens of Neural Collapse},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {12322--12331}
}

A Subspace-Constrained Tyler's Estimator and its Applications to Structure from Motion
Feng Yu,
Teng Zhang,
Gilad Lerman
[pdf] [supp]
[bibtex]
@InProceedings{Yu_2024_CVPR,
    author    = {Yu, Feng and Zhang, Teng and Lerman, Gilad},
    title     = {A Subspace-Constrained {Tyler's} Estimator and its Applications to Structure from Motion},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {14575--14584}
}

CAD: Photorealistic 3D Generation via Adversarial Distillation
Ziyu Wan,
Despoina Paschalidou,
Ian Huang,
Hongyu Liu,
Bokui Shen,
Xiaoyu Xiang,
Jing Liao,
Leonidas Guibas
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wan_2024_CVPR,
    author    = {Wan, Ziyu and Paschalidou, Despoina and Huang, Ian and Liu, Hongyu and Shen, Bokui and Xiang, Xiaoyu and Liao, Jing and Guibas, Leonidas},
    title     = {{CAD}: Photorealistic {3D} Generation via Adversarial Distillation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {10194--10207}
}

Enhancing Vision-Language Pre-training with Rich Supervisions
Yuan Gao,
Kunyu Shi,
Pengkai Zhu,
Edouard Belval,
Oren Nuriel,
Srikar Appalaraju,
Shabnam Ghadar,
Zhuowen Tu,
Vijay Mahadevan,
Stefano Soatto
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2024_CVPR,
    author    = {Gao, Yuan and Shi, Kunyu and Zhu, Pengkai and Belval, Edouard and Nuriel, Oren and Appalaraju, Srikar and Ghadar, Shabnam and Tu, Zhuowen and Mahadevan, Vijay and Soatto, Stefano},
    title     = {Enhancing Vision-Language Pre-training with Rich Supervisions},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {13480--13491}
}

Adaptive VIO: Deep Visual-Inertial Odometry with Online Continual Learning
Youqi Pan,
Wugen Zhou,
Yingdian Cao,
Hongbin Zha
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2024_CVPR,
    author    = {Pan, Youqi and Zhou, Wugen and Cao, Yingdian and Zha, Hongbin},
    title     = {Adaptive {VIO}: Deep Visual-Inertial Odometry with Online Continual Learning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {18019--18028}
}

Generalized Large-Scale Data Condensation via Various Backbone and Statistical Matching
Shitong Shao,
Zeyuan Yin,
Muxin Zhou,
Xindong Zhang,
Zhiqiang Shen
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2024_CVPR,
    author    = {Shao, Shitong and Yin, Zeyuan and Zhou, Muxin and Zhang, Xindong and Shen, Zhiqiang},
    title     = {Generalized Large-Scale Data Condensation via Various Backbone and Statistical Matching},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {16709--16718}
}

On Train-Test Class Overlap and Detection for Image Retrieval
Chull Hwan Song,
Jooyoung Yoon,
Taebaek Hwang,
Shunghyun Choi,
Yeong Hyeon Gu,
Yannis Avrithis
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2024_CVPR,
    author    = {Song, Chull Hwan and Yoon, Jooyoung and Hwang, Taebaek and Choi, Shunghyun and Gu, Yeong Hyeon and Avrithis, Yannis},
    title     = {On Train-Test Class Overlap and Detection for Image Retrieval},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {17375--17384}
}

AttriHuman-3D: Editable 3D Human Avatar Generation with Attribute Decomposition and Indexing
Fan Yang,
Tianyi Chen,
Xiaosheng He,
Zhongang Cai,
Lei Yang,
Si Wu,
Guosheng Lin
[pdf] [supp]
[bibtex]
@InProceedings{Yang_2024_CVPR,
    author    = {Yang, Fan and Chen, Tianyi and He, Xiaosheng and Cai, Zhongang and Yang, Lei and Wu, Si and Lin, Guosheng},
    title     = {{AttriHuman-3D}: Editable {3D} Human Avatar Generation with Attribute Decomposition and Indexing},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {10596--10605}
}

Learning Object State Changes in Videos: An Open-World Perspective
Zihui Xue,
Kumar Ashutosh,
Kristen Grauman
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2024_CVPR,
    author    = {Xue, Zihui and Ashutosh, Kumar and Grauman, Kristen},
    title     = {Learning Object State Changes in Videos: An Open-World Perspective},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {18493--18503}
}

SCoFT: Self-Contrastive Fine-Tuning for Equitable Image Generation
Zhixuan Liu,
Peter Schaldenbrand,
Beverley-Claire Okogwu,
Wenxuan Peng,
Youngsik Yun,
Andrew Hundt,
Jihie Kim,
Jean Oh
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
    author    = {Liu, Zhixuan and Schaldenbrand, Peter and Okogwu, Beverley-Claire and Peng, Wenxuan and Yun, Youngsik and Hundt, Andrew and Kim, Jihie and Oh, Jean},
    title     = {{SCoFT}: Self-Contrastive Fine-Tuning for Equitable Image Generation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {10822--10832}
}

Iterated Learning Improves Compositionality in Large Vision-Language Models
Chenhao Zheng,
Jieyu Zhang,
Aniruddha Kembhavi,
Ranjay Krishna
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2024_CVPR,
    author    = {Zheng, Chenhao and Zhang, Jieyu and Kembhavi, Aniruddha and Krishna, Ranjay},
    title     = {Iterated Learning Improves Compositionality in Large Vision-Language Models},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {13785--13795}
}

Event Stream-based Visual Object Tracking: A High-Resolution Benchmark Dataset and A Novel Baseline
Xiao Wang,
Shiao Wang,
Chuanming Tang,
Lin Zhu,
Bo Jiang,
Yonghong Tian,
Jin Tang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
    author    = {Wang, Xiao and Wang, Shiao and Tang, Chuanming and Zhu, Lin and Jiang, Bo and Tian, Yonghong and Tang, Jin},
    title     = {Event Stream-based Visual Object Tracking: A High-Resolution Benchmark Dataset and A Novel Baseline},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {19248--19257}
}

Dual DETRs for Multi-Label Temporal Action Detection
Yuhan Zhu,
Guozhen Zhang,
Jing Tan,
Gangshan Wu,
Limin Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2024_CVPR,
    author    = {Zhu, Yuhan and Zhang, Guozhen and Tan, Jing and Wu, Gangshan and Wang, Limin},
    title     = {Dual {DETRs} for Multi-Label Temporal Action Detection},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {18559--18569}
}

Virtual Immunohistochemistry Staining for Histological Images Assisted by Weakly-supervised Learning
Jiahan Li,
Jiuyang Dong,
Shenjin Huang,
Xi Li,
Junjun Jiang,
Xiaopeng Fan,
Yongbing Zhang
[pdf] [supp]
[bibtex]
@InProceedings{Li_2024_CVPR,
    author    = {Li, Jiahan and Dong, Jiuyang and Huang, Shenjin and Li, Xi and Jiang, Junjun and Fan, Xiaopeng and Zhang, Yongbing},
    title     = {Virtual Immunohistochemistry Staining for Histological Images Assisted by Weakly-supervised Learning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {11259--11268}
}

DeCoTR: Enhancing Depth Completion with 2D and 3D Attentions
Yunxiao Shi,
Manish Kumar Singh,
Hong Cai,
Fatih Porikli
[pdf] [arXiv]
[bibtex]
@InProceedings{Shi_2024_CVPR,
    author    = {Shi, Yunxiao and Singh, Manish Kumar and Cai, Hong and Porikli, Fatih},
    title     = {{DeCoTR}: Enhancing Depth Completion with {2D} and {3D} Attentions},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {10736--10746}
}

Utility-Fairness Trade-Offs and How to Find Them
Sepehr Dehdashtian,
Bashir Sadeghi,
Vishnu Naresh Boddeti
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dehdashtian_2024_CVPR,
    author    = {Dehdashtian, Sepehr and Sadeghi, Bashir and Boddeti, Vishnu Naresh},
    title     = {Utility-Fairness Trade-Offs and How to Find Them},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {12037--12046}
}

SAOR: Single-View Articulated Object Reconstruction
Mehmet Aygun,
Oisin Mac Aodha
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Aygun_2024_CVPR,
    author    = {Aygun, Mehmet and Mac Aodha, Oisin},
    title     = {{SAOR}: Single-View Articulated Object Reconstruction},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {10382--10391}
}

A Theory of Joint Light and Heat Transport for Lambertian Scenes
Mani Ramanagopal,
Sriram Narayanan,
Aswin C. Sankaranarayanan,
Srinivasa G. Narasimhan
[pdf] [supp]
[bibtex]
@InProceedings{Ramanagopal_2024_CVPR,
    author    = {Ramanagopal, Mani and Narayanan, Sriram and Sankaranarayanan, Aswin C. and Narasimhan, Srinivasa G.},
    title     = {A Theory of Joint Light and Heat Transport for {Lambertian} Scenes},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {11924--11933}
}

iKUN: Speak to Trackers without Retraining
Yunhao Du,
Cheng Lei,
Zhicheng Zhao,
Fei Su
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2024_CVPR,
    author    = {Du, Yunhao and Lei, Cheng and Zhao, Zhicheng and Su, Fei},
    title     = {{iKUN}: Speak to Trackers without Retraining},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {19135--19144}
}

Facial Identity Anonymization via Intrinsic and Extrinsic Attention Distraction
Zhenzhong Kuang,
Xiaochen Yang,
Yingjie Shen,
Chao Hu,
Jun Yu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kuang_2024_CVPR,
    author    = {Kuang, Zhenzhong and Yang, Xiaochen and Shen, Yingjie and Hu, Chao and Yu, Jun},
    title     = {Facial Identity Anonymization via Intrinsic and Extrinsic Attention Distraction},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {12406--12415}
}

3D-SceneDreamer: Text-Driven 3D-Consistent Scene Generation
Songchun Zhang,
Yibo Zhang,
Quan Zheng,
Rui Ma,
Wei Hua,
Hujun Bao,
Weiwei Xu,
Changqing Zou
[pdf]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Songchun and Zhang, Yibo and Zheng, Quan and Ma, Rui and Hua, Wei and Bao, Hujun and Xu, Weiwei and Zou, Changqing},
    title     = {{3D-SceneDreamer}: Text-Driven {3D}-Consistent Scene Generation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {10170--10180}
}

VMINer: Versatile Multi-view Inverse Rendering with Near- and Far-field Light Sources
Fan Fei,
Jiajun Tang,
Ping Tan,
Boxin Shi
[pdf] [supp]
[bibtex]
@InProceedings{Fei_2024_CVPR,
    author    = {Fei, Fan and Tang, Jiajun and Tan, Ping and Shi, Boxin},
    title     = {{VMINer}: Versatile Multi-view Inverse Rendering with Near- and Far-field Light Sources},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {11800--11809}
}

RoHM: Robust Human Motion Reconstruction via Diffusion
Siwei Zhang,
Bharat Lal Bhatnagar,
Yuanlu Xu,
Alexander Winkler,
Petr Kadlecek,
Siyu Tang,
Federica Bogo
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Siwei and Bhatnagar, Bharat Lal and Xu, Yuanlu and Winkler, Alexander and Kadlecek, Petr and Tang, Siyu and Bogo, Federica},
    title     = {{RoHM}: Robust Human Motion Reconstruction via Diffusion},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {14606--14617}
}

Do You Remember? Dense Video Captioning with Cross-Modal Memory Retrieval
Minkuk Kim,
Hyeon Bae Kim,
Jinyoung Moon,
Jinwoo Choi,
Seong Tae Kim
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR,
    author    = {Kim, Minkuk and Kim, Hyeon Bae and Moon, Jinyoung and Choi, Jinwoo and Kim, Seong Tae},
    title     = {Do You Remember? Dense Video Captioning with Cross-Modal Memory Retrieval},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {13894--13904}
}

SPAD: Spatially Aware Multi-View Diffusers
Yash Kant,
Aliaksandr Siarohin,
Ziyi Wu,
Michael Vasilkovsky,
Guocheng Qian,
Jian Ren,
Riza Alp Guler,
Bernard Ghanem,
Sergey Tulyakov,
Igor Gilitschenski
[pdf] [supp]
[bibtex]
@InProceedings{Kant_2024_CVPR,
    author    = {Kant, Yash and Siarohin, Aliaksandr and Wu, Ziyi and Vasilkovsky, Michael and Qian, Guocheng and Ren, Jian and Guler, Riza Alp and Ghanem, Bernard and Tulyakov, Sergey and Gilitschenski, Igor},
    title     = {{SPAD}: Spatially Aware Multi-View Diffusers},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {10026--10038}
}

Gradient Reweighting: Towards Imbalanced Class-Incremental Learning
Jiangpeng He
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2024_CVPR,
    author    = {He, Jiangpeng},
    title     = {Gradient Reweighting: Towards Imbalanced Class-Incremental Learning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {16668--16677}
}

Gaussian Splatting SLAM
Hidenobu Matsuki,
Riku Murai,
Paul H.J. Kelly,
Andrew J. Davison
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Matsuki_2024_CVPR,
    author    = {Matsuki, Hidenobu and Murai, Riku and Kelly, Paul H. J. and Davison, Andrew J.},
    title     = {{Gaussian} Splatting {SLAM}},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {18039--18048}
}

Not All Classes Stand on Same Embeddings: Calibrating a Semantic Distance with Metric Tensor
Jae Hyeon Park,
Gyoomin Lee,
Seunggi Park,
Sung In Cho
[pdf] [supp]
[bibtex]
@InProceedings{Park_2024_CVPR,
    author    = {Park, Jae Hyeon and Lee, Gyoomin and Park, Seunggi and Cho, Sung In},
    title     = {Not All Classes Stand on Same Embeddings: Calibrating a Semantic Distance with Metric Tensor},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {17722--17731}
}

A Simple Recipe for Contrastively Pre-training Video-First Encoders Beyond 16 Frames
Pinelopi Papalampidi,
Skanda Koppula,
Shreya Pathak,
Justin Chiu,
Joe Heyward,
Viorica Patraucean,
Jiajun Shen,
Antoine Miech,
Andrew Zisserman,
Aida Nematzdeh
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Papalampidi_2024_CVPR,
    author    = {Papalampidi, Pinelopi and Koppula, Skanda and Pathak, Shreya and Chiu, Justin and Heyward, Joe and Patraucean, Viorica and Shen, Jiajun and Miech, Antoine and Zisserman, Andrew and Nematzdeh, Aida},
    title     = {A Simple Recipe for Contrastively Pre-training Video-First Encoders Beyond 16 Frames},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {14386--14397}
}

Hierarchical Diffusion Policy for Kinematics-Aware Multi-Task Robotic Manipulation
Xiao Ma,
Sumit Patidar,
Iain Haughton,
Stephen James
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2024_CVPR,
    author    = {Ma, Xiao and Patidar, Sumit and Haughton, Iain and James, Stephen},
    title     = {Hierarchical Diffusion Policy for Kinematics-Aware Multi-Task Robotic Manipulation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {18081--18090}
}

Benchmarking the Robustness of Temporal Action Detection Models Against Temporal Corruptions
Runhao Zeng,
Xiaoyong Chen,
Jiaming Liang,
Huisi Wu,
Guangzhong Cao,
Yong Guo
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2024_CVPR,
    author    = {Zeng, Runhao and Chen, Xiaoyong and Liang, Jiaming and Wu, Huisi and Cao, Guangzhong and Guo, Yong},
    title     = {Benchmarking the Robustness of Temporal Action Detection Models Against Temporal Corruptions},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {18263--18274}
}

Open-World Human-Object Interaction Detection via Multi-modal Prompts
Jie Yang,
Bingliang Li,
Ailing Zeng,
Lei Zhang,
Ruimao Zhang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR,
    author    = {Yang, Jie and Li, Bingliang and Zeng, Ailing and Zhang, Lei and Zhang, Ruimao},
    title     = {Open-World Human-Object Interaction Detection via Multi-modal Prompts},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {16954--16964}
}

UniMODE: Unified Monocular 3D Object Detection
Zhuoling Li,
Xiaogang Xu,
SerNam Lim,
Hengshuang Zhao
[pdf] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
    author    = {Li, Zhuoling and Xu, Xiaogang and Lim, SerNam and Zhao, Hengshuang},
    title     = {{UniMODE}: Unified Monocular {3D} Object Detection},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {16561--16570}
}

Multi-agent Collaborative Perception via Motion-aware Robust Communication Network
Shixin Hong,
Yu Liu,
Zhi Li,
Shaohui Li,
You He
[pdf]
[bibtex]
@InProceedings{Hong_2024_CVPR,
    author    = {Hong, Shixin and Liu, Yu and Li, Zhi and Li, Shaohui and He, You},
    title     = {Multi-agent Collaborative Perception via Motion-aware Robust Communication Network},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {15301--15310}
}

The Manga Whisperer: Automatically Generating Transcriptions for Comics
Ragav Sachdeva,
Andrew Zisserman
[pdf] [arXiv]
[bibtex]
@InProceedings{Sachdeva_2024_CVPR,
    author    = {Sachdeva, Ragav and Zisserman, Andrew},
    title     = {The Manga Whisperer: Automatically Generating Transcriptions for Comics},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {12967--12976}
}

Exploring Region-Word Alignment in Built-in Detector for Open-Vocabulary Object Detection
Heng Zhang,
Qiuyu Zhao,
Linyu Zheng,
Hao Zeng,
Zhiwei Ge,
Tianhao Li,
Sulong Xu
[pdf]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Heng and Zhao, Qiuyu and Zheng, Linyu and Zeng, Hao and Ge, Zhiwei and Li, Tianhao and Xu, Sulong},
    title     = {Exploring Region-Word Alignment in Built-in Detector for Open-Vocabulary Object Detection},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {16975--16984}
}

MovieChat: From Dense Token to Sparse Memory for Long Video Understanding
Enxin Song,
Wenhao Chai,
Guanhong Wang,
Yucheng Zhang,
Haoyang Zhou,
Feiyang Wu,
Haozhe Chi,
Xun Guo,
Tian Ye,
Yanting Zhang,
Yan Lu,
Jenq-Neng Hwang,
Gaoang Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2024_CVPR,
    author    = {Song, Enxin and Chai, Wenhao and Wang, Guanhong and Zhang, Yucheng and Zhou, Haoyang and Wu, Feiyang and Chi, Haozhe and Guo, Xun and Ye, Tian and Zhang, Yanting and Lu, Yan and Hwang, Jenq-Neng and Wang, Gaoang},
    title     = {{MovieChat}: From Dense Token to Sparse Memory for Long Video Understanding},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {18221--18232}
}

Comparing the Decision-Making Mechanisms by Transformers and CNNs via Explanation Methods
Mingqi Jiang,
Saeed Khorram,
Li Fuxin
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2024_CVPR,
    author    = {Jiang, Mingqi and Khorram, Saeed and Fuxin, Li},
    title     = {Comparing the Decision-Making Mechanisms by Transformers and {CNNs} via Explanation Methods},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {9546--9555}
}

Atlantis: Enabling Underwater Depth Estimation with Stable Diffusion
Fan Zhang,
Shaodi You,
Yu Li,
Ying Fu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Fan and You, Shaodi and Li, Yu and Fu, Ying},
    title     = {Atlantis: Enabling Underwater Depth Estimation with {Stable Diffusion}},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {11852--11861}
}

Matching Anything by Segmenting Anything
Siyuan Li,
Lei Ke,
Martin Danelljan,
Luigi Piccinelli,
Mattia Segu,
Luc Van Gool,
Fisher Yu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
    author    = {Li, Siyuan and Ke, Lei and Danelljan, Martin and Piccinelli, Luigi and Segu, Mattia and Van Gool, Luc and Yu, Fisher},
    title     = {Matching Anything by Segmenting Anything},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {18963--18973}
}

Decoupled Pseudo-labeling for Semi-Supervised Monocular 3D Object Detection
Jiacheng Zhang,
Jiaming Li,
Xiangru Lin,
Wei Zhang,
Xiao Tan,
Junyu Han,
Errui Ding,
Jingdong Wang,
Guanbin Li
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Jiacheng and Li, Jiaming and Lin, Xiangru and Zhang, Wei and Tan, Xiao and Han, Junyu and Ding, Errui and Wang, Jingdong and Li, Guanbin},
    title     = {Decoupled Pseudo-labeling for Semi-Supervised Monocular {3D} Object Detection},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {16923--16932}
}

Temporally Consistent Unbalanced Optimal Transport for Unsupervised Action Segmentation
Ming Xu,
Stephen Gould
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
    author    = {Xu, Ming and Gould, Stephen},
    title     = {Temporally Consistent Unbalanced Optimal Transport for Unsupervised Action Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {14618--14627}
}

Learning Transferable Negative Prompts for Out-of-Distribution Detection
Tianqi Li,
Guansong Pang,
Xiao Bai,
Wenjun Miao,
Jin Zheng
[pdf] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
    author    = {Li, Tianqi and Pang, Guansong and Bai, Xiao and Miao, Wenjun and Zheng, Jin},
    title     = {Learning Transferable Negative Prompts for Out-of-Distribution Detection},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {17584--17594}
}

Holistic Features are almost Sufficient for Text-to-Video Retrieval
Kaibin Tian,
Ruixiang Zhao,
Zijie Xin,
Bangxiang Lan,
Xirong Li
[pdf]
[bibtex]
@InProceedings{Tian_2024_CVPR,
    author    = {Tian, Kaibin and Zhao, Ruixiang and Xin, Zijie and Lan, Bangxiang and Li, Xirong},
    title     = {Holistic Features are almost Sufficient for Text-to-Video Retrieval},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {17138--17147}
}

Uncertainty-aware Action Decoupling Transformer for Action Anticipation
Hongji Guo,
Nakul Agarwal,
Shao-Yuan Lo,
Kwonjoon Lee,
Qiang Ji
[pdf] [supp]
[bibtex]
@InProceedings{Guo_2024_CVPR,
    author    = {Guo, Hongji and Agarwal, Nakul and Lo, Shao-Yuan and Lee, Kwonjoon and Ji, Qiang},
    title     = {Uncertainty-aware Action Decoupling Transformer for Action Anticipation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {18644--18654}
}

One-Prompt to Segment All Medical Images
Junde Wu,
Min Xu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
    author    = {Wu, Junde and Xu, Min},
    title     = {One-Prompt to Segment All Medical Images},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {11302--11312}
}

GROUNDHOG: Grounding Large Language Models to Holistic Segmentation
Yichi Zhang,
Ziqiao Ma,
Xiaofeng Gao,
Suhaila Shakiah,
Qiaozi Gao,
Joyce Chai
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Yichi and Ma, Ziqiao and Gao, Xiaofeng and Shakiah, Suhaila and Gao, Qiaozi and Chai, Joyce},
    title     = {{GROUNDHOG}: Grounding Large Language Models to Holistic Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {14227--14238}
}

Omni-SMoLA: Boosting Generalist Multimodal Models with Soft Mixture of Low-rank Experts
Jialin Wu,
Xia Hu,
Yaqing Wang,
Bo Pang,
Radu Soricut
[pdf] [supp]
[bibtex]
@InProceedings{Wu_2024_CVPR,
    author    = {Wu, Jialin and Hu, Xia and Wang, Yaqing and Pang, Bo and Soricut, Radu},
    title     = {{Omni-SMoLA}: Boosting Generalist Multimodal Models with Soft Mixture of Low-rank Experts},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {14205--14215}
}

SeMoLi: What Moves Together Belongs Together
Jenny Seidenschwarz,
Aljosa Osep,
Francesco Ferroni,
Simon Lucey,
Laura Leal-Taixe
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seidenschwarz_2024_CVPR,
    author    = {Seidenschwarz, Jenny and Osep, Aljosa and Ferroni, Francesco and Lucey, Simon and Leal-Taixe, Laura},
    title     = {{SeMoLi}: What Moves Together Belongs Together},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {14685--14694}
}

Context-Guided Spatio-Temporal Video Grounding
Xin Gu,
Heng Fan,
Yan Huang,
Tiejian Luo,
Libo Zhang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2024_CVPR,
    author    = {Gu, Xin and Fan, Heng and Huang, Yan and Luo, Tiejian and Zhang, Libo},
    title     = {Context-Guided Spatio-Temporal Video Grounding},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {18330--18339}
}

Explaining the Implicit Neural Canvas: Connecting Pixels to Neurons by Tracing their Contributions
Namitha Padmanabhan,
Matthew Gwilliam,
Pulkit Kumar,
Shishira R Maiya,
Max Ehrlich,
Abhinav Shrivastava
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Padmanabhan_2024_CVPR,
    author    = {Padmanabhan, Namitha and Gwilliam, Matthew and Kumar, Pulkit and Maiya, Shishira R and Ehrlich, Max and Shrivastava, Abhinav},
    title     = {Explaining the Implicit Neural Canvas: Connecting Pixels to Neurons by Tracing their Contributions},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {10957--10967}
}

Adapting to Length Shift: FlexiLength Network for Trajectory Prediction
Yi Xu,
Yun Fu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
    author    = {Xu, Yi and Fu, Yun},
    title     = {Adapting to Length Shift: {FlexiLength} Network for Trajectory Prediction},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {15226--15237}
}

WorDepth: Variational Language Prior for Monocular Depth Estimation
Ziyao Zeng,
Daniel Wang,
Fengyu Yang,
Hyoungseob Park,
Stefano Soatto,
Dong Lao,
Alex Wong
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2024_CVPR,
    author    = {Zeng, Ziyao and Wang, Daniel and Yang, Fengyu and Park, Hyoungseob and Soatto, Stefano and Lao, Dong and Wong, Alex},
    title     = {{WorDepth}: Variational Language Prior for Monocular Depth Estimation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {9708--9719}
}

A Unified Framework for Microscopy Defocus Deblur with Multi-Pyramid Transformer and Contrastive Learning
Yuelin Zhang,
Pengyu Zheng,
Wanquan Yan,
Chengyu Fang,
Shing Shin Cheng
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Yuelin and Zheng, Pengyu and Yan, Wanquan and Fang, Chengyu and Cheng, Shing Shin},
    title     = {A Unified Framework for Microscopy Defocus Deblur with Multi-Pyramid Transformer and Contrastive Learning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {11125--11136}
}

Frozen Feature Augmentation for Few-Shot Image Classification
Andreas Bär,
Neil Houlsby,
Mostafa Dehghani,
Manoj Kumar
[pdf] [supp]
[bibtex]
@InProceedings{Bar_2024_CVPR,
    author    = {B{\"a}r, Andreas and Houlsby, Neil and Dehghani, Mostafa and Kumar, Manoj},
    title     = {Frozen Feature Augmentation for Few-Shot Image Classification},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {16046--16057}
}

Incorporating Geo-Diverse Knowledge into Prompting for Increased Geographical Robustness in Object Recognition
Kyle Buettner,
Sina Malakouti,
Xiang Lorraine Li,
Adriana Kovashka
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Buettner_2024_CVPR,
    author    = {Buettner, Kyle and Malakouti, Sina and Li, Xiang Lorraine and Kovashka, Adriana},
    title     = {Incorporating Geo-Diverse Knowledge into Prompting for Increased Geographical Robustness in Object Recognition},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {13515--13524}
}

PIN: Positional Insert Unlocks Object Localisation Abilities in VLMs
Michael Dorkenwald,
Nimrod Barazani,
Cees G. M. Snoek,
Yuki M. Asano
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dorkenwald_2024_CVPR,
  author    = {Dorkenwald, Michael and Barazani, Nimrod and Snoek, Cees G. M. and Asano, Yuki M.},
  title     = {{PIN}: Positional Insert Unlocks Object Localisation Abilities in {VLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13548--13558},
}

UniGarmentManip: A Unified Framework for Category-Level Garment Manipulation via Dense Visual Correspondence
Ruihai Wu,
Haoran Lu,
Yiyan Wang,
Yubo Wang,
Hao Dong
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Ruihai and Lu, Haoran and Wang, Yiyan and Wang, Yubo and Dong, Hao},
  title     = {{UniGarmentManip}: A Unified Framework for Category-Level Garment Manipulation via Dense Visual Correspondence},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16340--16350},
}

Multi-Attribute Interactions Matter for 3D Visual Grounding
Can Xu,
Yuehui Han,
Rui Xu,
Le Hui,
Jin Xie,
Jian Yang
[pdf]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Can and Han, Yuehui and Xu, Rui and Hui, Le and Xie, Jin and Yang, Jian},
  title     = {Multi-Attribute Interactions Matter for {3D} Visual Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17253--17262},
}

SCINeRF: Neural Radiance Fields from a Snapshot Compressive Image
Yunhao Li,
Xiaodong Wang,
Ping Wang,
Xin Yuan,
Peidong Liu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Yunhao and Wang, Xiaodong and Wang, Ping and Yuan, Xin and Liu, Peidong},
  title     = {{SCINeRF}: Neural Radiance Fields from a Snapshot Compressive Image},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10542--10552},
}

Improved Visual Grounding through Self-Consistent Explanations
Ruozhen He,
Paola Cascante-Bonilla,
Ziyan Yang,
Alexander C. Berg,
Vicente Ordonez
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2024_CVPR,
  author    = {He, Ruozhen and Cascante-Bonilla, Paola and Yang, Ziyan and Berg, Alexander C. and Ordonez, Vicente},
  title     = {Improved Visual Grounding through Self-Consistent Explanations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13095--13105},
}

DifFlow3D: Toward Robust Uncertainty-Aware Scene Flow Estimation with Iterative Diffusion-Based Refinement
Jiuming Liu,
Guangming Wang,
Weicai Ye,
Chaokang Jiang,
Jinru Han,
Zhe Liu,
Guofeng Zhang,
Dalong Du,
Hesheng Wang
[pdf] [supp]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Jiuming and Wang, Guangming and Ye, Weicai and Jiang, Chaokang and Han, Jinru and Liu, Zhe and Zhang, Guofeng and Du, Dalong and Wang, Hesheng},
  title     = {{DifFlow3D}: Toward Robust Uncertainty-Aware Scene Flow Estimation with Iterative Diffusion-Based Refinement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15109--15119},
}

FlashEval: Towards Fast and Accurate Evaluation of Text-to-image Diffusion Generative Models
Lin Zhao,
Tianchen Zhao,
Zinan Lin,
Xuefei Ning,
Guohao Dai,
Huazhong Yang,
Yu Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Lin and Zhao, Tianchen and Lin, Zinan and Ning, Xuefei and Dai, Guohao and Yang, Huazhong and Wang, Yu},
  title     = {{FlashEval}: Towards Fast and Accurate Evaluation of Text-to-image Diffusion Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16122--16131},
}

View From Above: Orthogonal-View aware Cross-view Localization
Shan Wang,
Chuong Nguyen,
Jiawei Liu,
Yanhao Zhang,
Sundaram Muthu,
Fahira Afzal Maken,
Kaihao Zhang,
Hongdong Li
[pdf] [supp]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Shan and Nguyen, Chuong and Liu, Jiawei and Zhang, Yanhao and Muthu, Sundaram and Maken, Fahira Afzal and Zhang, Kaihao and Li, Hongdong},
  title     = {View From Above: Orthogonal-View aware Cross-view Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14843--14852},
}

PeVL: Pose-Enhanced Vision-Language Model for Fine-Grained Human Action Recognition
Haosong Zhang,
Mei Chee Leong,
Liyuan Li,
Weisi Lin
[pdf] [supp]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Haosong and Leong, Mei Chee and Li, Liyuan and Lin, Weisi},
  title     = {{PeVL}: Pose-Enhanced Vision-Language Model for Fine-Grained Human Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18857--18867},
}

DeepCache: Accelerating Diffusion Models for Free
Xinyin Ma,
Gongfan Fang,
Xinchao Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Xinyin and Fang, Gongfan and Wang, Xinchao},
  title     = {{DeepCache}: Accelerating Diffusion Models for Free},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15762--15772},
}

Learning Correlation Structures for Vision Transformers
Manjin Kim,
Paul Hongsuck Seo,
Cordelia Schmid,
Minsu Cho
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Manjin and Seo, Paul Hongsuck and Schmid, Cordelia and Cho, Minsu},
  title     = {Learning Correlation Structures for Vision Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18941--18951},
}

PrPSeg: Universal Proposition Learning for Panoramic Renal Pathology Segmentation
Ruining Deng,
Quan Liu,
Can Cui,
Tianyuan Yao,
Jialin Yue,
Juming Xiong,
Lining Yu,
Yifei Wu,
Mengmeng Yin,
Yu Wang,
Shilin Zhao,
Yucheng Tang,
Haichun Yang,
Yuankai Huo
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2024_CVPR,
  author    = {Deng, Ruining and Liu, Quan and Cui, Can and Yao, Tianyuan and Yue, Jialin and Xiong, Juming and Yu, Lining and Wu, Yifei and Yin, Mengmeng and Wang, Yu and Zhao, Shilin and Tang, Yucheng and Yang, Haichun and Huo, Yuankai},
  title     = {{PrPSeg}: Universal Proposition Learning for Panoramic Renal Pathology Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11736--11746},
}

Weakly-Supervised Audio-Visual Video Parsing with Prototype-based Pseudo-Labeling
Kranthi Kumar Rachavarapu,
Kalyan Ramakrishnan,
Rajagopalan A. N.
[pdf] [supp]
[bibtex]
@InProceedings{Rachavarapu_2024_CVPR,
  author    = {Rachavarapu, Kranthi Kumar and Ramakrishnan, Kalyan and Rajagopalan, A. N.},
  title     = {Weakly-Supervised Audio-Visual Video Parsing with Prototype-based Pseudo-Labeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18952--18962},
}

Intraoperative 2D/3D Image Registration via Differentiable X-ray Rendering
Vivek Gopalakrishnan,
Neel Dey,
Polina Golland
[pdf] [supp]
[bibtex]
@InProceedings{Gopalakrishnan_2024_CVPR,
  author    = {Gopalakrishnan, Vivek and Dey, Neel and Golland, Polina},
  title     = {Intraoperative {2D/3D} Image Registration via Differentiable {X-ray} Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11662--11672},
}

MICap: A Unified Model for Identity-Aware Movie Descriptions
Haran Raajesh,
Naveen Reddy Desanur,
Zeeshan Khan,
Makarand Tapaswi
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Raajesh_2024_CVPR,
  author    = {Raajesh, Haran and Desanur, Naveen Reddy and Khan, Zeeshan and Tapaswi, Makarand},
  title     = {{MICap}: A Unified Model for Identity-Aware Movie Descriptions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14011--14021},
}

MonoDiff: Monocular 3D Object Detection and Pose Estimation with Diffusion Models
Yasiru Ranasinghe,
Deepti Hegde,
Vishal M. Patel
[pdf] [supp]
[bibtex]
@InProceedings{Ranasinghe_2024_CVPR,
  author    = {Ranasinghe, Yasiru and Hegde, Deepti and Patel, Vishal M.},
  title     = {{MonoDiff}: Monocular {3D} Object Detection and Pose Estimation with Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10659--10670},
}

An Upload-Efficient Scheme for Transferring Knowledge From a Server-Side Pre-trained Generator to Clients in Heterogeneous Federated Learning
Jianqing Zhang,
Yang Liu,
Yang Hua,
Jian Cao
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Jianqing and Liu, Yang and Hua, Yang and Cao, Jian},
  title     = {An Upload-Efficient Scheme for Transferring Knowledge From a Server-Side Pre-trained Generator to Clients in Heterogeneous Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12109--12119},
}

Instance-aware Exploration-Verification-Exploitation for Instance ImageGoal Navigation
Xiaohan Lei,
Min Wang,
Wengang Zhou,
Li Li,
Houqiang Li
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2024_CVPR,
  author    = {Lei, Xiaohan and Wang, Min and Zhou, Wengang and Li, Li and Li, Houqiang},
  title     = {Instance-aware Exploration-Verification-Exploitation for Instance {ImageGoal} Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16329--16339},
}

One-2-3-45++: Fast Single Image to 3D Objects with Consistent Multi-View Generation and 3D Diffusion
Minghua Liu,
Ruoxi Shi,
Linghao Chen,
Zhuoyang Zhang,
Chao Xu,
Xinyue Wei,
Hansheng Chen,
Chong Zeng,
Jiayuan Gu,
Hao Su
[pdf] [supp]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Minghua and Shi, Ruoxi and Chen, Linghao and Zhang, Zhuoyang and Xu, Chao and Wei, Xinyue and Chen, Hansheng and Zeng, Chong and Gu, Jiayuan and Su, Hao},
  title     = {One-2-3-45++: Fast Single Image to {3D} Objects with Consistent Multi-View Generation and {3D} Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10072--10083},
}

Let's Think Outside the Box: Exploring Leap-of-Thought in Large Language Models with Creative Humor Generation
Shanshan Zhong,
Zhongzhan Huang,
Shanghua Gao,
Wushao Wen,
Liang Lin,
Marinka Zitnik,
Pan Zhou
[pdf] [supp]
[bibtex]
@InProceedings{Zhong_2024_CVPR,
  author    = {Zhong, Shanshan and Huang, Zhongzhan and Gao, Shanghua and Wen, Wushao and Lin, Liang and Zitnik, Marinka and Zhou, Pan},
  title     = {Let's Think Outside the Box: Exploring Leap-of-Thought in Large Language Models with Creative Humor Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13246--13257},
}

SceneFun3D: Fine-Grained Functionality and Affordance Understanding in 3D Scenes
Alexandros Delitzas,
Ayca Takmaz,
Federico Tombari,
Robert Sumner,
Marc Pollefeys,
Francis Engelmann
[pdf] [supp]
[bibtex]
@InProceedings{Delitzas_2024_CVPR,
  author    = {Delitzas, Alexandros and Takmaz, Ayca and Tombari, Federico and Sumner, Robert and Pollefeys, Marc and Engelmann, Francis},
  title     = {{SceneFun3D}: Fine-Grained Functionality and Affordance Understanding in {3D} Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14531--14542},
}

Enhanced Motion-Text Alignment for Image-to-Video Transfer Learning
Wei Zhang,
Chaoqun Wan,
Tongliang Liu,
Xinmei Tian,
Xu Shen,
Jieping Ye
[pdf] [supp]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Wei and Wan, Chaoqun and Liu, Tongliang and Tian, Xinmei and Shen, Xu and Ye, Jieping},
  title     = {Enhanced Motion-Text Alignment for Image-to-Video Transfer Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18504--18515},
}

UV-IDM: Identity-Conditioned Latent Diffusion Model for Face UV-Texture Generation
Hong Li,
Yutang Feng,
Song Xue,
Xuhui Liu,
Bohan Zeng,
Shanglin Li,
Boyu Liu,
Jianzhuang Liu,
Shumin Han,
Baochang Zhang
[pdf] [supp]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Hong and Feng, Yutang and Xue, Song and Liu, Xuhui and Zeng, Bohan and Li, Shanglin and Liu, Boyu and Liu, Jianzhuang and Han, Shumin and Zhang, Baochang},
  title     = {{UV-IDM}: Identity-Conditioned Latent Diffusion Model for Face {UV}-Texture Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10585--10595},
}

A Pedestrian is Worth One Prompt: Towards Language Guidance Person Re-Identification
Zexian Yang,
Dayan Wu,
Chenming Wu,
Zheng Lin,
Jingzi Gu,
Weiping Wang
[pdf] [supp]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Zexian and Wu, Dayan and Wu, Chenming and Lin, Zheng and Gu, Jingzi and Wang, Weiping},
  title     = {A Pedestrian is Worth One Prompt: Towards Language Guidance Person Re-Identification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17343--17353},
}

NetTrack: Tracking Highly Dynamic Objects with a Net
Guangze Zheng,
Shijie Lin,
Haobo Zuo,
Changhong Fu,
Jia Pan
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2024_CVPR,
  author    = {Zheng, Guangze and Lin, Shijie and Zuo, Haobo and Fu, Changhong and Pan, Jia},
  title     = {{NetTrack}: Tracking Highly Dynamic Objects with a Net},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {19145--19155},
}

Grounded Question-Answering in Long Egocentric Videos
Shangzhe Di,
Weidi Xie
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Di_2024_CVPR,
  author    = {Di, Shangzhe and Xie, Weidi},
  title     = {Grounded Question-Answering in Long Egocentric Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12934--12943},
}

HPNet: Dynamic Trajectory Forecasting with Historical Prediction Attention
Xiaolong Tang,
Meina Kan,
Shiguang Shan,
Zhilong Ji,
Jinfeng Bai,
Xilin Chen
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2024_CVPR,
  author    = {Tang, Xiaolong and Kan, Meina and Shan, Shiguang and Ji, Zhilong and Bai, Jinfeng and Chen, Xilin},
  title     = {{HPNet}: Dynamic Trajectory Forecasting with Historical Prediction Attention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15261--15270},
}

SI-MIL: Taming Deep MIL for Self-Interpretability in Gigapixel Histopathology
Saarthak Kapse,
Pushpak Pati,
Srijan Das,
Jingwei Zhang,
Chao Chen,
Maria Vakalopoulou,
Joel Saltz,
Dimitris Samaras,
Rajarsi R. Gupta,
Prateek Prasanna
[pdf] [supp]
[bibtex]
@InProceedings{Kapse_2024_CVPR,
  author    = {Kapse, Saarthak and Pati, Pushpak and Das, Srijan and Zhang, Jingwei and Chen, Chao and Vakalopoulou, Maria and Saltz, Joel and Samaras, Dimitris and Gupta, Rajarsi R. and Prasanna, Prateek},
  title     = {{SI-MIL}: Taming Deep {MIL} for Self-Interpretability in Gigapixel Histopathology},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11226--11237},
}

LayoutFormer: Hierarchical Text Detection Towards Scene Text Understanding
Min Liang,
Jia-Wei Ma,
Xiaobin Zhu,
Jingyan Qin,
Xu-Cheng Yin
[pdf]
[bibtex]
@InProceedings{Liang_2024_CVPR,
  author    = {Liang, Min and Ma, Jia-Wei and Zhu, Xiaobin and Qin, Jingyan and Yin, Xu-Cheng},
  title     = {{LayoutFormer}: Hierarchical Text Detection Towards Scene Text Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15665--15674},
}

GLOW: Global Layout Aware Attacks on Object Detection
Jun Bao,
Buyu Liu,
Kui Ren,
Jun Yu
[pdf] [supp]
[bibtex]
@InProceedings{Bao_2024_CVPR,
  author    = {Bao, Jun and Liu, Buyu and Ren, Kui and Yu, Jun},
  title     = {{GLOW}: Global Layout Aware Attacks on Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12057--12066},
}

SIRA: Scalable Inter-frame Relation and Association for Radar Perception
Ryoma Yataka,
Pu Wang,
Petros Boufounos,
Ryuhei Takahashi
[pdf] [supp]
[bibtex]
@InProceedings{Yataka_2024_CVPR,
  author    = {Yataka, Ryoma and Wang, Pu and Boufounos, Petros and Takahashi, Ryuhei},
  title     = {{SIRA}: Scalable Inter-frame Relation and Association for Radar Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15024--15034},
}

VOODOO 3D: Volumetric Portrait Disentanglement For One-Shot 3D Head Reenactment
Phong Tran,
Egor Zakharov,
Long-Nhat Ho,
Anh Tuan Tran,
Liwen Hu,
Hao Li
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tran_2024_CVPR,
  author    = {Tran, Phong and Zakharov, Egor and Ho, Long-Nhat and Tran, Anh Tuan and Hu, Liwen and Li, Hao},
  title     = {{VOODOO 3D}: Volumetric Portrait Disentanglement For One-Shot {3D} Head Reenactment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10336--10348},
}

Visual Fact Checker: Enabling High-Fidelity Detailed Caption Generation
Yunhao Ge,
Xiaohui Zeng,
Jacob Samuel Huffman,
Tsung-Yi Lin,
Ming-Yu Liu,
Yin Cui
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ge_2024_CVPR,
  author    = {Ge, Yunhao and Zeng, Xiaohui and Huffman, Jacob Samuel and Lin, Tsung-Yi and Liu, Ming-Yu and Cui, Yin},
  title     = {Visual Fact Checker: Enabling High-Fidelity Detailed Caption Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14033--14042},
}

Communication-Efficient Collaborative Perception via Information Filling with Codebook
Yue Hu,
Juntong Peng,
Sifei Liu,
Junhao Ge,
Si Liu,
Siheng Chen
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2024_CVPR,
  author    = {Hu, Yue and Peng, Juntong and Liu, Sifei and Ge, Junhao and Liu, Si and Chen, Siheng},
  title     = {Communication-Efficient Collaborative Perception via Information Filling with Codebook},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15481--15490},
}

MVD-Fusion: Single-view 3D via Depth-consistent Multi-view Generation
Hanzhe Hu,
Zhizhuo Zhou,
Varun Jampani,
Shubham Tulsiani
[pdf] [supp]
[bibtex]
@InProceedings{Hu_2024_CVPR,
  author    = {Hu, Hanzhe and Zhou, Zhizhuo and Jampani, Varun and Tulsiani, Shubham},
  title     = {{MVD-Fusion}: Single-view {3D} via Depth-consistent Multi-view Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9698--9707},
}

Effective Video Mirror Detection with Inconsistent Motion Cues
Alex Warren,
Ke Xu,
Jiaying Lin,
Gary K.L. Tam,
Rynson W.H. Lau
[pdf] [supp]
[bibtex]
@InProceedings{Warren_2024_CVPR,
  author    = {Warren, Alex and Xu, Ke and Lin, Jiaying and Tam, Gary K. L. and Lau, Rynson W. H.},
  title     = {Effective Video Mirror Detection with Inconsistent Motion Cues},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17244--17252},
}

DiffLoc: Diffusion Model for Outdoor LiDAR Localization
Wen Li,
Yuyang Yang,
Shangshu Yu,
Guosheng Hu,
Chenglu Wen,
Ming Cheng,
Cheng Wang
[pdf] [supp]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Wen and Yang, Yuyang and Yu, Shangshu and Hu, Guosheng and Wen, Chenglu and Cheng, Ming and Wang, Cheng},
  title     = {{DiffLoc}: Diffusion Model for Outdoor {LiDAR} Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15045--15054},
}

On Scaling Up a Multilingual Vision and Language Model
Xi Chen,
Josip Djolonga,
Piotr Padlewski,
Basil Mustafa,
Soravit Changpinyo,
Jialin Wu,
Carlos Riquelme Ruiz,
Sebastian Goodman,
Xiao Wang,
Yi Tay,
Siamak Shakeri,
Mostafa Dehghani,
Daniel Salz,
Mario Lucic,
Michael Tschannen,
Arsha Nagrani,
Hexiang Hu,
Mandar Joshi,
Bo Pang,
Ceslee Montgomery,
Paulina Pietrzyk,
Marvin Ritter,
AJ Piergiovanni,
Matthias Minderer,
Filip Pavetic,
Austin Waters,
Gang Li,
Ibrahim Alabdulmohsin,
Lucas Beyer,
Julien Amelot,
Kenton Lee,
Andreas Peter Steiner,
Yang Li,
Daniel Keysers,
Anurag Arnab,
Yuanzhong Xu,
Keran Rong,
Alexander Kolesnikov,
Mojtaba Seyedhosseini,
Anelia Angelova,
Xiaohua Zhai,
Neil Houlsby,
Radu Soricut
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Xi and Djolonga, Josip and Padlewski, Piotr and Mustafa, Basil and Changpinyo, Soravit and Wu, Jialin and Ruiz, Carlos Riquelme and Goodman, Sebastian and Wang, Xiao and Tay, Yi and Shakeri, Siamak and Dehghani, Mostafa and Salz, Daniel and Lucic, Mario and Tschannen, Michael and Nagrani, Arsha and Hu, Hexiang and Joshi, Mandar and Pang, Bo and Montgomery, Ceslee and Pietrzyk, Paulina and Ritter, Marvin and Piergiovanni, AJ and Minderer, Matthias and Pavetic, Filip and Waters, Austin and Li, Gang and Alabdulmohsin, Ibrahim and Beyer, Lucas and Amelot, Julien and Lee, Kenton and Steiner, Andreas Peter and Li, Yang and Keysers, Daniel and Arnab, Anurag and Xu, Yuanzhong and Rong, Keran and Kolesnikov, Alexander and Seyedhosseini, Mojtaba and Angelova, Anelia and Zhai, Xiaohua and Houlsby, Neil and Soricut, Radu},
  title     = {On Scaling Up a Multilingual Vision and Language Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14432--14444},
}

Day-Night Cross-domain Vehicle Re-identification
Hongchao Li,
Jingong Chen,
Aihua Zheng,
Yong Wu,
Yonglong Luo
[pdf]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Hongchao and Chen, Jingong and Zheng, Aihua and Wu, Yong and Luo, Yonglong},
  title     = {Day-Night Cross-domain Vehicle Re-identification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12626--12635},
}

Holodeck: Language Guided Generation of 3D Embodied AI Environments
Yue Yang,
Fan-Yun Sun,
Luca Weihs,
Eli VanderBilt,
Alvaro Herrasti,
Winson Han,
Jiajun Wu,
Nick Haber,
Ranjay Krishna,
Lingjie Liu,
Chris Callison-Burch,
Mark Yatskar,
Aniruddha Kembhavi,
Christopher Clark
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Yue and Sun, Fan-Yun and Weihs, Luca and VanderBilt, Eli and Herrasti, Alvaro and Han, Winson and Wu, Jiajun and Haber, Nick and Krishna, Ranjay and Liu, Lingjie and Callison-Burch, Chris and Yatskar, Mark and Kembhavi, Aniruddha and Clark, Christopher},
  title     = {Holodeck: Language Guided Generation of {3D} Embodied {AI} Environments},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16227--16237},
}

Distilled Datamodel with Reverse Gradient Matching
Jingwen Ye,
Ruonan Yu,
Songhua Liu,
Xinchao Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2024_CVPR,
  author    = {Ye, Jingwen and Yu, Ruonan and Liu, Songhua and Wang, Xinchao},
  title     = {Distilled Datamodel with Reverse Gradient Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11954--11963},
}

Pseudo Label Refinery for Unsupervised Domain Adaptation on Cross-dataset 3D Object Detection
Zhanwei Zhang,
Minghao Chen,
Shuai Xiao,
Liang Peng,
Hengjia Li,
Binbin Lin,
Ping Li,
Wenxiao Wang,
Boxi Wu,
Deng Cai
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Zhanwei and Chen, Minghao and Xiao, Shuai and Peng, Liang and Li, Hengjia and Lin, Binbin and Li, Ping and Wang, Wenxiao and Wu, Boxi and Cai, Deng},
  title     = {Pseudo Label Refinery for Unsupervised Domain Adaptation on Cross-dataset {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15291--15300},
}

Reconstructing Hands in 3D with Transformers
Georgios Pavlakos,
Dandan Shan,
Ilija Radosavovic,
Angjoo Kanazawa,
David Fouhey,
Jitendra Malik
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pavlakos_2024_CVPR,
  author    = {Pavlakos, Georgios and Shan, Dandan and Radosavovic, Ilija and Kanazawa, Angjoo and Fouhey, David and Malik, Jitendra},
  title     = {Reconstructing Hands in {3D} with Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9826--9836},
}

PELA: Learning Parameter-Efficient Models with Low-Rank Approximation
Yangyang Guo,
Guangzhi Wang,
Mohan Kankanhalli
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2024_CVPR,
  author    = {Guo, Yangyang and Wang, Guangzhi and Kankanhalli, Mohan},
  title     = {{PELA}: Learning Parameter-Efficient Models with Low-Rank Approximation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15699--15709},
}

Auto-Train-Once: Controller Network Guided Automatic Network Pruning from Scratch
Xidong Wu,
Shangqian Gao,
Zeyu Zhang,
Zhenzhen Li,
Runxue Bao,
Yanfu Zhang,
Xiaoqian Wang,
Heng Huang
[pdf] [supp]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Xidong and Gao, Shangqian and Zhang, Zeyu and Li, Zhenzhen and Bao, Runxue and Zhang, Yanfu and Wang, Xiaoqian and Huang, Heng},
  title     = {Auto-Train-Once: Controller Network Guided Automatic Network Pruning from Scratch},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16163--16173},
}

Constructing and Exploring Intermediate Domains in Mixed Domain Semi-supervised Medical Image Segmentation
Qinghe Ma,
Jian Zhang,
Lei Qi,
Qian Yu,
Yinghuan Shi,
Yang Gao
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Qinghe and Zhang, Jian and Qi, Lei and Yu, Qian and Shi, Yinghuan and Gao, Yang},
  title     = {Constructing and Exploring Intermediate Domains in Mixed Domain Semi-supervised Medical Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11642--11651},
}

From Isolated Islands to Pangea: Unifying Semantic Space for Human Action Understanding
Yong-Lu Li,
Xiaoqian Wu,
Xinpeng Liu,
Zehao Wang,
Yiming Dou,
Yikun Ji,
Junyi Zhang,
Yixing Li,
Xudong Lu,
Jingru Tan,
Cewu Lu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Yong-Lu and Wu, Xiaoqian and Liu, Xinpeng and Wang, Zehao and Dou, Yiming and Ji, Yikun and Zhang, Junyi and Li, Yixing and Lu, Xudong and Tan, Jingru and Lu, Cewu},
  title     = {From Isolated Islands to {Pangea}: Unifying Semantic Space for Human Action Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16582--16592},
}

Bootstrapping Autonomous Driving Radars with Self-Supervised Learning
Yiduo Hao,
Sohrab Madani,
Junfeng Guan,
Mohammed Alloulah,
Saurabh Gupta,
Haitham Hassanieh
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hao_2024_CVPR,
  author    = {Hao, Yiduo and Madani, Sohrab and Guan, Junfeng and Alloulah, Mohammed and Gupta, Saurabh and Hassanieh, Haitham},
  title     = {Bootstrapping Autonomous Driving Radars with Self-Supervised Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15012--15023},
}

Weakly Supervised Monocular 3D Detection with a Single-View Image
Xueying Jiang,
Sheng Jin,
Lewei Lu,
Xiaoqin Zhang,
Shijian Lu
[pdf] [arXiv]
[bibtex]
@InProceedings{Jiang_2024_CVPR,
  author    = {Jiang, Xueying and Jin, Sheng and Lu, Lewei and Zhang, Xiaoqin and Lu, Shijian},
  title     = {Weakly Supervised Monocular {3D} Detection with a Single-View Image},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10508--10518},
}

Blind Image Quality Assessment Based on Geometric Order Learning
Nyeong-Ho Shin,
Seon-Ho Lee,
Chang-Su Kim
[pdf] [supp]
[bibtex]
@InProceedings{Shin_2024_CVPR,
  author    = {Shin, Nyeong-Ho and Lee, Seon-Ho and Kim, Chang-Su},
  title     = {Blind Image Quality Assessment Based on Geometric Order Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12799--12808},
}

Generalizable Whole Slide Image Classification with Fine-Grained Visual-Semantic Interaction
Hao Li,
Ying Chen,
Yifei Chen,
Rongshan Yu,
Wenxian Yang,
Liansheng Wang,
Bowen Ding,
Yuchen Han
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Hao and Chen, Ying and Chen, Yifei and Yu, Rongshan and Yang, Wenxian and Wang, Liansheng and Ding, Bowen and Han, Yuchen},
  title     = {Generalizable Whole Slide Image Classification with Fine-Grained Visual-Semantic Interaction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11398--11407},
}

Generalizing 6-DoF Grasp Detection via Domain Prior Knowledge
Haoxiang Ma,
Modi Shi,
Boyang Gao,
Di Huang
[pdf] [supp]
[bibtex]
@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Haoxiang and Shi, Modi and Gao, Boyang and Huang, Di},
  title     = {Generalizing {6-DoF} Grasp Detection via Domain Prior Knowledge},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18102--18111},
}

RadSimReal: Bridging the Gap Between Synthetic and Real Data in Radar Object Detection With Simulation
Oded Bialer,
Yuval Haitman
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bialer_2024_CVPR,
  author    = {Bialer, Oded and Haitman, Yuval},
  title     = {{RadSimReal}: Bridging the Gap Between Synthetic and Real Data in Radar Object Detection With Simulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15407--15416},
}

3DSFLabelling: Boosting 3D Scene Flow Estimation by Pseudo Auto-labelling
Chaokang Jiang,
Guangming Wang,
Jiuming Liu,
Hesheng Wang,
Zhuang Ma,
Zhenqiang Liu,
Zhujin Liang,
Yi Shan,
Dalong Du
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2024_CVPR,
  author    = {Jiang, Chaokang and Wang, Guangming and Liu, Jiuming and Wang, Hesheng and Ma, Zhuang and Liu, Zhenqiang and Liang, Zhujin and Shan, Yi and Du, Dalong},
  title     = {{3DSFLabelling}: Boosting {3D} Scene Flow Estimation by Pseudo Auto-labelling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15173--15183},
}

Question Aware Vision Transformer for Multimodal Reasoning
Roy Ganz,
Yair Kittenplon,
Aviad Aberdam,
Elad Ben Avraham,
Oren Nuriel,
Shai Mazor,
Ron Litman
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ganz_2024_CVPR,
  author    = {Ganz, Roy and Kittenplon, Yair and Aberdam, Aviad and Ben Avraham, Elad and Nuriel, Oren and Mazor, Shai and Litman, Ron},
  title     = {Question Aware Vision Transformer for Multimodal Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13861--13871},
}

OST: Refining Text Knowledge with Optimal Spatio-Temporal Descriptor for General Video Recognition
Tongjia Chen,
Hongshan Yu,
Zhengeng Yang,
Zechuan Li,
Wei Sun,
Chen Chen
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Tongjia and Yu, Hongshan and Yang, Zhengeng and Li, Zechuan and Sun, Wei and Chen, Chen},
  title     = {{OST}: Refining Text Knowledge with Optimal Spatio-Temporal Descriptor for General Video Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18888--18898},
}

Habitat Synthetic Scenes Dataset (HSSD-200): An Analysis of 3D Scene Scale and Realism Tradeoffs for ObjectGoal Navigation
Mukul Khanna,
Yongsen Mao,
Hanxiao Jiang,
Sanjay Haresh,
Brennan Shacklett,
Dhruv Batra,
Alexander Clegg,
Eric Undersander,
Angel X. Chang,
Manolis Savva
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Khanna_2024_CVPR,
  author    = {Khanna, Mukul and Mao, Yongsen and Jiang, Hanxiao and Haresh, Sanjay and Shacklett, Brennan and Batra, Dhruv and Clegg, Alexander and Undersander, Eric and Chang, Angel X. and Savva, Manolis},
  title     = {{Habitat Synthetic Scenes Dataset} ({HSSD-200}): An Analysis of {3D} Scene Scale and Realism Tradeoffs for {ObjectGoal} Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16384--16393},
}

NViST: In the Wild New View Synthesis from a Single Image with Transformers
Wonbong Jang,
Lourdes Agapito
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2024_CVPR,
  author    = {Jang, Wonbong and Agapito, Lourdes},
  title     = {{NViST}: In the Wild New View Synthesis from a Single Image with Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10181--10193},
}

Step Differences in Instructional Video
Tushar Nagarajan,
Lorenzo Torresani
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nagarajan_2024_CVPR,
  author    = {Nagarajan, Tushar and Torresani, Lorenzo},
  title     = {Step Differences in Instructional Video},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18740--18750},
}

Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data
Lihe Yang,
Bingyi Kang,
Zilong Huang,
Xiaogang Xu,
Jiashi Feng,
Hengshuang Zhao
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Lihe and Kang, Bingyi and Huang, Zilong and Xu, Xiaogang and Feng, Jiashi and Zhao, Hengshuang},
  title     = {{Depth Anything}: Unleashing the Power of Large-Scale Unlabeled Data},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10371--10381},
}

MPOD123: One Image to 3D Content Generation Using Mask-enhanced Progressive Outline-to-Detail Optimization
Jimin Xu,
Tianbao Wang,
Tao Jin,
Shengyu Zhang,
Dongjie Fu,
Zhe Wang,
Jiangjing Lyu,
Chengfei Lv,
Chaoyue Niu,
Zhou Yu,
Zhou Zhao,
Fei Wu
[pdf]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Jimin and Wang, Tianbao and Jin, Tao and Zhang, Shengyu and Fu, Dongjie and Wang, Zhe and Lyu, Jiangjing and Lv, Chengfei and Niu, Chaoyue and Yu, Zhou and Zhao, Zhou and Wu, Fei},
  title     = {{MPOD123}: One Image to {3D} Content Generation Using Mask-enhanced Progressive Outline-to-Detail Optimization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10682--10692},
}

UnionFormer: Unified-Learning Transformer with Multi-View Representation for Image Manipulation Detection and Localization
Shuaibo Li,
Wei Ma,
Jianwei Guo,
Shibiao Xu,
Benchong Li,
Xiaopeng Zhang
[pdf] [supp]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Shuaibo and Ma, Wei and Guo, Jianwei and Xu, Shibiao and Li, Benchong and Zhang, Xiaopeng},
  title     = {{UnionFormer}: Unified-Learning Transformer with Multi-View Representation for Image Manipulation Detection and Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12523--12533},
}

Situational Awareness Matters in 3D Vision Language Reasoning
Yunze Man,
Liang-Yan Gui,
Yu-Xiong Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Man_2024_CVPR,
  author    = {Man, Yunze and Gui, Liang-Yan and Wang, Yu-Xiong},
  title     = {Situational Awareness Matters in {3D} Vision Language Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13678--13688},
}

RCBEVDet: Radar-camera Fusion in Bird's Eye View for 3D Object Detection
Zhiwei Lin,
Zhe Liu,
Zhongyu Xia,
Xinhao Wang,
Yongtao Wang,
Shengxiang Qi,
Yang Dong,
Nan Dong,
Le Zhang,
Ce Zhu
[pdf]
[bibtex]
@InProceedings{Lin_2024_CVPR,
  author    = {Lin, Zhiwei and Liu, Zhe and Xia, Zhongyu and Wang, Xinhao and Wang, Yongtao and Qi, Shengxiang and Dong, Yang and Dong, Nan and Zhang, Le and Zhu, Ce},
  title     = {{RCBEVDet}: Radar-camera Fusion in Bird's Eye View for {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14928--14937},
}

Adaptive Softassign via Hadamard-Equipped Sinkhorn
Binrui Shen,
Qiang Niu,
Shengxin Zhu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2024_CVPR,
  author    = {Shen, Binrui and Niu, Qiang and Zhu, Shengxin},
  title     = {Adaptive Softassign via {Hadamard}-Equipped {Sinkhorn}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17638--17647},
}

Re-thinking Data Availability Attacks Against Deep Neural Networks
Bin Fang,
Bo Li,
Shuang Wu,
Shouhong Ding,
Ran Yi,
Lizhuang Ma
[pdf] [supp]
[bibtex]
@InProceedings{Fang_2024_CVPR,
  author    = {Fang, Bin and Li, Bo and Wu, Shuang and Ding, Shouhong and Yi, Ran and Ma, Lizhuang},
  title     = {Re-thinking Data Availability Attacks Against Deep Neural Networks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12215--12224},
}

SHiNe: Semantic Hierarchy Nexus for Open-vocabulary Object Detection
Mingxuan Liu,
Tyler L. Hayes,
Elisa Ricci,
Gabriela Csurka,
Riccardo Volpi
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Mingxuan and Hayes, Tyler L. and Ricci, Elisa and Csurka, Gabriela and Volpi, Riccardo},
  title     = {{SHiNe}: Semantic Hierarchy Nexus for Open-vocabulary Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16634--16644},
}

Ranking Distillation for Open-Ended Video Question Answering with Insufficient Labels
Tianming Liang,
Chaolei Tan,
Beihao Xia,
Wei-Shi Zheng,
Jian-Fang Hu
[pdf] [arXiv]
[bibtex]
@InProceedings{Liang_2024_CVPR,
  author    = {Liang, Tianming and Tan, Chaolei and Xia, Beihao and Zheng, Wei-Shi and Hu, Jian-Fang},
  title     = {Ranking Distillation for Open-Ended Video Question Answering with Insufficient Labels},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13161--13170},
}

Depth-Aware Concealed Crop Detection in Dense Agricultural Scenes
Liqiong Wang,
Jinyu Yang,
Yanfu Zhang,
Fangyi Wang,
Feng Zheng
[pdf] [supp]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Liqiong and Yang, Jinyu and Zhang, Yanfu and Wang, Fangyi and Zheng, Feng},
  title     = {Depth-Aware Concealed Crop Detection in Dense Agricultural Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17201--17211},
}

Solving the Catastrophic Forgetting Problem in Generalized Category Discovery
Xinzi Cao,
Xiawu Zheng,
Guanhong Wang,
Weijiang Yu,
Yunhang Shen,
Ke Li,
Yutong Lu,
Yonghong Tian
[pdf] [supp]
[bibtex]
@InProceedings{Cao_2024_CVPR,
  author    = {Cao, Xinzi and Zheng, Xiawu and Wang, Guanhong and Yu, Weijiang and Shen, Yunhang and Li, Ke and Lu, Yutong and Tian, Yonghong},
  title     = {Solving the Catastrophic Forgetting Problem in Generalized Category Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16880--16889},
}

Data-Efficient Unsupervised Interpolation Without Any Intermediate Frame for 4D Medical Images
JungEun Kim,
Hangyul Yoon,
Geondo Park,
Kyungsu Kim,
Eunho Yang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR,
  author    = {Kim, JungEun and Yoon, Hangyul and Park, Geondo and Kim, Kyungsu and Yang, Eunho},
  title     = {Data-Efficient Unsupervised Interpolation Without Any Intermediate Frame for {4D} Medical Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11353--11364},
}

Learning the 3D Fauna of the Web
Zizhang Li,
Dor Litvak,
Ruining Li,
Yunzhi Zhang,
Tomas Jakab,
Christian Rupprecht,
Shangzhe Wu,
Andrea Vedaldi,
Jiajun Wu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Zizhang and Litvak, Dor and Li, Ruining and Zhang, Yunzhi and Jakab, Tomas and Rupprecht, Christian and Wu, Shangzhe and Vedaldi, Andrea and Wu, Jiajun},
  title     = {Learning the {3D} Fauna of the {Web}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9752--9762},
}

LISA: Reasoning Segmentation via Large Language Model
Xin Lai,
Zhuotao Tian,
Yukang Chen,
Yanwei Li,
Yuhui Yuan,
Shu Liu,
Jiaya Jia
[pdf] [arXiv]
[bibtex]
@InProceedings{Lai_2024_CVPR,
  author    = {Lai, Xin and Tian, Zhuotao and Chen, Yukang and Li, Yanwei and Yuan, Yuhui and Liu, Shu and Jia, Jiaya},
  title     = {{LISA}: Reasoning Segmentation via Large Language Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9579--9589},
}

Bridging the Gap: A Unified Video Comprehension Framework for Moment Retrieval and Highlight Detection
Yicheng Xiao,
Zhuoyan Luo,
Yong Liu,
Yue Ma,
Hengwei Bian,
Yatai Ji,
Yujiu Yang,
Xiu Li
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2024_CVPR,
  author    = {Xiao, Yicheng and Luo, Zhuoyan and Liu, Yong and Ma, Yue and Bian, Hengwei and Ji, Yatai and Yang, Yujiu and Li, Xiu},
  title     = {Bridging the Gap: A Unified Video Comprehension Framework for Moment Retrieval and Highlight Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18709--18719},
}

MuseChat: A Conversational Music Recommendation System for Videos
Zhikang Dong,
Xiulong Liu,
Bin Chen,
Pawel Polak,
Peng Zhang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2024_CVPR,
  author    = {Dong, Zhikang and Liu, Xiulong and Chen, Bin and Polak, Pawel and Zhang, Peng},
  title     = {{MuseChat}: A Conversational Music Recommendation System for Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12775--12785},
}

Device-Wise Federated Network Pruning
Shangqian Gao,
Junyi Li,
Zeyu Zhang,
Yanfu Zhang,
Weidong Cai,
Heng Huang
[pdf] [supp]
[bibtex]
@InProceedings{Gao_2024_CVPR,
  author    = {Gao, Shangqian and Li, Junyi and Zhang, Zeyu and Zhang, Yanfu and Cai, Weidong and Huang, Heng},
  title     = {Device-Wise Federated Network Pruning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12342--12352},
}

MoReVQA: Exploring Modular Reasoning Models for Video Question Answering
Juhong Min,
Shyamal Buch,
Arsha Nagrani,
Minsu Cho,
Cordelia Schmid
[pdf] [arXiv]
[bibtex]
@InProceedings{Min_2024_CVPR,
  author    = {Min, Juhong and Buch, Shyamal and Nagrani, Arsha and Cho, Minsu and Schmid, Cordelia},
  title     = {{MoReVQA}: Exploring Modular Reasoning Models for Video Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13235--13245},
}

Low-Rank Rescaled Vision Transformer Fine-Tuning: A Residual Design Approach
Wei Dong,
Xing Zhang,
Bihui Chen,
Dawei Yan,
Zhijun Lin,
Qingsen Yan,
Peng Wang,
Yang Yang
[pdf] [supp]
[bibtex]
@InProceedings{Dong_2024_CVPR,
  author    = {Dong, Wei and Zhang, Xing and Chen, Bihui and Yan, Dawei and Lin, Zhijun and Yan, Qingsen and Wang, Peng and Yang, Yang},
  title     = {Low-Rank Rescaled Vision Transformer Fine-Tuning: A Residual Design Approach},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16101--16110},
}

Distribution-aware Knowledge Prototyping for Non-exemplar Lifelong Person Re-identification
Kunlun Xu,
Xu Zou,
Yuxin Peng,
Jiahuan Zhou
[pdf] [supp]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Kunlun and Zou, Xu and Peng, Yuxin and Zhou, Jiahuan},
  title     = {Distribution-aware Knowledge Prototyping for Non-exemplar Lifelong Person Re-identification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16604--16613},
}

Generating Enhanced Negatives for Training Language-Based Object Detectors
Shiyu Zhao,
Long Zhao,
Vijay Kumar B G,
Yumin Suh,
Dimitris N. Metaxas,
Manmohan Chandraker,
Samuel Schulter
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Shiyu and Zhao, Long and G, Vijay Kumar B and Suh, Yumin and Metaxas, Dimitris N. and Chandraker, Manmohan and Schulter, Samuel},
  title     = {Generating Enhanced Negatives for Training Language-Based Object Detectors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13592--13602},
}

FedAS: Bridging Inconsistency in Personalized Federated Learning
Xiyuan Yang,
Wenke Huang,
Mang Ye
[pdf]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Xiyuan and Huang, Wenke and Ye, Mang},
  title     = {{FedAS}: Bridging Inconsistency in Personalized Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11986--11995},
}

MoST: Multi-Modality Scene Tokenization for Motion Prediction
Norman Mu,
Jingwei Ji,
Zhenpei Yang,
Nate Harada,
Haotian Tang,
Kan Chen,
Charles R. Qi,
Runzhou Ge,
Kratarth Goel,
Zoey Yang,
Scott Ettinger,
Rami Al-Rfou,
Dragomir Anguelov,
Yin Zhou
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mu_2024_CVPR,
  author    = {Mu, Norman and Ji, Jingwei and Yang, Zhenpei and Harada, Nate and Tang, Haotian and Chen, Kan and Qi, Charles R. and Ge, Runzhou and Goel, Kratarth and Yang, Zoey and Ettinger, Scott and Al-Rfou, Rami and Anguelov, Dragomir and Zhou, Yin},
  title     = {{MoST}: Multi-Modality Scene Tokenization for Motion Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14988--14999},
}

PIGEON: Predicting Image Geolocations
Lukas Haas,
Michal Skreta,
Silas Alberti,
Chelsea Finn
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Haas_2024_CVPR,
  author    = {Haas, Lukas and Skreta, Michal and Alberti, Silas and Finn, Chelsea},
  title     = {{PIGEON}: Predicting Image Geolocations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12893--12902},
}

Flow-Guided Online Stereo Rectification for Wide Baseline Stereo
Anush Kumar,
Fahim Mannan,
Omid Hosseini Jafari,
Shile Li,
Felix Heide
[pdf] [supp]
[bibtex]
@InProceedings{Kumar_2024_CVPR,
  author    = {Kumar, Anush and Mannan, Fahim and Jafari, Omid Hosseini and Li, Shile and Heide, Felix},
  title     = {Flow-Guided Online Stereo Rectification for Wide Baseline Stereo},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15375--15385},
}

Driving Everywhere with Large Language Model Policy Adaptation
Boyi Li,
Yue Wang,
Jiageng Mao,
Boris Ivanovic,
Sushant Veer,
Karen Leung,
Marco Pavone
[pdf] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Boyi and Wang, Yue and Mao, Jiageng and Ivanovic, Boris and Veer, Sushant and Leung, Karen and Pavone, Marco},
  title     = {Driving Everywhere with Large Language Model Policy Adaptation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14948--14957},
}

Koala: Key Frame-Conditioned Long Video-LLM
Reuben Tan,
Ximeng Sun,
Ping Hu,
Jui-hsien Wang,
Hanieh Deilamsalehy,
Bryan A. Plummer,
Bryan Russell,
Kate Saenko
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2024_CVPR,
  author    = {Tan, Reuben and Sun, Ximeng and Hu, Ping and Wang, Jui-hsien and Deilamsalehy, Hanieh and Plummer, Bryan A. and Russell, Bryan and Saenko, Kate},
  title     = {{Koala}: Key Frame-Conditioned Long {Video-LLM}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13581--13591},
}

HallusionBench: An Advanced Diagnostic Suite for Entangled Language Hallucination and Visual Illusion in Large Vision-Language Models
Tianrui Guan,
Fuxiao Liu,
Xiyang Wu,
Ruiqi Xian,
Zongxia Li,
Xiaoyu Liu,
Xijun Wang,
Lichang Chen,
Furong Huang,
Yaser Yacoob,
Dinesh Manocha,
Tianyi Zhou
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guan_2024_CVPR,
  author    = {Guan, Tianrui and Liu, Fuxiao and Wu, Xiyang and Xian, Ruiqi and Li, Zongxia and Liu, Xiaoyu and Wang, Xijun and Chen, Lichang and Huang, Furong and Yacoob, Yaser and Manocha, Dinesh and Zhou, Tianyi},
  title     = {{HallusionBench}: An Advanced Diagnostic Suite for Entangled Language Hallucination and Visual Illusion in Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14375--14385},
}

ID-like Prompt Learning for Few-Shot Out-of-Distribution Detection
Yichen Bai,
Zongbo Han,
Bing Cao,
Xiaoheng Jiang,
Qinghua Hu,
Changqing Zhang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2024_CVPR,
  author    = {Bai, Yichen and Han, Zongbo and Cao, Bing and Jiang, Xiaoheng and Hu, Qinghua and Zhang, Changqing},
  title     = {{ID-like} Prompt Learning for Few-Shot Out-of-Distribution Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17480--17489},
}

Jack of All Tasks, Master of Many: Designing General-Purpose Coarse-to-Fine Vision-Language Model
Shraman Pramanick,
Guangxing Han,
Rui Hou,
Sayan Nag,
Ser-Nam Lim,
Nicolas Ballas,
Qifan Wang,
Rama Chellappa,
Amjad Almahairi
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pramanick_2024_CVPR,
  author    = {Pramanick, Shraman and Han, Guangxing and Hou, Rui and Nag, Sayan and Lim, Ser-Nam and Ballas, Nicolas and Wang, Qifan and Chellappa, Rama and Almahairi, Amjad},
  title     = {Jack of All Tasks, Master of Many: Designing General-Purpose Coarse-to-Fine Vision-Language Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14076--14088},
}

SchurVINS: Schur Complement-Based Lightweight Visual Inertial Navigation System
Yunfei Fan,
Tianyu Zhao,
Guidong Wang
[pdf] [arXiv]
[bibtex]
@InProceedings{Fan_2024_CVPR,
  author    = {Fan, Yunfei and Zhao, Tianyu and Wang, Guidong},
  title     = {{SchurVINS}: {Schur} Complement-Based Lightweight Visual Inertial Navigation System},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17964--17973},
}

ViP-LLaVA: Making Large Multimodal Models Understand Arbitrary Visual Prompts
Mu Cai,
Haotian Liu,
Siva Karthik Mustikovela,
Gregory P. Meyer,
Yuning Chai,
Dennis Park,
Yong Jae Lee
[pdf] [supp]
[bibtex]
@InProceedings{Cai_2024_CVPR,
  author    = {Cai, Mu and Liu, Haotian and Mustikovela, Siva Karthik and Meyer, Gregory P. and Chai, Yuning and Park, Dennis and Lee, Yong Jae},
  title     = {{ViP-LLaVA}: Making Large Multimodal Models Understand Arbitrary Visual Prompts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12914--12923},
}

OVER-NAV: Elevating Iterative Vision-and-Language Navigation with Open-Vocabulary Detection and StructurEd Representation
Ganlong Zhao,
Guanbin Li,
Weikai Chen,
Yizhou Yu
[pdf] [supp]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Ganlong and Li, Guanbin and Chen, Weikai and Yu, Yizhou},
  title     = {{OVER-NAV}: Elevating Iterative Vision-and-Language Navigation with Open-Vocabulary Detection and {StructurEd} Representation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16296--16306},
}

All Rivers Run to the Sea: Private Learning with Asymmetric Flows
Yue Niu,
Ramy E. Ali,
Saurav Prakash,
Salman Avestimehr
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Niu_2024_CVPR,
  author    = {Niu, Yue and Ali, Ramy E. and Prakash, Saurav and Avestimehr, Salman},
  title     = {All Rivers Run to the Sea: Private Learning with Asymmetric Flows},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12353--12362},
}

HandBooster: Boosting 3D Hand-Mesh Reconstruction by Conditional Synthesis and Sampling of Hand-Object Interactions
Hao Xu,
Haipeng Li,
Yinqiao Wang,
Shuaicheng Liu,
Chi-Wing Fu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Hao and Li, Haipeng and Wang, Yinqiao and Liu, Shuaicheng and Fu, Chi-Wing},
  title     = {{HandBooster}: Boosting {3D} Hand-Mesh Reconstruction by Conditional Synthesis and Sampling of Hand-Object Interactions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10159--10169},
}

A-Teacher: Asymmetric Network for 3D Semi-Supervised Object Detection
Hanshi Wang,
Zhipeng Zhang,
Jin Gao,
Weiming Hu
[pdf] [supp]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Hanshi and Zhang, Zhipeng and Gao, Jin and Hu, Weiming},
  title     = {{A-Teacher}: Asymmetric Network for {3D} Semi-Supervised Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14978--14987},
}

Visual Objectification in Films: Towards a New AI Task for Video Interpretation
Julie Tores,
Lucile Sassatelli,
Hui-Yin Wu,
Clement Bergman,
Léa Andolfi,
Victor Ecrement,
Frédéric Precioso,
Thierry Devars,
Magali Guaresi,
Virginie Julliard,
Sarah Lecossais
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tores_2024_CVPR,
  author    = {Tores, Julie and Sassatelli, Lucile and Wu, Hui-Yin and Bergman, Clement and Andolfi, L{\'e}a and Ecrement, Victor and Precioso, Fr{\'e}d{\'e}ric and Devars, Thierry and Guaresi, Magali and Julliard, Virginie and Lecossais, Sarah},
  title     = {Visual Objectification in Films: Towards a New {AI} Task for Video Interpretation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10864--10874},
}

BiTT: Bi-directional Texture Reconstruction of Interacting Two Hands from a Single Image
Minje Kim,
Tae-Kyun Kim
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Minje and Kim, Tae-Kyun},
  title     = {{BiTT}: Bi-directional Texture Reconstruction of Interacting Two Hands from a Single Image},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10726--10735},
}

Learning to Localize Objects Improves Spatial Reasoning in Visual-LLMs
Kanchana Ranasinghe,
Satya Narayan Shukla,
Omid Poursaeed,
Michael S. Ryoo,
Tsung-Yu Lin
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ranasinghe_2024_CVPR,
  author    = {Ranasinghe, Kanchana and Shukla, Satya Narayan and Poursaeed, Omid and Ryoo, Michael S. and Lin, Tsung-Yu},
  title     = {Learning to Localize Objects Improves Spatial Reasoning in {Visual-LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12977--12987},
}

Self-Distilled Masked Auto-Encoders are Efficient Video Anomaly Detectors
Nicolae-Cătălin Ristea,
Florinel-Alin Croitoru,
Radu Tudor Ionescu,
Marius Popescu,
Fahad Shahbaz Khan,
Mubarak Shah
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ristea_2024_CVPR,
  author    = {Ristea, Nicolae-C{\u{a}}t{\u{a}}lin and Croitoru, Florinel-Alin and Ionescu, Radu Tudor and Popescu, Marius and Khan, Fahad Shahbaz and Shah, Mubarak},
  title     = {Self-Distilled Masked Auto-Encoders are Efficient Video Anomaly Detectors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15984--15995},
}

Distilling Vision-Language Models on Millions of Videos
Yue Zhao,
Long Zhao,
Xingyi Zhou,
Jialin Wu,
Chun-Te Chu,
Hui Miao,
Florian Schroff,
Hartwig Adam,
Ting Liu,
Boqing Gong,
Philipp Krahenbuhl,
Liangzhe Yuan
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Yue and Zhao, Long and Zhou, Xingyi and Wu, Jialin and Chu, Chun-Te and Miao, Hui and Schroff, Florian and Adam, Hartwig and Liu, Ting and Gong, Boqing and Krahenbuhl, Philipp and Yuan, Liangzhe},
  title     = {Distilling Vision-Language Models on Millions of Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13106--13116},
}

Generalized Predictive Model for Autonomous Driving
Jiazhi Yang,
Shenyuan Gao,
Yihang Qiu,
Li Chen,
Tianyu Li,
Bo Dai,
Kashyap Chitta,
Penghao Wu,
Jia Zeng,
Ping Luo,
Jun Zhang,
Andreas Geiger,
Yu Qiao,
Hongyang Li
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Jiazhi and Gao, Shenyuan and Qiu, Yihang and Chen, Li and Li, Tianyu and Dai, Bo and Chitta, Kashyap and Wu, Penghao and Zeng, Jia and Luo, Ping and Zhang, Jun and Geiger, Andreas and Qiao, Yu and Li, Hongyang},
  title     = {Generalized Predictive Model for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14662--14672},
}

FACT: Frame-Action Cross-Attention Temporal Modeling for Efficient Action Segmentation
Zijia Lu,
Ehsan Elhamifar
[pdf] [supp]
[bibtex]
@InProceedings{Lu_2024_CVPR,
  author    = {Lu, Zijia and Elhamifar, Ehsan},
  title     = {{FACT}: Frame-Action Cross-Attention Temporal Modeling for Efficient Action Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18175--18185},
}

Test-Time Zero-Shot Temporal Action Localization
Benedetta Liberatori,
Alessandro Conti,
Paolo Rota,
Yiming Wang,
Elisa Ricci
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liberatori_2024_CVPR,
  author    = {Liberatori, Benedetta and Conti, Alessandro and Rota, Paolo and Wang, Yiming and Ricci, Elisa},
  title     = {Test-Time Zero-Shot Temporal Action Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18720--18729},
}

AM-RADIO: Agglomerative Vision Foundation Model Reduce All Domains Into One
Mike Ranzinger,
Greg Heinrich,
Jan Kautz,
Pavlo Molchanov
[pdf] [supp]
[bibtex]
@InProceedings{Ranzinger_2024_CVPR,
  author    = {Ranzinger, Mike and Heinrich, Greg and Kautz, Jan and Molchanov, Pavlo},
  title     = {{AM-RADIO}: Agglomerative Vision Foundation Model Reduce All Domains Into One},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12490--12500},
}

FastMAC: Stochastic Spectral Sampling of Correspondence Graph
Yifei Zhang,
Hao Zhao,
Hongyang Li,
Siheng Chen
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Yifei and Zhao, Hao and Li, Hongyang and Chen, Siheng},
  title     = {{FastMAC}: Stochastic Spectral Sampling of Correspondence Graph},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17857--17867},
}

FedSOL: Stabilized Orthogonal Learning with Proximal Restrictions in Federated Learning
Gihun Lee,
Minchan Jeong,
Sangmook Kim,
Jaehoon Oh,
Se-Young Yun
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Gihun and Jeong, Minchan and Kim, Sangmook and Oh, Jaehoon and Yun, Se-Young},
  title     = {{FedSOL}: Stabilized Orthogonal Learning with Proximal Restrictions in Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12512--12522},
}

A Category Agnostic Model for Visual Rearrangement
Yuyi Liu,
Xinhang Song,
Weijie Li,
Xiaohan Wang,
Shuqiang Jiang
[pdf]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Yuyi and Song, Xinhang and Li, Weijie and Wang, Xiaohan and Jiang, Shuqiang},
  title     = {A Category Agnostic Model for Visual Rearrangement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16457--16466},
}

Representing Part-Whole Hierarchies in Foundation Models by Learning Localizability, Composability, and Decomposability from Anatomy via Self Supervision
Mohammad Reza Hosseinzadeh Taher,
Michael B. Gotway,
Jianming Liang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Taher_2024_CVPR,
  author    = {Hosseinzadeh Taher, Mohammad Reza and Gotway, Michael B. and Liang, Jianming},
  title     = {Representing Part-Whole Hierarchies in Foundation Models by Learning Localizability, Composability, and Decomposability from Anatomy via Self Supervision},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11269--11281},
}

Efficient Test-Time Adaptation of Vision-Language Models
Adilbek Karmanov,
Dayan Guan,
Shijian Lu,
Abdulmotaleb El Saddik,
Eric Xing
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Karmanov_2024_CVPR,
  author    = {Karmanov, Adilbek and Guan, Dayan and Lu, Shijian and El Saddik, Abdulmotaleb and Xing, Eric},
  title     = {Efficient Test-Time Adaptation of Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14162--14171},
}

Eyes Wide Shut? Exploring the Visual Shortcomings of Multimodal LLMs
Shengbang Tong,
Zhuang Liu,
Yuexiang Zhai,
Yi Ma,
Yann LeCun,
Saining Xie
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tong_2024_CVPR,
  author    = {Tong, Shengbang and Liu, Zhuang and Zhai, Yuexiang and Ma, Yi and LeCun, Yann and Xie, Saining},
  title     = {Eyes Wide Shut? Exploring the Visual Shortcomings of Multimodal {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9568--9578}
}

Mind Marginal Non-Crack Regions: Clustering-Inspired Representation Learning for Crack Segmentation
Zhuangzhuang Chen,
Zhuonan Lai,
Jie Chen,
Jianqiang Li
[pdf]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Zhuangzhuang and Lai, Zhuonan and Chen, Jie and Li, Jianqiang},
  title     = {Mind Marginal Non-Crack Regions: Clustering-Inspired Representation Learning for Crack Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12698--12708}
}

RegionGPT: Towards Region Understanding Vision Language Model
Qiushan Guo,
Shalini De Mello,
Hongxu Yin,
Wonmin Byeon,
Ka Chun Cheung,
Yizhou Yu,
Ping Luo,
Sifei Liu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2024_CVPR,
  author    = {Guo, Qiushan and De Mello, Shalini and Yin, Hongxu and Byeon, Wonmin and Cheung, Ka Chun and Yu, Yizhou and Luo, Ping and Liu, Sifei},
  title     = {{RegionGPT}: Towards Region Understanding Vision Language Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13796--13806}
}

Error Detection in Egocentric Procedural Task Videos
Shih-Po Lee,
Zijia Lu,
Zekun Zhang,
Minh Hoai,
Ehsan Elhamifar
[pdf] [supp]
[bibtex]
@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Shih-Po and Lu, Zijia and Zhang, Zekun and Hoai, Minh and Elhamifar, Ehsan},
  title     = {Error Detection in Egocentric Procedural Task Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18655--18666}
}

Uncertainty-Guided Never-Ending Learning to Drive
Lei Lai,
Eshed Ohn-Bar,
Sanjay Arora,
John Seon Keun Yi
[pdf]
[bibtex]
@InProceedings{Lai_2024_CVPR,
  author    = {Lai, Lei and Ohn-Bar, Eshed and Arora, Sanjay and Yi, John Seon Keun},
  title     = {Uncertainty-Guided Never-Ending Learning to Drive},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15088--15098}
}

FakeInversion: Learning to Detect Images from Unseen Text-to-Image Models by Inverting Stable Diffusion
George Cazenavette,
Avneesh Sud,
Thomas Leung,
Ben Usman
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cazenavette_2024_CVPR,
  author    = {Cazenavette, George and Sud, Avneesh and Leung, Thomas and Usman, Ben},
  title     = {{FakeInversion}: Learning to Detect Images from Unseen Text-to-Image Models by Inverting {Stable Diffusion}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10759--10769}
}

Attribute-Guided Pedestrian Retrieval: Bridging Person Re-ID with Internal Attribute Variability
Yan Huang,
Zhang Zhang,
Qiang Wu,
Yi Zhong,
Liang Wang
[pdf] [supp]
[bibtex]
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Yan and Zhang, Zhang and Wu, Qiang and Zhong, Yi and Wang, Liang},
  title     = {Attribute-Guided Pedestrian Retrieval: Bridging Person {Re-ID} with Internal Attribute Variability},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17689--17699}
}

Text Is MASS: Modeling as Stochastic Embedding for Text-Video Retrieval
Jiamian Wang,
Guohao Sun,
Pichao Wang,
Dongfang Liu,
Sohail Dianat,
Majid Rabbani,
Raghuveer Rao,
Zhiqiang Tao
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Jiamian and Sun, Guohao and Wang, Pichao and Liu, Dongfang and Dianat, Sohail and Rabbani, Majid and Rao, Raghuveer and Tao, Zhiqiang},
  title     = {Text Is {MASS}: Modeling as Stochastic Embedding for Text-Video Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16551--16560}
}

Know Your Neighbors: Improving Single-View Reconstruction via Spatial Vision-Language Reasoning
Rui Li,
Tobias Fischer,
Mattia Segu,
Marc Pollefeys,
Luc Van Gool,
Federico Tombari
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Rui and Fischer, Tobias and Segu, Mattia and Pollefeys, Marc and Van Gool, Luc and Tombari, Federico},
  title     = {Know Your Neighbors: Improving Single-View Reconstruction via Spatial Vision-Language Reasoning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9848--9858}
}

Preserving Fairness Generalization in Deepfake Detection
Li Lin,
Xinan He,
Yan Ju,
Xin Wang,
Feng Ding,
Shu Hu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2024_CVPR,
  author    = {Lin, Li and He, Xinan and Ju, Yan and Wang, Xin and Ding, Feng and Hu, Shu},
  title     = {Preserving Fairness Generalization in Deepfake Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16815--16825}
}

Structure-Aware Sparse-View X-ray 3D Reconstruction
Yuanhao Cai,
Jiahao Wang,
Alan Yuille,
Zongwei Zhou,
Angtian Wang
[pdf]
[bibtex]
@InProceedings{Cai_2024_CVPR,
  author    = {Cai, Yuanhao and Wang, Jiahao and Yuille, Alan and Zhou, Zongwei and Wang, Angtian},
  title     = {Structure-Aware Sparse-View {X-ray} {3D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11174--11183}
}

Dexterous Grasp Transformer
Guo-Hao Xu,
Yi-Lin Wei,
Dian Zheng,
Xiao-Ming Wu,
Wei-Shi Zheng
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Guo-Hao and Wei, Yi-Lin and Zheng, Dian and Wu, Xiao-Ming and Zheng, Wei-Shi},
  title     = {Dexterous Grasp Transformer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17933--17942}
}

EgoThink: Evaluating First-Person Perspective Thinking Capability of Vision-Language Models
Sijie Cheng,
Zhicheng Guo,
Jingwen Wu,
Kechen Fang,
Peng Li,
Huaping Liu,
Yang Liu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2024_CVPR,
  author    = {Cheng, Sijie and Guo, Zhicheng and Wu, Jingwen and Fang, Kechen and Li, Peng and Liu, Huaping and Liu, Yang},
  title     = {{EgoThink}: Evaluating First-Person Perspective Thinking Capability of Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14291--14302}
}

Hearing Anything Anywhere
Mason Long Wang,
Ryosuke Sawata,
Samuel Clarke,
Ruohan Gao,
Shangzhe Wu,
Jiajun Wu
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Mason Long and Sawata, Ryosuke and Clarke, Samuel and Gao, Ruohan and Wu, Shangzhe and Wu, Jiajun},
  title     = {Hearing Anything Anywhere},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11790--11799}
}

PatchFusion: An End-to-End Tile-Based Framework for High-Resolution Monocular Metric Depth Estimation
Zhenyu Li,
Shariq Farooq Bhat,
Peter Wonka
[pdf] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Zhenyu and Bhat, Shariq Farooq and Wonka, Peter},
  title     = {{PatchFusion}: An End-to-End Tile-Based Framework for High-Resolution Monocular Metric Depth Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10016--10025}
}

Retrieval-Augmented Egocentric Video Captioning
Jilan Xu,
Yifei Huang,
Junlin Hou,
Guo Chen,
Yuejie Zhang,
Rui Feng,
Weidi Xie
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Jilan and Huang, Yifei and Hou, Junlin and Chen, Guo and Zhang, Yuejie and Feng, Rui and Xie, Weidi},
  title     = {Retrieval-Augmented Egocentric Video Captioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13525--13536}
}

SkillDiffuser: Interpretable Hierarchical Planning via Skill Abstractions in Diffusion-Based Task Execution
Zhixuan Liang,
Yao Mu,
Hengbo Ma,
Masayoshi Tomizuka,
Mingyu Ding,
Ping Luo
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2024_CVPR,
  author    = {Liang, Zhixuan and Mu, Yao and Ma, Hengbo and Tomizuka, Masayoshi and Ding, Mingyu and Luo, Ping},
  title     = {{SkillDiffuser}: Interpretable Hierarchical Planning via Skill Abstractions in Diffusion-Based Task Execution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16467--16476}
}

TE-TAD: Towards Full End-to-End Temporal Action Detection via Time-Aligned Coordinate Expression
Ho-Joong Kim,
Jung-Ho Hong,
Heejo Kong,
Seong-Whan Lee
[pdf] [supp]
[bibtex]
@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Ho-Joong and Hong, Jung-Ho and Kong, Heejo and Lee, Seong-Whan},
  title     = {{TE-TAD}: Towards Full End-to-End Temporal Action Detection via Time-Aligned Coordinate Expression},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18837--18846}
}

PointBeV: A Sparse Approach for BeV Predictions
Loick Chambon,
Eloi Zablocki,
Mickaël Chen,
Florent Bartoccioni,
Patrick Pérez,
Matthieu Cord
[pdf] [supp]
[bibtex]
@InProceedings{Chambon_2024_CVPR,
  author    = {Chambon, Loick and Zablocki, Eloi and Chen, Micka{\"e}l and Bartoccioni, Florent and P{\'e}rez, Patrick and Cord, Matthieu},
  title     = {{PointBeV}: A Sparse Approach for {BeV} Predictions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15195--15204}
}

From-Ground-To-Objects: Coarse-to-Fine Self-supervised Monocular Depth Estimation of Dynamic Objects with Ground Contact Prior
Jaeho Moon,
Juan Luis Gonzalez Bello,
Byeongjun Kwon,
Munchurl Kim
[pdf] [supp]
[bibtex]
@InProceedings{Moon_2024_CVPR,
  author    = {Moon, Jaeho and Bello, Juan Luis Gonzalez and Kwon, Byeongjun and Kim, Munchurl},
  title     = {{From-Ground-To-Objects}: Coarse-to-Fine Self-supervised Monocular Depth Estimation of Dynamic Objects with Ground Contact Prior},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10519--10529}
}

SRTube: Video-Language Pre-Training with Action-Centric Video Tube Features and Semantic Role Labeling
Ju-Hee Lee,
Je-Won Kang
[pdf] [supp]
[bibtex]
@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Ju-Hee and Kang, Je-Won},
  title     = {{SRTube}: Video-Language Pre-Training with Action-Centric Video Tube Features and Semantic Role Labeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13689--13699}
}

Prompt Highlighter: Interactive Control for Multi-Modal LLMs
Yuechen Zhang,
Shengju Qian,
Bohao Peng,
Shu Liu,
Jiaya Jia
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Yuechen and Qian, Shengju and Peng, Bohao and Liu, Shu and Jia, Jiaya},
  title     = {Prompt Highlighter: Interactive Control for Multi-Modal {LLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13215--13224}
}

Continual Learning for Motion Prediction Model via Meta-Representation Learning and Optimal Memory Buffer Retention Strategy
DaeJun Kang,
Dongsuk Kum,
Sanmin Kim
[pdf]
[bibtex]
@InProceedings{Kang_2024_CVPR,
  author    = {Kang, DaeJun and Kum, Dongsuk and Kim, Sanmin},
  title     = {Continual Learning for Motion Prediction Model via Meta-Representation Learning and Optimal Memory Buffer Retention Strategy},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15438--15448}
}

EditGuard: Versatile Image Watermarking for Tamper Localization and Copyright Protection
Xuanyu Zhang,
Runyi Li,
Jiwen Yu,
Youmin Xu,
Weiqi Li,
Jian Zhang
[pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Xuanyu and Li, Runyi and Yu, Jiwen and Xu, Youmin and Li, Weiqi and Zhang, Jian},
  title     = {{EditGuard}: Versatile Image Watermarking for Tamper Localization and Copyright Protection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11964--11974}
}

FairRAG: Fair Human Generation via Fair Retrieval Augmentation
Robik Shrestha,
Yang Zou,
Qiuyu Chen,
Zhiheng Li,
Yusheng Xie,
Siqi Deng
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shrestha_2024_CVPR,
  author    = {Shrestha, Robik and Zou, Yang and Chen, Qiuyu and Li, Zhiheng and Xie, Yusheng and Deng, Siqi},
  title     = {{FairRAG}: Fair Human Generation via Fair Retrieval Augmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11996--12005}
}

Template Free Reconstruction of Human-object Interaction with Procedural Interaction Generation
Xianghui Xie,
Bharat Lal Bhatnagar,
Jan Eric Lenssen,
Gerard Pons-Moll
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Xianghui and Bhatnagar, Bharat Lal and Lenssen, Jan Eric and Pons-Moll, Gerard},
  title     = {Template Free Reconstruction of Human-object Interaction with Procedural Interaction Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10003--10015}
}

Open-Vocabulary Video Anomaly Detection
Peng Wu,
Xuerong Zhou,
Guansong Pang,
Yujia Sun,
Jing Liu,
Peng Wang,
Yanning Zhang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Peng and Zhou, Xuerong and Pang, Guansong and Sun, Yujia and Liu, Jing and Wang, Peng and Zhang, Yanning},
  title     = {Open-Vocabulary Video Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18297--18307}
}

ODM: A Text-Image Further Alignment Pre-training Approach for Scene Text Detection and Spotting
Chen Duan,
Pei Fu,
Shan Guo,
Qianyi Jiang,
Xiaoming Wei
[pdf] [arXiv]
[bibtex]
@InProceedings{Duan_2024_CVPR,
  author    = {Duan, Chen and Fu, Pei and Guo, Shan and Jiang, Qianyi and Wei, Xiaoming},
  title     = {{ODM}: A Text-Image Further Alignment Pre-training Approach for Scene Text Detection and Spotting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15587--15597}
}

Epistemic Uncertainty Quantification For Pre-Trained Neural Networks
Hanjing Wang,
Qiang Ji
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Hanjing and Ji, Qiang},
  title     = {Epistemic Uncertainty Quantification For Pre-Trained Neural Networks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11052--11061}
}

Diffusion-ES: Gradient-free Planning with Diffusion for Autonomous and Instruction-guided Driving
Brian Yang,
Huangyuan Su,
Nikolaos Gkanatsios,
Tsung-Wei Ke,
Ayush Jain,
Jeff Schneider,
Katerina Fragkiadaki
[pdf] [supp]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Brian and Su, Huangyuan and Gkanatsios, Nikolaos and Ke, Tsung-Wei and Jain, Ayush and Schneider, Jeff and Fragkiadaki, Katerina},
  title     = {{Diffusion-ES}: Gradient-free Planning with Diffusion for Autonomous and Instruction-guided Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15342--15353}
}

MRC-Net: 6-DoF Pose Estimation with MultiScale Residual Correlation
Yuelong Li,
Yafei Mao,
Raja Bala,
Sunil Hadap
[pdf] [supp]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Yuelong and Mao, Yafei and Bala, Raja and Hadap, Sunil},
  title     = {{MRC-Net}: {6-DoF} Pose Estimation with {MultiScale} Residual Correlation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10476--10486}
}

MonoCD: Monocular 3D Object Detection with Complementary Depths
Longfei Yan,
Pei Yan,
Shengzhou Xiong,
Xuanyu Xiang,
Yihua Tan
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2024_CVPR,
  author    = {Yan, Longfei and Yan, Pei and Xiong, Shengzhou and Xiang, Xuanyu and Tan, Yihua},
  title     = {{MonoCD}: Monocular {3D} Object Detection with Complementary Depths},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10248--10257}
}

Consistent3D: Towards Consistent High-Fidelity Text-to-3D Generation with Deterministic Sampling Prior
Zike Wu,
Pan Zhou,
Xuanyu Yi,
Xiaoding Yuan,
Hanwang Zhang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Zike and Zhou, Pan and Yi, Xuanyu and Yuan, Xiaoding and Zhang, Hanwang},
  title     = {{Consistent3D}: Towards Consistent High-Fidelity Text-to-{3D} Generation with Deterministic Sampling Prior},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9892--9902}
}

ManipLLM: Embodied Multimodal Large Language Model for Object-Centric Robotic Manipulation
Xiaoqi Li,
Mingxu Zhang,
Yiran Geng,
Haoran Geng,
Yuxing Long,
Yan Shen,
Renrui Zhang,
Jiaming Liu,
Hao Dong
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Xiaoqi and Zhang, Mingxu and Geng, Yiran and Geng, Haoran and Long, Yuxing and Shen, Yan and Zhang, Renrui and Liu, Jiaming and Dong, Hao},
  title     = {{ManipLLM}: Embodied Multimodal Large Language Model for Object-Centric Robotic Manipulation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18061--18070}
}

GLaMM: Pixel Grounding Large Multimodal Model
Hanoona Rasheed,
Muhammad Maaz,
Sahal Shaji,
Abdelrahman Shaker,
Salman Khan,
Hisham Cholakkal,
Rao M. Anwer,
Eric Xing,
Ming-Hsuan Yang,
Fahad S. Khan
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rasheed_2024_CVPR,
  author    = {Rasheed, Hanoona and Maaz, Muhammad and Shaji, Sahal and Shaker, Abdelrahman and Khan, Salman and Cholakkal, Hisham and Anwer, Rao M. and Xing, Eric and Yang, Ming-Hsuan and Khan, Fahad S.},
  title     = {{GLaMM}: Pixel Grounding Large Multimodal Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13009--13018}
}

Incremental Residual Concept Bottleneck Models
Chenming Shang,
Shiji Zhou,
Hengyuan Zhang,
Xinzhe Ni,
Yujiu Yang,
Yuwang Wang
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shang_2024_CVPR,
  author    = {Shang, Chenming and Zhou, Shiji and Zhang, Hengyuan and Ni, Xinzhe and Yang, Yujiu and Wang, Yuwang},
  title     = {Incremental Residual Concept Bottleneck Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11030--11040}
}

SPOC: Imitating Shortest Paths in Simulation Enables Effective Navigation and Manipulation in the Real World
Kiana Ehsani,
Tanmay Gupta,
Rose Hendrix,
Jordi Salvador,
Luca Weihs,
Kuo-Hao Zeng,
Kunal Pratap Singh,
Yejin Kim,
Winson Han,
Alvaro Herrasti,
Ranjay Krishna,
Dustin Schwenk,
Eli VanderBilt,
Aniruddha Kembhavi
[pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ehsani_2024_CVPR,
  author    = {Ehsani, Kiana and Gupta, Tanmay and Hendrix, Rose and Salvador, Jordi and Weihs, Luca and Zeng, Kuo-Hao and Singh, Kunal Pratap and Kim, Yejin and Han, Winson and Herrasti, Alvaro and Krishna, Ranjay and Schwenk, Dustin and VanderBilt, Eli and Kembhavi, Aniruddha},
  title     = {{SPOC}: Imitating Shortest Paths in Simulation Enables Effective Navigation and Manipulation in the Real World},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16238--16250}
}

LoCoNet: Long-Short Context Network for Active Speaker Detection
Xizi Wang,