CVPR 2024 Open Access Repository

Papers

Back
Seeing the World through Your Eyes: Hadi Alzayer,

Kevin Zhang,

Brandon Feng,

Christopher A. Metzler,

Jia-Bin Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Alzayer_2024_CVPR,
  author    = {Alzayer, Hadi and Zhang, Kevin and Feng, Brandon and Metzler, Christopher A. and Huang, Jia-Bin},
  title     = {Seeing the World through Your Eyes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4864--4873},
}
Ungeneralizable Examples: Jingwen Ye,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2024_CVPR,
  author    = {Ye, Jingwen and Wang, Xinchao},
  title     = {Ungeneralizable Examples},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11944--11953},
}
LaneCPP: Continuous 3D Lane Detection using Physical Priors: Maximilian Pittner,

Joel Janai,

Alexandru P. Condurache; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pittner_2024_CVPR,
  author    = {Pittner, Maximilian and Janai, Joel and Condurache, Alexandru P.},
  title     = {{LaneCPP}: Continuous {3D} Lane Detection using Physical Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10639--10648},
}
CityDreamer: Compositional Generative Model of Unbounded 3D Cities: Haozhe Xie,

Zhaoxi Chen,

Fangzhou Hong,

Ziwei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Haozhe and Chen, Zhaoxi and Hong, Fangzhou and Liu, Ziwei},
  title     = {{CityDreamer}: Compositional Generative Model of Unbounded {3D} Cities},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9666--9675},
}
Action Detection via an Image Diffusion Process: Lin Geng Foo,

Tianjiao Li,

Hossein Rahmani,

Jun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Foo_2024_CVPR,
  author    = {Foo, Lin Geng and Li, Tianjiao and Rahmani, Hossein and Liu, Jun},
  title     = {Action Detection via an Image Diffusion Process},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18351--18361},
}
ConTex-Human: Free-View Rendering of Human from a Single Image with Texture-Consistent Synthesis: Xiangjun Gao,

Xiaoyu Li,

Chaopeng Zhang,

Qi Zhang,

Yanpei Cao,

Ying Shan,

Long Quan; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2024_CVPR,
  author    = {Gao, Xiangjun and Li, Xiaoyu and Zhang, Chaopeng and Zhang, Qi and Cao, Yanpei and Shan, Ying and Quan, Long},
  title     = {{ConTex-Human}: Free-View Rendering of Human from a Single Image with Texture-Consistent Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10084--10094},
}
Streaming Dense Video Captioning: Xingyi Zhou,

Anurag Arnab,

Shyamal Buch,

Shen Yan,

Austin Myers,

Xuehan Xiong,

Arsha Nagrani,

Cordelia Schmid; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhou_2024_CVPR is shared with another entry in this listing; disambiguate before merging into a single .bib database.
@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Xingyi and Arnab, Anurag and Buch, Shyamal and Yan, Shen and Myers, Austin and Xiong, Xuehan and Nagrani, Arsha and Schmid, Cordelia},
  title     = {Streaming Dense Video Captioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18243--18252},
}
Rethinking Inductive Biases for Surface Normal Estimation: Gwangbin Bae,

Andrew J. Davison; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bae_2024_CVPR,
  author    = {Bae, Gwangbin and Davison, Andrew J.},
  title     = {Rethinking Inductive Biases for Surface Normal Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9535--9545},
}
Fair Federated Learning under Domain Skew with Local Consistency and Domain Diversity: Yuhang Chen,

Wenke Huang,

Mang Ye; [pdf] [arXiv]
[bibtex]
% NOTE(review): citation key Chen_2024_CVPR is shared with other entries in this listing; disambiguate before merging into a single .bib database.
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Yuhang and Huang, Wenke and Ye, Mang},
  title     = {Fair Federated Learning under Domain Skew with Local Consistency and Domain Diversity},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12077--12086},
}
HIG: Hierarchical Interlacement Graph Approach to Scene Graph Generation in Video Understanding: Trong-Thuan Nguyen,

Pha Nguyen,

Khoa Luu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2024_CVPR,
  author    = {Nguyen, Trong-Thuan and Nguyen, Pha and Luu, Khoa},
  title     = {{HIG}: Hierarchical Interlacement Graph Approach to Scene Graph Generation in Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18384--18394},
}
OOSTraj: Out-of-Sight Trajectory Prediction With Vision-Positioning Denoising: Haichao Zhang,

Yi Xu,

Hongsheng Lu,

Takayuki Shimizu,

Yun Fu; [pdf] [arXiv]
[bibtex]
% NOTE(review): citation key Zhang_2024_CVPR is shared with other entries in this listing; disambiguate before merging into a single .bib database.
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Haichao and Xu, Yi and Lu, Hongsheng and Shimizu, Takayuki and Fu, Yun},
  title     = {{OOSTraj}: Out-of-Sight Trajectory Prediction With Vision-Positioning Denoising},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14802--14811},
}
FADES: Fair Disentanglement with Sensitive Relevance: Taeuk Jang,

Xiaoqian Wang; [pdf] [supp]
[bibtex]
@InProceedings{Jang_2024_CVPR,
  author    = {Jang, Taeuk and Wang, Xiaoqian},
  title     = {{FADES}: Fair Disentanglement with Sensitive Relevance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12067--12076},
}
Self-Supervised Class-Agnostic Motion Prediction with Spatial and Temporal Consistency Regularizations: Kewei Wang,

Yizheng Wu,

Jun Cen,

Zhiyu Pan,

Xingyi Li,

Zhe Wang,

Zhiguo Cao,

Guosheng Lin; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Wang_2024_CVPR is shared with another entry in this listing; disambiguate before merging into a single .bib database.
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Kewei and Wu, Yizheng and Cen, Jun and Pan, Zhiyu and Li, Xingyi and Wang, Zhe and Cao, Zhiguo and Lin, Guosheng},
  title     = {Self-Supervised Class-Agnostic Motion Prediction with Spatial and Temporal Consistency Regularizations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14638--14647},
}
CAT: Exploiting Inter-Class Dynamics for Domain Adaptive Object Detection: Mikhail Kennerley,

Jian-Gang Wang,

Bharadwaj Veeravalli,

Robby T. Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kennerley_2024_CVPR,
  author    = {Kennerley, Mikhail and Wang, Jian-Gang and Veeravalli, Bharadwaj and Tan, Robby T.},
  title     = {{CAT}: Exploiting Inter-Class Dynamics for Domain Adaptive Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16541--16550},
}
An Empirical Study of Scaling Law for Scene Text Recognition: Miao Rang,

Zhenni Bi,

Chuanjian Liu,

Yunhe Wang,

Kai Han; [pdf] [supp]
[bibtex]
@InProceedings{Rang_2024_CVPR,
  author    = {Rang, Miao and Bi, Zhenni and Liu, Chuanjian and Wang, Yunhe and Han, Kai},
  title     = {An Empirical Study of Scaling Law for Scene Text Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15619--15629},
}
Text2Loc: 3D Point Cloud Localization from Natural Language: Yan Xia,

Letian Shi,

Zifeng Ding,

Joao F. Henriques,

Daniel Cremers; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xia_2024_CVPR,
  author    = {Xia, Yan and Shi, Letian and Ding, Zifeng and Henriques, Joao F. and Cremers, Daniel},
  title     = {{Text2Loc}: {3D} Point Cloud Localization from Natural Language},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14958--14967},
}
Decomposing Disease Descriptions for Enhanced Pathology Detection: A Multi-Aspect Vision-Language Pre-training Framework: Vu Minh Hieu Phan,

Yutong Xie,

Yuankai Qi,

Lingqiao Liu,

Liyang Liu,

Bowen Zhang,

Zhibin Liao,

Qi Wu,

Minh-Son To,

Johan W. Verjans; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Phan_2024_CVPR,
  author    = {Phan, Vu Minh Hieu and Xie, Yutong and Qi, Yuankai and Liu, Lingqiao and Liu, Liyang and Zhang, Bowen and Liao, Zhibin and Wu, Qi and To, Minh-Son and Verjans, Johan W.},
  title     = {Decomposing Disease Descriptions for Enhanced Pathology Detection: A Multi-Aspect Vision-Language Pre-training Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11492--11501},
}
Fusing Personal and Environmental Cues for Identification and Segmentation of First-Person Camera Wearers in Third-Person Views: Ziwei Zhao,

Yuchen Wang,

Chuhua Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Ziwei and Wang, Yuchen and Wang, Chuhua},
  title     = {Fusing Personal and Environmental Cues for Identification and Segmentation of First-Person Camera Wearers in Third-Person Views},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16477--16487},
}
Desigen: A Pipeline for Controllable Design Template Generation: Haohan Weng,

Danqing Huang,

Yu Qiao,

Zheng Hu,

Chin-Yew Lin,

Tong Zhang,

C. L. Philip Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Weng_2024_CVPR,
  author    = {Weng, Haohan and Huang, Danqing and Qiao, Yu and Hu, Zheng and Lin, Chin-Yew and Zhang, Tong and Chen, C. L. Philip},
  title     = {Desigen: A Pipeline for Controllable Design Template Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12721--12732},
}
Multi-criteria Token Fusion with One-step-ahead Attention for Efficient Vision Transformers: Sanghyeok Lee,

Joonmyung Choi,

Hyunwoo J. Kim; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Lee_2024_CVPR is shared with another entry in this listing; disambiguate before merging into a single .bib database.
@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Sanghyeok and Choi, Joonmyung and Kim, Hyunwoo J.},
  title     = {Multi-criteria Token Fusion with One-step-ahead Attention for Efficient Vision Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15741--15750},
}
ViewFusion: Towards Multi-View Consistency via Interpolated Denoising: Xianghui Yang,

Yan Zuo,

Sameera Ramasinghe,

Loris Bazzani,

Gil Avraham,

Anton van den Hengel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Xianghui and Zuo, Yan and Ramasinghe, Sameera and Bazzani, Loris and Avraham, Gil and van den Hengel, Anton},
  title     = {{ViewFusion}: Towards Multi-View Consistency via Interpolated Denoising},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9870--9880},
}
SketchINR: A First Look into Sketches as Implicit Neural Representations: Hmrishav Bandyopadhyay,

Ayan Kumar Bhunia,

Pinaki Nath Chowdhury,

Aneeshan Sain,

Tao Xiang,

Timothy Hospedales,

Yi-Zhe Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bandyopadhyay_2024_CVPR,
  author    = {Bandyopadhyay, Hmrishav and Bhunia, Ayan Kumar and Chowdhury, Pinaki Nath and Sain, Aneeshan and Xiang, Tao and Hospedales, Timothy and Song, Yi-Zhe},
  title     = {{SketchINR}: A First Look into Sketches as Implicit Neural Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12565--12574},
}
MatchU: Matching Unseen Objects for 6D Pose Estimation from RGB-D Images: Junwen Huang,

Hao Yu,

Kuan-Ting Yu,

Nassir Navab,

Slobodan Ilic,

Benjamin Busam; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Junwen and Yu, Hao and Yu, Kuan-Ting and Navab, Nassir and Ilic, Slobodan and Busam, Benjamin},
  title     = {{MatchU}: Matching Unseen Objects for {6D} Pose Estimation from {RGB-D} Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10095--10105},
}
Towards High-fidelity Artistic Image Vectorization via Texture-Encapsulated Shape Parameterization: Ye Chen,

Bingbing Ni,

Jinfan Liu,

Xiaoyang Huang,

Xuanhong Chen; [pdf]
[bibtex]
% NOTE(review): citation key Chen_2024_CVPR is shared with other entries in this listing; disambiguate before merging into a single .bib database.
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Ye and Ni, Bingbing and Liu, Jinfan and Huang, Xiaoyang and Chen, Xuanhong},
  title     = {Towards High-fidelity Artistic Image Vectorization via Texture-Encapsulated Shape Parameterization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15877--15886},
}
EfficientSAM: Leveraged Masked Image Pretraining for Efficient Segment Anything: Yunyang Xiong,

Bala Varadarajan,

Lemeng Wu,

Xiaoyu Xiang,

Fanyi Xiao,

Chenchen Zhu,

Xiaoliang Dai,

Dilin Wang,

Fei Sun,

Forrest Iandola,

Raghuraman Krishnamoorthi,

Vikas Chandra; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiong_2024_CVPR,
  author    = {Xiong, Yunyang and Varadarajan, Bala and Wu, Lemeng and Xiang, Xiaoyu and Xiao, Fanyi and Zhu, Chenchen and Dai, Xiaoliang and Wang, Dilin and Sun, Fei and Iandola, Forrest and Krishnamoorthi, Raghuraman and Chandra, Vikas},
  title     = {{EfficientSAM}: Leveraged Masked Image Pretraining for Efficient Segment Anything},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16111--16121},
}
ChatScene: Knowledge-Enabled Safety-Critical Scenario Generation for Autonomous Vehicles: Jiawei Zhang,

Chejian Xu,

Bo Li; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhang_2024_CVPR is shared with other entries in this listing; disambiguate before merging into a single .bib database.
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Jiawei and Xu, Chejian and Li, Bo},
  title     = {{ChatScene}: Knowledge-Enabled Safety-Critical Scenario Generation for Autonomous Vehicles},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15459--15469},
}
Teeth-SEG: An Efficient Instance Segmentation Framework for Orthodontic Treatment based on Multi-Scale Aggregation and Anthropic Prior Knowledge: Bo Zou,

Shaofeng Wang,

Hao Liu,

Gaoyue Sun,

Yajie Wang,

FeiFei Zuo,

Chengbin Quan,

Youjian Zhao; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Zou_2024_CVPR is shared with another entry in this listing; disambiguate before merging into a single .bib database.
@InProceedings{Zou_2024_CVPR,
  author    = {Zou, Bo and Wang, Shaofeng and Liu, Hao and Sun, Gaoyue and Wang, Yajie and Zuo, FeiFei and Quan, Chengbin and Zhao, Youjian},
  title     = {{Teeth-SEG}: An Efficient Instance Segmentation Framework for Orthodontic Treatment based on Multi-Scale Aggregation and Anthropic Prior Knowledge},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11601--11610},
}
Bayesian Diffusion Models for 3D Shape Reconstruction: Haiyang Xu,

Yu Lei,

Zeyuan Chen,

Xiang Zhang,

Yue Zhao,

Yilin Wang,

Zhuowen Tu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Haiyang and Lei, Yu and Chen, Zeyuan and Zhang, Xiang and Zhao, Yue and Wang, Yilin and Tu, Zhuowen},
  title     = {{Bayesian} Diffusion Models for {3D} Shape Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10628--10638},
}
CrossKD: Cross-Head Knowledge Distillation for Object Detection: Jiabao Wang,

Yuming Chen,

Zhaohui Zheng,

Xiang Li,

Ming-Ming Cheng,

Qibin Hou; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Wang_2024_CVPR is shared with another entry in this listing; disambiguate before merging into a single .bib database.
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Jiabao and Chen, Yuming and Zheng, Zhaohui and Li, Xiang and Cheng, Ming-Ming and Hou, Qibin},
  title     = {{CrossKD}: Cross-Head Knowledge Distillation for Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16520--16530},
}
Bi-level Learning of Task-Specific Decoders for Joint Registration and One-Shot Medical Image Segmentation: Xin Fan,

Xiaolin Wang,

Jiaxin Gao,

Jia Wang,

Zhongxuan Luo,

Risheng Liu; [pdf] [supp]
[bibtex]
@InProceedings{Fan_2024_CVPR,
  author    = {Fan, Xin and Wang, Xiaolin and Gao, Jiaxin and Wang, Jia and Luo, Zhongxuan and Liu, Risheng},
  title     = {Bi-level Learning of Task-Specific Decoders for Joint Registration and One-Shot Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11726--11735},
}
EscherNet: A Generative Model for Scalable View Synthesis: Xin Kong,

Shikun Liu,

Xiaoyang Lyu,

Marwan Taher,

Xiaojuan Qi,

Andrew J. Davison; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2024_CVPR,
  author    = {Kong, Xin and Liu, Shikun and Lyu, Xiaoyang and Taher, Marwan and Qi, Xiaojuan and Davison, Andrew J.},
  title     = {{EscherNet}: A Generative Model for Scalable View Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9503--9513},
}
MeaCap: Memory-Augmented Zero-shot Image Captioning: Zequn Zeng,

Yan Xie,

Hao Zhang,

Chiyu Chen,

Bo Chen,

Zhengjue Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2024_CVPR,
  author    = {Zeng, Zequn and Xie, Yan and Zhang, Hao and Chen, Chiyu and Chen, Bo and Wang, Zhengjue},
  title     = {{MeaCap}: Memory-Augmented Zero-shot Image Captioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14100--14110},
}
Elite360D: Towards Efficient 360 Depth Estimation via Semantic- and Distance-Aware Bi-Projection Fusion: Hao Ai,

Lin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Ai_2024_CVPR,
  author    = {Ai, Hao and Wang, Lin},
  title     = {{Elite360D}: Towards Efficient 360 Depth Estimation via Semantic- and Distance-Aware Bi-Projection Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9926--9935},
}
Curriculum Point Prompting for Weakly-Supervised Referring Image Segmentation: Qiyuan Dai,

Sibei Yang; [pdf] [arXiv]
[bibtex]
@InProceedings{Dai_2024_CVPR,
  author    = {Dai, Qiyuan and Yang, Sibei},
  title     = {Curriculum Point Prompting for Weakly-Supervised Referring Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13711--13722},
}
EventDance: Unsupervised Source-free Cross-modal Adaptation for Event-based Object Recognition: Xu Zheng,

Lin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2024_CVPR,
  author    = {Zheng, Xu and Wang, Lin},
  title     = {{EventDance}: Unsupervised Source-free Cross-modal Adaptation for Event-based Object Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17448--17458},
}
CycleINR: Cycle Implicit Neural Representation for Arbitrary-Scale Volumetric Super-Resolution of Medical Data: Wei Fang,

Yuxing Tang,

Heng Guo,

Mingze Yuan,

Tony C. W. Mok,

Ke Yan,

Jiawen Yao,

Xin Chen,

Zaiyi Liu,

Le Lu,

Ling Zhang,

Minfeng Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fang_2024_CVPR,
  author    = {Fang, Wei and Tang, Yuxing and Guo, Heng and Yuan, Mingze and Mok, Tony C. W. and Yan, Ke and Yao, Jiawen and Chen, Xin and Liu, Zaiyi and Lu, Le and Zhang, Ling and Xu, Minfeng},
  title     = {{CycleINR}: Cycle Implicit Neural Representation for Arbitrary-Scale Volumetric Super-Resolution of Medical Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11631--11641},
}
Holistic Autonomous Driving Understanding by Bird's-Eye-View Injected Multi-Modal Large Models: Xinpeng Ding,

Jianhua Han,

Hang Xu,

Xiaodan Liang,

Wei Zhang,

Xiaomeng Li; [pdf] [supp]
[bibtex]
@InProceedings{Ding_2024_CVPR,
  author    = {Ding, Xinpeng and Han, Jianhua and Xu, Hang and Liang, Xiaodan and Zhang, Wei and Li, Xiaomeng},
  title     = {Holistic Autonomous Driving Understanding by Bird's-Eye-View Injected Multi-Modal Large Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13668--13677},
}
Extreme Point Supervised Instance Segmentation: Hyeonjun Lee,

Sehyun Hwang,

Suha Kwak; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Lee_2024_CVPR is shared with another entry in this listing; disambiguate before merging into a single .bib database.
@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Hyeonjun and Hwang, Sehyun and Kwak, Suha},
  title     = {Extreme Point Supervised Instance Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17212--17222},
}
MedM2G: Unifying Medical Multi-Modal Generation via Cross-Guided Diffusion with Visual Invariant: Chenlu Zhan,

Yu Lin,

Gaoang Wang,

Hongwei Wang,

Jian Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhan_2024_CVPR,
  author    = {Zhan, Chenlu and Lin, Yu and Wang, Gaoang and Wang, Hongwei and Wu, Jian},
  title     = {{MedM2G}: Unifying Medical Multi-Modal Generation via Cross-Guided Diffusion with Visual Invariant},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11502--11512},
}
Neural Parametric Gaussians for Monocular Non-Rigid Object Reconstruction: Devikalyan Das,

Christopher Wewer,

Raza Yunus,

Eddy Ilg,

Jan Eric Lenssen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Das_2024_CVPR,
  author    = {Das, Devikalyan and Wewer, Christopher and Yunus, Raza and Ilg, Eddy and Lenssen, Jan Eric},
  title     = {Neural Parametric {Gaussians} for Monocular Non-Rigid Object Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10715--10725},
}
PH-Net: Semi-Supervised Breast Lesion Segmentation via Patch-wise Hardness: Siyao Jiang,

Huisi Wu,

Junyang Chen,

Qin Zhang,

Jing Qin; [pdf] [supp]
[bibtex]
@InProceedings{Jiang_2024_CVPR,
  author    = {Jiang, Siyao and Wu, Huisi and Chen, Junyang and Zhang, Qin and Qin, Jing},
  title     = {{PH-Net}: Semi-Supervised Breast Lesion Segmentation via Patch-wise Hardness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11418--11427},
}
ExACT: Language-guided Conceptual Reasoning and Uncertainty Estimation for Event-based Action Recognition and More: Jiazhou Zhou,

Xu Zheng,

Yuanhuiyi Lyu,

Lin Wang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhou_2024_CVPR is shared with another entry in this listing; disambiguate before merging into a single .bib database.
@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Jiazhou and Zheng, Xu and Lyu, Yuanhuiyi and Wang, Lin},
  title     = {{ExACT}: Language-guided Conceptual Reasoning and Uncertainty Estimation for Event-based Action Recognition and More},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18633--18643},
}
Improving Visual Recognition with Hyperbolical Visual Hierarchy Mapping: Hyeongjun Kwon,

Jinhyun Jang,

Jin Kim,

Kwonyoung Kim,

Kwanghoon Sohn; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kwon_2024_CVPR,
  author    = {Kwon, Hyeongjun and Jang, Jinhyun and Kim, Jin and Kim, Kwonyoung and Sohn, Kwanghoon},
  title     = {Improving Visual Recognition with Hyperbolical Visual Hierarchy Mapping},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17364--17374},
}
ParameterNet: Parameters Are All You Need for Large-scale Visual Pretraining of Mobile Networks: Kai Han,

Yunhe Wang,

Jianyuan Guo,

Enhua Wu; [pdf]
[bibtex]
@InProceedings{Han_2024_CVPR,
  author    = {Han, Kai and Wang, Yunhe and Guo, Jianyuan and Wu, Enhua},
  title     = {{ParameterNet}: Parameters Are All You Need for Large-scale Visual Pretraining of Mobile Networks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15751--15761},
}
Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation: Bingxin Ke,

Anton Obukhov,

Shengyu Huang,

Nando Metzger,

Rodrigo Caye Daudt,

Konrad Schindler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ke_2024_CVPR,
  author    = {Ke, Bingxin and Obukhov, Anton and Huang, Shengyu and Metzger, Nando and Daudt, Rodrigo Caye and Schindler, Konrad},
  title     = {Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9492--9502},
}
LLMs are Good Sign Language Translators: Jia Gong,

Lin Geng Foo,

Yixuan He,

Hossein Rahmani,

Jun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gong_2024_CVPR,
  author    = {Gong, Jia and Foo, Lin Geng and He, Yixuan and Rahmani, Hossein and Liu, Jun},
  title     = {{LLMs} are Good Sign Language Translators},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18362--18372},
}
Revisiting the Domain Shift and Sample Uncertainty in Multi-source Active Domain Transfer: Wenqiao Zhang,

Zheqi Lv,

Hao Zhou,

Jia-Wei Liu,

Juncheng Li,

Mengze Li,

Yunfei Li,

Dongping Zhang,

Yueting Zhuang,

Siliang Tang; [pdf] [arXiv]
[bibtex]
% NOTE(review): citation key Zhang_2024_CVPR is shared with other entries in this listing; disambiguate before merging into a single .bib database.
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Wenqiao and Lv, Zheqi and Zhou, Hao and Liu, Jia-Wei and Li, Juncheng and Li, Mengze and Li, Yunfei and Zhang, Dongping and Zhuang, Yueting and Tang, Siliang},
  title     = {Revisiting the Domain Shift and Sample Uncertainty in Multi-source Active Domain Transfer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16751--16761},
}
Learning Continual Compatible Representation for Re-indexing Free Lifelong Person Re-identification: Zhenyu Cui,

Jiahuan Zhou,

Xun Wang,

Manyu Zhu,

Yuxin Peng; [pdf] [supp]
[bibtex]
@InProceedings{Cui_2024_CVPR,
  author    = {Cui, Zhenyu and Zhou, Jiahuan and Wang, Xun and Zhu, Manyu and Peng, Yuxin},
  title     = {Learning Continual Compatible Representation for Re-indexing Free Lifelong Person Re-identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16614--16623},
}
CORES: Convolutional Response-based Score for Out-of-distribution Detection: Keke Tang,

Chao Hou,

Weilong Peng,

Runnan Chen,

Peican Zhu,

Wenping Wang,

Zhihong Tian; [pdf]
[bibtex]
@InProceedings{Tang_2024_CVPR,
  author    = {Tang, Keke and Hou, Chao and Peng, Weilong and Chen, Runnan and Zhu, Peican and Wang, Wenping and Tian, Zhihong},
  title     = {{CORES}: Convolutional Response-based Score for Out-of-distribution Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10916--10925},
}
Accurate Spatial Gene Expression Prediction by Integrating Multi-Resolution Features: Youngmin Chung,

Ji Hun Ha,

Kyeong Chan Im,

Joo Sang Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chung_2024_CVPR,
  author    = {Chung, Youngmin and Ha, Ji Hun and Im, Kyeong Chan and Lee, Joo Sang},
  title     = {Accurate Spatial Gene Expression Prediction by Integrating Multi-Resolution Features},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11591--11600},
}
Behind the Veil: Enhanced Indoor 3D Scene Reconstruction with Occluded Surfaces Completion: Su Sun,

Cheng Zhao,

Yuliang Guo,

Ruoyu Wang,

Xinyu Huang,

Yingjie Victor Chen,

Liu Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Su and Zhao, Cheng and Guo, Yuliang and Wang, Ruoyu and Huang, Xinyu and Chen, Yingjie Victor and Ren, Liu},
  title     = {Behind the Veil: Enhanced Indoor {3D} Scene Reconstruction with Occluded Surfaces Completion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12744--12753},
}
VideoGrounding-DINO: Towards Open-Vocabulary Spatio-Temporal Video Grounding: Syed Talal Wasim,

Muzammal Naseer,

Salman Khan,

Ming-Hsuan Yang,

Fahad Shahbaz Khan; [pdf]
[bibtex]
@InProceedings{Wasim_2024_CVPR,
  author    = {Wasim, Syed Talal and Naseer, Muzammal and Khan, Salman and Yang, Ming-Hsuan and Khan, Fahad Shahbaz},
  title     = {{VideoGrounding-DINO}: Towards Open-Vocabulary Spatio-Temporal Video Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18909--18918},
}
Think Twice Before Selection: Federated Evidential Active Learning for Medical Image Analysis with Domain Shifts: Jiayi Chen,

Benteng Ma,

Hengfei Cui,

Yong Xia; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Chen_2024_CVPR is shared with other entries in this listing; disambiguate before merging into a single .bib database.
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Jiayi and Ma, Benteng and Cui, Hengfei and Xia, Yong},
  title     = {Think Twice Before Selection: Federated Evidential Active Learning for Medical Image Analysis with Domain Shifts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11439--11449},
}
ViTamin: Designing Scalable Vision Models in the Vision-Language Era: Jieneng Chen,

Qihang Yu,

Xiaohui Shen,

Alan Yuille,

Liang-Chieh Chen; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Chen_2024_CVPR is shared with other entries in this listing; disambiguate before merging into a single .bib database.
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Jieneng and Yu, Qihang and Shen, Xiaohui and Yuille, Alan and Chen, Liang-Chieh},
  title     = {{ViTamin}: Designing Scalable Vision Models in the Vision-Language Era},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12954--12966},
}
Seeing the Unseen: Visual Common Sense for Semantic Placement: Ram Ramrakhya,

Aniruddha Kembhavi,

Dhruv Batra,

Zsolt Kira,

Kuo-Hao Zeng,

Luca Weihs; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ramrakhya_2024_CVPR,
  author    = {Ramrakhya, Ram and Kembhavi, Aniruddha and Batra, Dhruv and Kira, Zsolt and Zeng, Kuo-Hao and Weihs, Luca},
  title     = {Seeing the Unseen: Visual Common Sense for Semantic Placement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16273--16283},
}
LLaMA-Excitor: General Instruction Tuning via Indirect Feature Interaction: Bo Zou,

Chao Yang,

Yu Qiao,

Chengbin Quan,

Youjian Zhao; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Zou_2024_CVPR is shared with another entry in this listing; disambiguate before merging into a single .bib database.
@InProceedings{Zou_2024_CVPR,
  author    = {Zou, Bo and Yang, Chao and Qiao, Yu and Quan, Chengbin and Zhao, Youjian},
  title     = {{LLaMA-Excitor}: General Instruction Tuning via Indirect Feature Interaction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14089--14099},
}
Steerers: A Framework for Rotation Equivariant Keypoint Descriptors: Georg Bökman,

Johan Edstedt,

Michael Felsberg,

Fredrik Kahl; [pdf] [supp]
[bibtex]
@InProceedings{Bokman_2024_CVPR,
  author    = {B{\"o}kman, Georg and Edstedt, Johan and Felsberg, Michael and Kahl, Fredrik},
  title     = {Steerers: A Framework for Rotation Equivariant Keypoint Descriptors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4885--4895},
}
Efficient Dataset Distillation via Minimax Diffusion: Jianyang Gu,

Saeed Vahidian,

Vyacheslav Kungurtsev,

Haonan Wang,

Wei Jiang,

Yang You,

Yiran Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2024_CVPR,
  author    = {Gu, Jianyang and Vahidian, Saeed and Kungurtsev, Vyacheslav and Wang, Haonan and Jiang, Wei and You, Yang and Chen, Yiran},
  title     = {Efficient Dataset Distillation via Minimax Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15793--15803},
}
Posterior Distillation Sampling: Juil Koo,

Chanho Park,

Minhyuk Sung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Koo_2024_CVPR,
  author    = {Koo, Juil and Park, Chanho and Sung, Minhyuk},
  title     = {Posterior Distillation Sampling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13352--13361},
}
HOISDF: Constraining 3D Hand-Object Pose Estimation with Global Signed Distance Fields: Haozhe Qi,

Chen Zhao,

Mathieu Salzmann,

Alexander Mathis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qi_2024_CVPR,
  author    = {Qi, Haozhe and Zhao, Chen and Salzmann, Mathieu and Mathis, Alexander},
  title     = {{HOISDF}: Constraining {3D} Hand-Object Pose Estimation with Global Signed Distance Fields},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10392--10402},
}
DiffPortrait3D: Controllable Diffusion for Zero-Shot Portrait View Synthesis: Yuming Gu,

Hongyi Xu,

You Xie,

Guoxian Song,

Yichun Shi,

Di Chang,

Jing Yang,

Linjie Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2024_CVPR,
  author    = {Gu, Yuming and Xu, Hongyi and Xie, You and Song, Guoxian and Shi, Yichun and Chang, Di and Yang, Jing and Luo, Linjie},
  title     = {{DiffPortrait3D}: Controllable Diffusion for Zero-Shot Portrait View Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10456--10465},
}
H-ViT: A Hierarchical Vision Transformer for Deformable Image Registration: Morteza Ghahremani,

Mohammad Khateri,

Bailiang Jian,

Benedikt Wiestler,

Ehsan Adeli,

Christian Wachinger; [pdf] [supp]
[bibtex]
@InProceedings{Ghahremani_2024_CVPR,
  author    = {Ghahremani, Morteza and Khateri, Mohammad and Jian, Bailiang and Wiestler, Benedikt and Adeli, Ehsan and Wachinger, Christian},
  title     = {{H-ViT}: A Hierarchical Vision Transformer for Deformable Image Registration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11513--11523},
}
VideoLLM-online: Online Video Large Language Model for Streaming Video: Joya Chen,

Zhaoyang Lv,

Shiwei Wu,

Kevin Qinghong Lin,

Chenan Song,

Difei Gao,

Jia-Wei Liu,

Ziteng Gao,

Dongxing Mao,

Mike Zheng Shou; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Joya and Lv, Zhaoyang and Wu, Shiwei and Lin, Kevin Qinghong and Song, Chenan and Gao, Difei and Liu, Jia-Wei and Gao, Ziteng and Mao, Dongxing and Shou, Mike Zheng},
  title     = {{VideoLLM-online}: Online Video Large Language Model for Streaming Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18407--18418},
}
Towards Better Vision-Inspired Vision-Language Models: Yun-Hao Cao,

Kaixiang Ji,

Ziyuan Huang,

Chuanyang Zheng,

Jiajia Liu,

Jian Wang,

Jingdong Chen,

Ming Yang; [pdf]
[bibtex]
@InProceedings{Cao_2024_CVPR,
  author    = {Cao, Yun-Hao and Ji, Kaixiang and Huang, Ziyuan and Zheng, Chuanyang and Liu, Jiajia and Wang, Jian and Chen, Jingdong and Yang, Ming},
  title     = {Towards Better Vision-Inspired Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13537--13547},
}
VSRD: Instance-Aware Volumetric Silhouette Rendering for Weakly Supervised 3D Object Detection: Zihua Liu,

Hiroki Sakuma,

Masatoshi Okutomi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Zihua and Sakuma, Hiroki and Okutomi, Masatoshi},
  title     = {{VSRD}: Instance-Aware Volumetric Silhouette Rendering for Weakly Supervised {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17354--17363},
}
RILA: Reflective and Imaginative Language Agent for Zero-Shot Semantic Audio-Visual Navigation: Zeyuan Yang,

Jiageng Liu,

Peihao Chen,

Anoop Cherian,

Tim K. Marks,

Jonathan Le Roux,

Chuang Gan; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Zeyuan and Liu, Jiageng and Chen, Peihao and Cherian, Anoop and Marks, Tim K. and Le Roux, Jonathan and Gan, Chuang},
  title     = {{RILA}: Reflective and Imaginative Language Agent for Zero-Shot Semantic Audio-Visual Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16251--16261},
}
Endow SAM with Keen Eyes: Temporal-spatial Prompt Learning for Video Camouflaged Object Detection: Wenjun Hui,

Zhenfeng Zhu,

Shuai Zheng,

Yao Zhao; [pdf]
[bibtex]
@InProceedings{Hui_2024_CVPR,
  author    = {Hui, Wenjun and Zhu, Zhenfeng and Zheng, Shuai and Zhao, Yao},
  title     = {Endow {SAM} with Keen Eyes: Temporal-spatial Prompt Learning for Video Camouflaged Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19058--19067},
}
Forgery-aware Adaptive Transformer for Generalizable Synthetic Image Detection: Huan Liu,

Zichang Tan,

Chuangchuang Tan,

Yunchao Wei,

Jingdong Wang,

Yao Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Huan and Tan, Zichang and Tan, Chuangchuang and Wei, Yunchao and Wang, Jingdong and Zhao, Yao},
  title     = {Forgery-aware Adaptive Transformer for Generalizable Synthetic Image Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10770--10780},
}
PostureHMR: Posture Transformation for 3D Human Mesh Recovery: Yu-Pei Song,

Xiao Wu,

Zhaoquan Yuan,

Jian-Jun Qiao,

Qiang Peng; [pdf] [supp]
[bibtex]
@InProceedings{Song_2024_CVPR,
  author    = {Song, Yu-Pei and Wu, Xiao and Yuan, Zhaoquan and Qiao, Jian-Jun and Peng, Qiang},
  title     = {{PostureHMR}: Posture Transformation for {3D} Human Mesh Recovery},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9732--9741},
}
Dynamic Adapter Meets Prompt Tuning: Parameter-Efficient Transfer Learning for Point Cloud Analysis: Xin Zhou,

Dingkang Liang,

Wei Xu,

Xingkui Zhu,

Yihan Xu,

Zhikang Zou,

Xiang Bai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Xin and Liang, Dingkang and Xu, Wei and Zhu, Xingkui and Xu, Yihan and Zou, Zhikang and Bai, Xiang},
  title     = {Dynamic Adapter Meets Prompt Tuning: Parameter-Efficient Transfer Learning for Point Cloud Analysis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14707--14717},
}
Wonder3D: Single Image to 3D using Cross-Domain Diffusion: Xiaoxiao Long,

Yuan-Chen Guo,

Cheng Lin,

Yuan Liu,

Zhiyang Dou,

Lingjie Liu,

Yuexin Ma,

Song-Hai Zhang,

Marc Habermann,

Christian Theobalt,

Wenping Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Long_2024_CVPR,
  author    = {Long, Xiaoxiao and Guo, Yuan-Chen and Lin, Cheng and Liu, Yuan and Dou, Zhiyang and Liu, Lingjie and Ma, Yuexin and Zhang, Song-Hai and Habermann, Marc and Theobalt, Christian and Wang, Wenping},
  title     = {{Wonder3D}: Single Image to {3D} using Cross-Domain Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9970--9980},
}
RichDreamer: A Generalizable Normal-Depth Diffusion Model for Detail Richness in Text-to-3D: Lingteng Qiu,

Guanying Chen,

Xiaodong Gu,

Qi Zuo,

Mutian Xu,

Yushuang Wu,

Weihao Yuan,

Zilong Dong,

Liefeng Bo,

Xiaoguang Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2024_CVPR,
  author    = {Qiu, Lingteng and Chen, Guanying and Gu, Xiaodong and Zuo, Qi and Xu, Mutian and Wu, Yushuang and Yuan, Weihao and Dong, Zilong and Bo, Liefeng and Han, Xiaoguang},
  title     = {{RichDreamer}: A Generalizable Normal-Depth Diffusion Model for Detail Richness in {Text-to-3D}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9914--9925},
}
Zero-shot Referring Expression Comprehension via Structural Similarity Between Images and Captions: Zeyu Han,

Fangrui Zhu,

Qianru Lao,

Huaizu Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2024_CVPR,
  author    = {Han, Zeyu and Zhu, Fangrui and Lao, Qianru and Jiang, Huaizu},
  title     = {Zero-shot Referring Expression Comprehension via Structural Similarity Between Images and Captions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14364--14374},
}
Triplane Meets Gaussian Splatting: Fast and Generalizable Single-View 3D Reconstruction with Transformers: Zi-Xin Zou,

Zhipeng Yu,

Yuan-Chen Guo,

Yangguang Li,

Ding Liang,

Yan-Pei Cao,

Song-Hai Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zou_2024_CVPR,
  author    = {Zou, Zi-Xin and Yu, Zhipeng and Guo, Yuan-Chen and Li, Yangguang and Liang, Ding and Cao, Yan-Pei and Zhang, Song-Hai},
  title     = {Triplane Meets {Gaussian} Splatting: Fast and Generalizable Single-View {3D} Reconstruction with Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10324--10335},
}
WateRF: Robust Watermarks in Radiance Fields for Protection of Copyrights: Youngdong Jang,

Dong In Lee,

MinHyuk Jang,

Jong Wook Kim,

Feng Yang,

Sangpil Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2024_CVPR,
  author    = {Jang, Youngdong and Lee, Dong In and Jang, MinHyuk and Kim, Jong Wook and Yang, Feng and Kim, Sangpil},
  title     = {{WateRF}: Robust Watermarks in Radiance Fields for Protection of Copyrights},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12087--12097},
}
Instance-aware Contrastive Learning for Occluded Human Mesh Reconstruction: Mi-Gyeong Gwon,

Gi-Mun Um,

Won-Sik Cheong,

Wonjun Kim; [pdf] [supp]
[bibtex]
@InProceedings{Gwon_2024_CVPR,
  author    = {Gwon, Mi-Gyeong and Um, Gi-Mun and Cheong, Won-Sik and Kim, Wonjun},
  title     = {Instance-aware Contrastive Learning for Occluded Human Mesh Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10553--10562},
}
Robust Noisy Correspondence Learning with Equivariant Similarity Consistency: Yuchen Yang,

Likai Wang,

Erkun Yang,

Cheng Deng; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Yuchen and Wang, Likai and Yang, Erkun and Deng, Cheng},
  title     = {Robust Noisy Correspondence Learning with Equivariant Similarity Consistency},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17700--17709},
}
Compositional Video Understanding with Spatiotemporal Structure-based Transformers: Hoyeoung Yun,

Jinwoo Ahn,

Minseo Kim,

Eun-Sol Kim; [pdf] [supp]
[bibtex]
@InProceedings{Yun_2024_CVPR,
  author    = {Yun, Hoyeoung and Ahn, Jinwoo and Kim, Minseo and Kim, Eun-Sol},
  title     = {Compositional Video Understanding with Spatiotemporal Structure-based Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18751--18760},
}
3D LiDAR Mapping in Dynamic Environments using a 4D Implicit Neural Representation: Xingguang Zhong,

Yue Pan,

Cyrill Stachniss,

Jens Behley; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhong_2024_CVPR,
  author    = {Zhong, Xingguang and Pan, Yue and Stachniss, Cyrill and Behley, Jens},
  title     = {{3D} {LiDAR} Mapping in Dynamic Environments using a {4D} Implicit Neural Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15417--15427},
}
What When and Where? Self-Supervised Spatio-Temporal Grounding in Untrimmed Multi-Action Videos from Narrated Instructions: Brian Chen,

Nina Shvetsova,

Andrew Rouditchenko,

Daniel Kondermann,

Samuel Thomas,

Shih-Fu Chang,

Rogerio Feris,

James Glass,

Hilde Kuehne; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Brian and Shvetsova, Nina and Rouditchenko, Andrew and Kondermann, Daniel and Thomas, Samuel and Chang, Shih-Fu and Feris, Rogerio and Glass, James and Kuehne, Hilde},
  title     = {What When and Where? Self-Supervised Spatio-Temporal Grounding in Untrimmed Multi-Action Videos from Narrated Instructions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18419--18429},
}
FoundationPose: Unified 6D Pose Estimation and Tracking of Novel Objects: Bowen Wen,

Wei Yang,

Jan Kautz,

Stan Birchfield; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wen_2024_CVPR,
  author    = {Wen, Bowen and Yang, Wei and Kautz, Jan and Birchfield, Stan},
  title     = {{FoundationPose}: Unified {6D} Pose Estimation and Tracking of Novel Objects},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17868--17879},
}
Hyperbolic Anomaly Detection: Huimin Li,

Zhentao Chen,

Yunhao Xu,

Junlin Hu; [pdf]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Huimin and Chen, Zhentao and Xu, Yunhao and Hu, Junlin},
  title     = {Hyperbolic Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17511--17520},
}
VLP: Vision Language Planning for Autonomous Driving: Chenbin Pan,

Burhaneddin Yaman,

Tommaso Nesti,

Abhirup Mallik,

Alessandro G Allievi,

Senem Velipasalar,

Liu Ren; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2024_CVPR,
  author    = {Pan, Chenbin and Yaman, Burhaneddin and Nesti, Tommaso and Mallik, Abhirup and Allievi, Alessandro G and Velipasalar, Senem and Ren, Liu},
  title     = {{VLP}: Vision Language Planning for Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14760--14769},
}
ProMark: Proactive Diffusion Watermarking for Causal Attribution: Vishal Asnani,

John Collomosse,

Tu Bui,

Xiaoming Liu,

Shruti Agarwal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Asnani_2024_CVPR,
  author    = {Asnani, Vishal and Collomosse, John and Bui, Tu and Liu, Xiaoming and Agarwal, Shruti},
  title     = {{ProMark}: Proactive Diffusion Watermarking for Causal Attribution},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10802--10811},
}
Consistency and Uncertainty: Identifying Unreliable Responses From Black-Box Vision-Language Models for Selective Visual Question Answering: Zaid Khan,

Yun Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Khan_2024_CVPR,
  author    = {Khan, Zaid and Fu, Yun},
  title     = {Consistency and Uncertainty: Identifying Unreliable Responses From Black-Box Vision-Language Models for Selective Visual Question Answering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10854--10863},
}
Implicit Motion Function: Yue Gao,

Jiahao Li,

Lei Chu,

Yan Lu; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2024_CVPR,
  author    = {Gao, Yue and Li, Jiahao and Chu, Lei and Lu, Yan},
  title     = {Implicit Motion Function},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19278--19289},
}
MultiDiff: Consistent Novel View Synthesis from a Single Image: Norman Müller,

Katja Schwarz,

Barbara Rössle,

Lorenzo Porzi,

Samuel Rota Bulò,

Matthias Nießner,

Peter Kontschieder; [pdf] [supp]
[bibtex]
@InProceedings{Muller_2024_CVPR,
  author    = {M{\"u}ller, Norman and Schwarz, Katja and R{\"o}ssle, Barbara and Porzi, Lorenzo and Bul{\`o}, Samuel Rota and Nie{\ss}ner, Matthias and Kontschieder, Peter},
  title     = {{MultiDiff}: Consistent Novel View Synthesis from a Single Image},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10258--10268},
}
Atom-Level Optical Chemical Structure Recognition with Limited Supervision: Martijn Oldenhof,

Edward De Brouwer,

Adam Arany,

Yves Moreau; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Oldenhof_2024_CVPR,
  author    = {Oldenhof, Martijn and De Brouwer, Edward and Arany, Adam and Moreau, Yves},
  title     = {Atom-Level Optical Chemical Structure Recognition with Limited Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17669--17678},
}
LiDAR-based Person Re-identification: Wenxuan Guo,

Zhiyu Pan,

Yingping Liang,

Ziheng Xi,

Zhicheng Zhong,

Jianjiang Feng,

Jie Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2024_CVPR,
  author    = {Guo, Wenxuan and Pan, Zhiyu and Liang, Yingping and Xi, Ziheng and Zhong, Zhicheng and Feng, Jianjiang and Zhou, Jie},
  title     = {{LiDAR-based} Person Re-identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17437--17447},
}
Model Adaptation for Time Constrained Embodied Control: Jaehyun Song,

Minjong Yoo,

Honguk Woo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2024_CVPR,
  author    = {Song, Jaehyun and Yoo, Minjong and Woo, Honguk},
  title     = {Model Adaptation for Time Constrained Embodied Control},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16499--16508},
}
ActiveDC: Distribution Calibration for Active Finetuning: Wenshuai Xu,

Zhenghui Hu,

Yu Lu,

Jinzhou Meng,

Qingjie Liu,

Yunhong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Wenshuai and Hu, Zhenghui and Lu, Yu and Meng, Jinzhou and Liu, Qingjie and Wang, Yunhong},
  title     = {{ActiveDC}: Distribution Calibration for Active Finetuning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16996--17005},
}
Seeing Unseen: Discover Novel Biomedical Concepts via Geometry-Constrained Probabilistic Modeling: Jianan Fan,

Dongnan Liu,

Hang Chang,

Heng Huang,

Mei Chen,

Weidong Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2024_CVPR,
  author    = {Fan, Jianan and Liu, Dongnan and Chang, Hang and Huang, Heng and Chen, Mei and Cai, Weidong},
  title     = {Seeing Unseen: Discover Novel Biomedical Concepts via Geometry-Constrained Probabilistic Modeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11524--11534},
}
Communication-Efficient Federated Learning with Accelerated Client Gradient: Geeho Kim,

Jinkyu Kim,

Bohyung Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Geeho and Kim, Jinkyu and Han, Bohyung},
  title     = {Communication-Efficient Federated Learning with Accelerated Client Gradient},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12385--12394},
}
LLMs are Good Action Recognizers: Haoxuan Qu,

Yujun Cai,

Jun Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qu_2024_CVPR,
  author    = {Qu, Haoxuan and Cai, Yujun and Liu, Jun},
  title     = {{LLMs} are Good Action Recognizers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18395--18406},
}
Interactive Continual Learning: Fast and Slow Thinking: Biqing Qi,

Xinquan Chen,

Junqi Gao,

Dong Li,

Jianxing Liu,

Ligang Wu,

Bowen Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qi_2024_CVPR,
  author    = {Qi, Biqing and Chen, Xinquan and Gao, Junqi and Li, Dong and Liu, Jianxing and Wu, Ligang and Zhou, Bowen},
  title     = {Interactive Continual Learning: Fast and Slow Thinking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12882--12892},
}
Towards Learning a Generalist Model for Embodied Navigation: Duo Zheng,

Shijia Huang,

Lin Zhao,

Yiwu Zhong,

Liwei Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2024_CVPR,
  author    = {Zheng, Duo and Huang, Shijia and Zhao, Lin and Zhong, Yiwu and Wang, Liwei},
  title     = {Towards Learning a Generalist Model for Embodied Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13624--13634},
}
Splatter Image: Ultra-Fast Single-View 3D Reconstruction: Stanislaw Szymanowicz,

Christian Rupprecht,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Szymanowicz_2024_CVPR,
  author    = {Szymanowicz, Stanislaw and Rupprecht, Christian and Vedaldi, Andrea},
  title     = {Splatter Image: Ultra-Fast Single-View {3D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10208--10217},
}
Modeling Collaborator: Enabling Subjective Vision Classification With Minimal Human Effort via LLM Tool-Use: Imad Eddine Toubal,

Aditya Avinash,

Neil Gordon Alldrin,

Jan Dlabal,

Wenlei Zhou,

Enming Luo,

Otilia Stretcu,

Hao Xiong,

Chun-Ta Lu,

Howard Zhou,

Ranjay Krishna,

Ariel Fuxman,

Tom Duerig; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Toubal_2024_CVPR,
  author    = {Toubal, Imad Eddine and Avinash, Aditya and Alldrin, Neil Gordon and Dlabal, Jan and Zhou, Wenlei and Luo, Enming and Stretcu, Otilia and Xiong, Hao and Lu, Chun-Ta and Zhou, Howard and Krishna, Ranjay and Fuxman, Ariel and Duerig, Tom},
  title     = {Modeling Collaborator: Enabling Subjective Vision Classification With Minimal Human Effort via {LLM} Tool-Use},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17553--17563},
}
GeoReF: Geometric Alignment Across Shape Variation for Category-level Object Pose Refinement: Linfang Zheng,

Tze Ho Elden Tse,

Chen Wang,

Yinghan Sun,

Hua Chen,

Ales Leonardis,

Wei Zhang,

Hyung Jin Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2024_CVPR,
  author    = {Zheng, Linfang and Tse, Tze Ho Elden and Wang, Chen and Sun, Yinghan and Chen, Hua and Leonardis, Ales and Zhang, Wei and Chang, Hyung Jin},
  title     = {{GeoReF}: Geometric Alignment Across Shape Variation for Category-level Object Pose Refinement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10693--10703},
}
Learning Group Activity Features Through Person Attribute Prediction: Chihiro Nakatani,

Hiroaki Kawashima,

Norimichi Ukita; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nakatani_2024_CVPR,
  author    = {Nakatani, Chihiro and Kawashima, Hiroaki and Ukita, Norimichi},
  title     = {Learning Group Activity Features Through Person Attribute Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18233--18242},
}
Plug-and-Play Diffusion Distillation: Yi-Ting Hsiao,

Siavash Khodadadeh,

Kevin Duarte,

Wei-An Lin,

Hui Qu,

Mingi Kwon,

Ratheesh Kalarot; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hsiao_2024_CVPR,
  author    = {Hsiao, Yi-Ting and Khodadadeh, Siavash and Duarte, Kevin and Lin, Wei-An and Qu, Hui and Kwon, Mingi and Kalarot, Ratheesh},
  title     = {Plug-and-Play Diffusion Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13743--13752},
}
MindBridge: A Cross-Subject Brain Decoding Framework: Shizun Wang,

Songhua Liu,

Zhenxiong Tan,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Shizun and Liu, Songhua and Tan, Zhenxiong and Wang, Xinchao},
  title     = {{MindBridge}: A Cross-Subject Brain Decoding Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11333--11342},
}
MM-Narrator: Narrating Long-form Videos with Multimodal In-Context Learning: Chaoyi Zhang,

Kevin Lin,

Zhengyuan Yang,

Jianfeng Wang,

Linjie Li,

Chung-Ching Lin,

Zicheng Liu,

Lijuan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Chaoyi and Lin, Kevin and Yang, Zhengyuan and Wang, Jianfeng and Li, Linjie and Lin, Chung-Ching and Liu, Zicheng and Wang, Lijuan},
  title     = {{MM-Narrator}: Narrating Long-form Videos with Multimodal In-Context Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13647--13657},
}
Morphable Diffusion: 3D-Consistent Diffusion for Single-image Avatar Creation: Xiyi Chen,

Marko Mihajlovic,

Shaofei Wang,

Sergey Prokudin,

Siyu Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Xiyi and Mihajlovic, Marko and Wang, Shaofei and Prokudin, Sergey and Tang, Siyu},
  title     = {Morphable Diffusion: {3D-Consistent} Diffusion for Single-image Avatar Creation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10359--10370},
}
Fully Convolutional Slice-to-Volume Reconstruction for Single-Stack MRI: Sean I. Young,

Yael Balbastre,

Bruce Fischl,

Polina Golland,

Juan Eugenio Iglesias; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Young_2024_CVPR,
  author    = {Young, Sean I. and Balbastre, Yael and Fischl, Bruce and Golland, Polina and Iglesias, Juan Eugenio},
  title     = {Fully Convolutional Slice-to-Volume Reconstruction for Single-Stack {MRI}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11535--11545},
}
Enhance Image Classification via Inter-Class Image Mixup with Diffusion Model: Zhicai Wang,

Longhui Wei,

Tan Wang,

Heyu Chen,

Yanbin Hao,

Xiang Wang,

Xiangnan He,

Qi Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Zhicai and Wei, Longhui and Wang, Tan and Chen, Heyu and Hao, Yanbin and Wang, Xiang and He, Xiangnan and Tian, Qi},
  title     = {Enhance Image Classification via Inter-Class Image Mixup with Diffusion Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17223--17233},
}
Alpha-CLIP: A CLIP Model Focusing on Wherever You Want: Zeyi Sun,

Ye Fang,

Tong Wu,

Pan Zhang,

Yuhang Zang,

Shu Kong,

Yuanjun Xiong,

Dahua Lin,

Jiaqi Wang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Zeyi and Fang, Ye and Wu, Tong and Zhang, Pan and Zang, Yuhang and Kong, Shu and Xiong, Yuanjun and Lin, Dahua and Wang, Jiaqi},
  title     = {{Alpha-CLIP}: A {CLIP} Model Focusing on Wherever You Want},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13019--13029},
}
ADA-Track: End-to-End Multi-Camera 3D Multi-Object Tracking with Alternating Detection and Association: Shuxiao Ding,

Lukas Schneider,

Marius Cordts,

Juergen Gall; [pdf] [supp]
[bibtex]
@InProceedings{Ding_2024_CVPR,
  author    = {Ding, Shuxiao and Schneider, Lukas and Cordts, Marius and Gall, Juergen},
  title     = {{ADA-Track}: End-to-End Multi-Camera {3D} Multi-Object Tracking with Alternating Detection and Association},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15184--15194},
}
Mind The Edge: Refining Depth Edges in Sparsely-Supervised Monocular Depth Estimation: Lior Talker,

Aviad Cohen,

Erez Yosef,

Alexandra Dana,

Michael Dinerstein; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Talker_2024_CVPR,
  author    = {Talker, Lior and Cohen, Aviad and Yosef, Erez and Dana, Alexandra and Dinerstein, Michael},
  title     = {Mind The Edge: Refining Depth Edges in Sparsely-Supervised Monocular Depth Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10606--10616},
}
Attention-Driven Training-Free Efficiency Enhancement of Diffusion Models: Hongjie Wang,

Difan Liu,

Yan Kang,

Yijun Li,

Zhe Lin,

Niraj K. Jha,

Yuchen Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Hongjie and Liu, Difan and Kang, Yan and Li, Yijun and Lin, Zhe and Jha, Niraj K. and Liu, Yuchen},
  title     = {Attention-Driven Training-Free Efficiency Enhancement of Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16080--16089},
}
CPR: Retrieval Augmented Generation for Copyright Protection: Aditya Golatkar,

Alessandro Achille,

Luca Zancato,

Yu-Xiang Wang,

Ashwin Swaminathan,

Stefano Soatto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Golatkar_2024_CVPR,
  author    = {Golatkar, Aditya and Achille, Alessandro and Zancato, Luca and Wang, Yu-Xiang and Swaminathan, Ashwin and Soatto, Stefano},
  title     = {{CPR}: Retrieval Augmented Generation for Copyright Protection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12374--12384},
}
Vision-and-Language Navigation via Causal Learning: Liuyi Wang,

Zongtao He,

Ronghao Dang,

Mengjiao Shen,

Chengju Liu,

Qijun Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Liuyi and He, Zongtao and Dang, Ronghao and Shen, Mengjiao and Liu, Chengju and Chen, Qijun},
  title     = {Vision-and-Language Navigation via Causal Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13139--13150},
}
Unveiling Parts Beyond Objects: Towards Finer-Granularity Referring Expression Segmentation: Wenxuan Wang,

Tongtian Yue,

Yisi Zhang,

Longteng Guo,

Xingjian He,

Xinlong Wang,

Jing Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Wenxuan and Yue, Tongtian and Zhang, Yisi and Guo, Longteng and He, Xingjian and Wang, Xinlong and Liu, Jing},
  title     = {Unveiling Parts Beyond Objects: Towards Finer-Granularity Referring Expression Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12998--13008},
}
Differentiable Display Photometric Stereo: Seokjun Choi,

Seungwoo Yoon,

Giljoo Nam,

Seungyong Lee,

Seung-Hwan Baek; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Choi_2024_CVPR,
  author    = {Choi, Seokjun and Yoon, Seungwoo and Nam, Giljoo and Lee, Seungyong and Baek, Seung-Hwan},
  title     = {Differentiable Display Photometric Stereo},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11831--11840},
}
In-distribution Public Data Synthesis with Diffusion Models for Differentially Private Image Classification: Jinseong Park,

Yujin Choi,

Jaewook Lee; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Park_2024_CVPR is not unique in this file; disambiguate before citing.
@InProceedings{Park_2024_CVPR,
  author    = {Park, Jinseong and Choi, Yujin and Lee, Jaewook},
  title     = {In-distribution Public Data Synthesis with Diffusion Models for Differentially Private Image Classification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12236--12246},
}
LSK3DNet: Towards Effective and Efficient 3D Perception with Large Sparse Kernels: Tuo Feng,

Wenguan Wang,

Fan Ma,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2024_CVPR,
  author    = {Feng, Tuo and Wang, Wenguan and Ma, Fan and Yang, Yi},
  title     = {{LSK3DNet}: Towards Effective and Efficient {3D} Perception with Large Sparse Kernels},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14916--14927},
}
Diversified and Personalized Multi-rater Medical Image Segmentation: Yicheng Wu,

Xiangde Luo,

Zhe Xu,

Xiaoqing Guo,

Lie Ju,

Zongyuan Ge,

Wenjun Liao,

Jianfei Cai; [pdf] [arXiv]
[bibtex]
% NOTE(review): citation key Wu_2024_CVPR is not unique in this file; disambiguate before citing.
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Yicheng and Luo, Xiangde and Xu, Zhe and Guo, Xiaoqing and Ju, Lie and Ge, Zongyuan and Liao, Wenjun and Cai, Jianfei},
  title     = {Diversified and Personalized Multi-rater Medical Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11470--11479},
}
Discover and Mitigate Multiple Biased Subgroups in Image Classifiers: Zeliang Zhang,

Mingqian Feng,

Zhiheng Li,

Chenliang Xu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhang_2024_CVPR is not unique in this file; disambiguate before citing.
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Zeliang and Feng, Mingqian and Li, Zhiheng and Xu, Chenliang},
  title     = {Discover and Mitigate Multiple Biased Subgroups in Image Classifiers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10906--10915},
}
ExMap: Leveraging Explainability Heatmaps for Unsupervised Group Robustness to Spurious Correlations: Rwiddhi Chakraborty,

Adrian Sletten,

Michael C. Kampffmeyer; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chakraborty_2024_CVPR,
  author    = {Chakraborty, Rwiddhi and Sletten, Adrian and Kampffmeyer, Michael C.},
  title     = {{ExMap}: Leveraging Explainability Heatmaps for Unsupervised Group Robustness to Spurious Correlations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12017--12026},
}
Learning to Segment Referred Objects from Narrated Egocentric Videos: Yuhan Shen,

Huiyu Wang,

Xitong Yang,

Matt Feiszli,

Ehsan Elhamifar,

Lorenzo Torresani,

Effrosyni Mavroudi; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Shen_2024_CVPR is not unique in this file; disambiguate before citing.
@InProceedings{Shen_2024_CVPR,
  author    = {Shen, Yuhan and Wang, Huiyu and Yang, Xitong and Feiszli, Matt and Elhamifar, Ehsan and Torresani, Lorenzo and Mavroudi, Effrosyni},
  title     = {Learning to Segment Referred Objects from Narrated Egocentric Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14510--14520},
}
Adapting Visual-Language Models for Generalizable Anomaly Detection in Medical Images: Chaoqin Huang,

Aofan Jiang,

Jinghao Feng,

Ya Zhang,

Xinchao Wang,

Yanfeng Wang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Huang_2024_CVPR is not unique in this file; disambiguate before citing.
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Chaoqin and Jiang, Aofan and Feng, Jinghao and Zhang, Ya and Wang, Xinchao and Wang, Yanfeng},
  title     = {Adapting Visual-Language Models for Generalizable Anomaly Detection in Medical Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11375--11385},
}
Depth-aware Test-Time Training for Zero-shot Video Object Segmentation: Weihuang Liu,

Xi Shen,

Haolun Li,

Xiuli Bi,

Bo Liu,

Chi-Man Pun,

Xiaodong Cun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Weihuang and Shen, Xi and Li, Haolun and Bi, Xiuli and Liu, Bo and Pun, Chi-Man and Cun, Xiaodong},
  title     = {Depth-aware Test-Time Training for Zero-shot Video Object Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {19218--19227},
}
RMem: Restricted Memory Banks Improve Video Object Segmentation: Junbao Zhou,

Ziqi Pang,

Yu-Xiong Wang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhou_2024_CVPR is not unique in this file; disambiguate before citing.
@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Junbao and Pang, Ziqi and Wang, Yu-Xiong},
  title     = {{RMem}: Restricted Memory Banks Improve Video Object Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18602--18611},
}
Zero-TPrune: Zero-Shot Token Pruning through Leveraging of the Attention Graph in Pre-Trained Transformers: Hongjie Wang,

Bhishma Dedhia,

Niraj K. Jha; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Wang_2024_CVPR is not unique in this file; disambiguate before citing.
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Hongjie and Dedhia, Bhishma and Jha, Niraj K.},
  title     = {{Zero-TPrune}: Zero-Shot Token Pruning through Leveraging of the Attention Graph in Pre-Trained Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16070--16079},
}
DIBS: Enhancing Dense Video Captioning with Unlabeled Videos via Pseudo Boundary Enrichment and Online Refinement: Hao Wu,

Huabin Liu,

Yu Qiao,

Xiao Sun; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Wu_2024_CVPR is not unique in this file; disambiguate before citing.
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Hao and Liu, Huabin and Qiao, Yu and Sun, Xiao},
  title     = {{DIBS}: Enhancing Dense Video Captioning with Unlabeled Videos via Pseudo Boundary Enrichment and Online Refinement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18699--18708},
}
SOK-Bench: A Situated Video Reasoning Benchmark with Aligned Open-World Knowledge: Andong Wang,

Bo Wu,

Sunli Chen,

Zhenfang Chen,

Haotian Guan,

Wei-Ning Lee,

Li Erran Li,

Chuang Gan; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Wang_2024_CVPR is not unique in this file; disambiguate before citing.
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Andong and Wu, Bo and Chen, Sunli and Chen, Zhenfang and Guan, Haotian and Lee, Wei-Ning and Li, Li Erran and Gan, Chuang},
  title     = {{SOK-Bench}: A Situated Video Reasoning Benchmark with Aligned Open-World Knowledge},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13384--13394},
}
LORS: Low-rank Residual Structure for Parameter-Efficient Network Stacking: Jialin Li,

Qiang Nie,

Weifu Fu,

Yuhuan Lin,

Guangpin Tao,

Yong Liu,

Chengjie Wang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Li_2024_CVPR is not unique in this file; disambiguate before citing.
@InProceedings{Li_2024_CVPR,
  author    = {Li, Jialin and Nie, Qiang and Fu, Weifu and Lin, Yuhuan and Tao, Guangpin and Liu, Yong and Wang, Chengjie},
  title     = {{LORS}: Low-rank Residual Structure for Parameter-Efficient Network Stacking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15866--15876},
}
Multi-modal In-Context Learning Makes an Ego-evolving Scene Text Recognizer: Zhen Zhao,

Jingqun Tang,

Chunhui Lin,

Binghong Wu,

Can Huang,

Hao Liu,

Xin Tan,

Zhizhong Zhang,

Yuan Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Zhen and Tang, Jingqun and Lin, Chunhui and Wu, Binghong and Huang, Can and Liu, Hao and Tan, Xin and Zhang, Zhizhong and Xie, Yuan},
  title     = {Multi-modal In-Context Learning Makes an Ego-evolving Scene Text Recognizer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15567--15576},
}
Training Diffusion Models Towards Diverse Image Generation with Reinforcement Learning: Zichen Miao,

Jiang Wang,

Ze Wang,

Zhengyuan Yang,

Lijuan Wang,

Qiang Qiu,

Zicheng Liu; [pdf] [supp]
[bibtex]
@InProceedings{Miao_2024_CVPR,
  author    = {Miao, Zichen and Wang, Jiang and Wang, Ze and Yang, Zhengyuan and Wang, Lijuan and Qiu, Qiang and Liu, Zicheng},
  title     = {Training Diffusion Models Towards Diverse Image Generation with Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10844--10853},
}
LASIL: Learner-Aware Supervised Imitation Learning For Long-term Microscopic Traffic Simulation: Ke Guo,

Zhenwei Miao,

Wei Jing,

Weiwei Liu,

Weizi Li,

Dayang Hao,

Jia Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2024_CVPR,
  author    = {Guo, Ke and Miao, Zhenwei and Jing, Wei and Liu, Weiwei and Li, Weizi and Hao, Dayang and Pan, Jia},
  title     = {{LASIL}: Learner-Aware Supervised Imitation Learning For Long-term Microscopic Traffic Simulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15386--15395},
}
SeaBird: Segmentation in Bird's View with Dice Loss Improves Monocular 3D Detection of Large Objects: Abhinav Kumar,

Yuliang Guo,

Xinyu Huang,

Liu Ren,

Xiaoming Liu; [pdf] [supp]
[bibtex]
@InProceedings{Kumar_2024_CVPR,
  author    = {Kumar, Abhinav and Guo, Yuliang and Huang, Xinyu and Ren, Liu and Liu, Xiaoming},
  title     = {{SeaBird}: Segmentation in Bird's View with {Dice} Loss Improves Monocular {3D} Detection of Large Objects},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10269--10280},
}
NOPE: Novel Object Pose Estimation from a Single Image: Van Nguyen Nguyen,

Thibault Groueix,

Georgy Ponimatkin,

Yinlin Hu,

Renaud Marlet,

Mathieu Salzmann,

Vincent Lepetit; [pdf] [supp]
[bibtex]
@InProceedings{Nguyen_2024_CVPR,
  author    = {Nguyen, Van Nguyen and Groueix, Thibault and Ponimatkin, Georgy and Hu, Yinlin and Marlet, Renaud and Salzmann, Mathieu and Lepetit, Vincent},
  title     = {{NOPE}: Novel Object Pose Estimation from a Single Image},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17923--17932},
}
Dual-View Visual Contextualization for Web Navigation: Jihyung Kil,

Chan Hee Song,

Boyuan Zheng,

Xiang Deng,

Yu Su,

Wei-Lun Chao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kil_2024_CVPR,
  author    = {Kil, Jihyung and Song, Chan Hee and Zheng, Boyuan and Deng, Xiang and Su, Yu and Chao, Wei-Lun},
  title     = {Dual-View Visual Contextualization for Web Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14445--14454},
}
Language-driven Grasp Detection: An Dinh Vuong,

Minh Nhat Vu,

Baoru Huang,

Nghia Nguyen,

Hieu Le,

Thieu Vo,

Anh Nguyen; [pdf] [supp]
[bibtex]
@InProceedings{Vuong_2024_CVPR,
  author    = {Vuong, An Dinh and Vu, Minh Nhat and Huang, Baoru and Nguyen, Nghia and Le, Hieu and Vo, Thieu and Nguyen, Anh},
  title     = {Language-driven Grasp Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17902--17912},
}
Towards Modern Image Manipulation Localization: A Large-Scale Dataset and Novel Methods: Chenfan Qu,

Yiwu Zhong,

Chongyu Liu,

Guitao Xu,

Dezhi Peng,

Fengjun Guo,

Lianwen Jin; [pdf] [supp]
[bibtex]
@InProceedings{Qu_2024_CVPR,
  author    = {Qu, Chenfan and Zhong, Yiwu and Liu, Chongyu and Xu, Guitao and Peng, Dezhi and Guo, Fengjun and Jin, Lianwen},
  title     = {Towards Modern Image Manipulation Localization: A Large-Scale Dataset and Novel Methods},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10781--10790},
}
Object Recognition as Next Token Prediction: Kaiyu Yue,

Bor-Chun Chen,

Jonas Geiping,

Hengduo Li,

Tom Goldstein,

Ser-Nam Lim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yue_2024_CVPR,
  author    = {Yue, Kaiyu and Chen, Bor-Chun and Geiping, Jonas and Li, Hengduo and Goldstein, Tom and Lim, Ser-Nam},
  title     = {Object Recognition as Next Token Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16645--16656},
}
Transcriptomics-guided Slide Representation Learning in Computational Pathology: Guillaume Jaume,

Lukas Oldenburg,

Anurag Vaidya,

Richard J. Chen,

Drew F.K. Williamson,

Thomas Peeters,

Andrew H. Song,

Faisal Mahmood; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jaume_2024_CVPR,
  author    = {Jaume, Guillaume and Oldenburg, Lukas and Vaidya, Anurag and Chen, Richard J. and Williamson, Drew F. K. and Peeters, Thomas and Song, Andrew H. and Mahmood, Faisal},
  title     = {Transcriptomics-guided Slide Representation Learning in Computational Pathology},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9632--9644},
}
CLIP-BEVFormer: Enhancing Multi-View Image-Based BEV Detector with Ground Truth Flow: Chenbin Pan,

Burhaneddin Yaman,

Senem Velipasalar,

Liu Ren; [pdf] [supp]
[bibtex]
@InProceedings{Pan_2024_CVPR,
  author    = {Pan, Chenbin and Yaman, Burhaneddin and Velipasalar, Senem and Ren, Liu},
  title     = {{CLIP-BEVFormer}: Enhancing Multi-View Image-Based {BEV} Detector with Ground Truth Flow},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15216--15225},
}
CLOVA: A Closed-LOop Visual Assistant with Tool Usage and Update: Zhi Gao,

Yuntao Du,

Xintong Zhang,

Xiaojian Ma,

Wenjuan Han,

Song-Chun Zhu,

Qing Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2024_CVPR,
  author    = {Gao, Zhi and Du, Yuntao and Zhang, Xintong and Ma, Xiaojian and Han, Wenjuan and Zhu, Song-Chun and Li, Qing},
  title     = {{CLOVA}: A {Closed-LOop} Visual Assistant with Tool Usage and Update},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13258--13268},
}
Depth Prompting for Sensor-Agnostic Depth Estimation: Jin-Hwi Park,

Chanhwi Jeong,

Junoh Lee,

Hae-Gon Jeon; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Park_2024_CVPR is not unique in this file; disambiguate before citing.
@InProceedings{Park_2024_CVPR,
  author    = {Park, Jin-Hwi and Jeong, Chanhwi and Lee, Junoh and Jeon, Hae-Gon},
  title     = {Depth Prompting for Sensor-Agnostic Depth Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9859--9869},
}
G3DR: Generative 3D Reconstruction in ImageNet: Pradyumna Reddy,

Ismail Elezi,

Jiankang Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Reddy_2024_CVPR,
  author    = {Reddy, Pradyumna and Elezi, Ismail and Deng, Jiankang},
  title     = {{G3DR}: Generative {3D} Reconstruction in {ImageNet}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9655--9665},
}
Hyperspherical Classification with Dynamic Label-to-Prototype Assignment: Mohammad Saeed Ebrahimi Saadabadi,

Ali Dabouei,

Sahar Rahimi Malakshan,

Nasser M. Nasrabadi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saadabadi_2024_CVPR,
  author    = {Saadabadi, Mohammad Saeed Ebrahimi and Dabouei, Ali and Malakshan, Sahar Rahimi and Nasrabadi, Nasser M.},
  title     = {Hyperspherical Classification with Dynamic Label-to-Prototype Assignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17333--17342},
}
VTimeLLM: Empower LLM to Grasp Video Moments: Bin Huang,

Xin Wang,

Hong Chen,

Zihan Song,

Wenwu Zhu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Huang_2024_CVPR is not unique in this file; disambiguate before citing.
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Bin and Wang, Xin and Chen, Hong and Song, Zihan and Zhu, Wenwu},
  title     = {{VTimeLLM}: Empower {LLM} to Grasp Video Moments},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14271--14280},
}
FLHetBench: Benchmarking Device and State Heterogeneity in Federated Learning: Junyuan Zhang,

Shuang Zeng,

Miao Zhang,

Runxi Wang,

Feifei Wang,

Yuyin Zhou,

Paul Pu Liang,

Liangqiong Qu; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Zhang_2024_CVPR is not unique in this file; disambiguate before citing.
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Junyuan and Zeng, Shuang and Zhang, Miao and Wang, Runxi and Wang, Feifei and Zhou, Yuyin and Liang, Paul Pu and Qu, Liangqiong},
  title     = {{FLHetBench}: Benchmarking Device and State Heterogeneity in Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12098--12108},
}
Privacy-Preserving Optics for Enhancing Protection in Face De-Identification: Jhon Lopez,

Carlos Hinojosa,

Henry Arguello,

Bernard Ghanem; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lopez_2024_CVPR,
  author    = {Lopez, Jhon and Hinojosa, Carlos and Arguello, Henry and Ghanem, Bernard},
  title     = {Privacy-Preserving Optics for Enhancing Protection in Face De-Identification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12120--12129},
}
SmartRefine: A Scenario-Adaptive Refinement Framework for Efficient Motion Prediction: Yang Zhou,

Hao Shao,

Letian Wang,

Steven L. Waslander,

Hongsheng Li,

Yu Liu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Zhou_2024_CVPR is not unique in this file; disambiguate before citing.
@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Yang and Shao, Hao and Wang, Letian and Waslander, Steven L. and Li, Hongsheng and Liu, Yu},
  title     = {{SmartRefine}: A Scenario-Adaptive Refinement Framework for Efficient Motion Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15281--15290},
}
Multi-Scale Video Anomaly Detection by Multi-Grained Spatio-Temporal Representation Learning: Menghao Zhang,

Jingyu Wang,

Qi Qi,

Haifeng Sun,

Zirui Zhuang,

Pengfei Ren,

Ruilong Ma,

Jianxin Liao; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Zhang_2024_CVPR is not unique in this file; disambiguate before citing.
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Menghao and Wang, Jingyu and Qi, Qi and Sun, Haifeng and Zhuang, Zirui and Ren, Pengfei and Ma, Ruilong and Liao, Jianxin},
  title     = {Multi-Scale Video Anomaly Detection by Multi-Grained Spatio-Temporal Representation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17385--17394},
}
Generative Multimodal Models are In-Context Learners: Quan Sun,

Yufeng Cui,

Xiaosong Zhang,

Fan Zhang,

Qiying Yu,

Yueze Wang,

Yongming Rao,

Jingjing Liu,

Tiejun Huang,

Xinlong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Quan and Cui, Yufeng and Zhang, Xiaosong and Zhang, Fan and Yu, Qiying and Wang, Yueze and Rao, Yongming and Liu, Jingjing and Huang, Tiejun and Wang, Xinlong},
  title     = {Generative Multimodal Models are In-Context Learners},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14398--14409},
}
Feature Re-Embedding: Towards Foundation Model-Level Performance in Computational Pathology: Wenhao Tang,

Fengtao Zhou,

Sheng Huang,

Xiang Zhu,

Yi Zhang,

Bo Liu; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Tang_2024_CVPR is not unique in this file; disambiguate before citing.
@InProceedings{Tang_2024_CVPR,
  author    = {Tang, Wenhao and Zhou, Fengtao and Huang, Sheng and Zhu, Xiang and Zhang, Yi and Liu, Bo},
  title     = {Feature Re-Embedding: Towards Foundation Model-Level Performance in Computational Pathology},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11343--11352},
}
Text Prompt with Normality Guidance for Weakly Supervised Video Anomaly Detection: Zhiwei Yang,

Jing Liu,

Peng Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Zhiwei and Liu, Jing and Wu, Peng},
  title     = {Text Prompt with Normality Guidance for Weakly Supervised Video Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18899--18908},
}
SparseOcc: Rethinking Sparse Latent Representation for Vision-Based Semantic Occupancy Prediction: Pin Tang,

Zhongdao Wang,

Guoqing Wang,

Jilai Zheng,

Xiangxuan Ren,

Bailan Feng,

Chao Ma; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Tang_2024_CVPR is not unique in this file; disambiguate before citing.
@InProceedings{Tang_2024_CVPR,
  author    = {Tang, Pin and Wang, Zhongdao and Wang, Guoqing and Zheng, Jilai and Ren, Xiangxuan and Feng, Bailan and Ma, Chao},
  title     = {{SparseOcc}: Rethinking Sparse Latent Representation for Vision-Based Semantic Occupancy Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15035--15044},
}
Frequency Decoupling for Motion Magnification via Multi-Level Isomorphic Architecture: Fei Wang,

Dan Guo,

Kun Li,

Zhun Zhong,

Meng Wang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Wang_2024_CVPR is not unique in this file; disambiguate before citing.
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Fei and Guo, Dan and Li, Kun and Zhong, Zhun and Wang, Meng},
  title     = {Frequency Decoupling for Motion Magnification via Multi-Level Isomorphic Architecture},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18984--18994},
}
Hyperbolic Learning with Synthetic Captions for Open-World Detection: Fanjie Kong,

Yanbei Chen,

Jiarui Cai,

Davide Modolo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2024_CVPR,
  author    = {Kong, Fanjie and Chen, Yanbei and Cai, Jiarui and Modolo, Davide},
  title     = {Hyperbolic Learning with Synthetic Captions for Open-World Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16762--16771},
}
Interpretable Measures of Conceptual Similarity by Complexity-Constrained Descriptive Auto-Encoding: Alessandro Achille,

Greg Ver Steeg,

Tian Yu Liu,

Matthew Trager,

Carson Klingenberg,

Stefano Soatto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Achille_2024_CVPR,
  author    = {Achille, Alessandro and Ver Steeg, Greg and Liu, Tian Yu and Trager, Matthew and Klingenberg, Carson and Soatto, Stefano},
  title     = {Interpretable Measures of Conceptual Similarity by Complexity-Constrained Descriptive Auto-Encoding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11062--11071},
}
3D Feature Tracking via Event Camera: Siqi Li,

Zhikuan Zhou,

Zhou Xue,

Yipeng Li,

Shaoyi Du,

Yue Gao; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Li_2024_CVPR is not unique in this file; disambiguate before citing.
@InProceedings{Li_2024_CVPR,
  author    = {Li, Siqi and Zhou, Zhikuan and Xue, Zhou and Li, Yipeng and Du, Shaoyi and Gao, Yue},
  title     = {{3D} Feature Tracking via Event Camera},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18974--18983},
}
MaxQ: Multi-Axis Query for N:M Sparsity Network: Jingyang Xiang,

Siqi Li,

Junhao Chen,

Zhuangzhi Chen,

Tianxin Huang,

Linpeng Peng,

Yong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiang_2024_CVPR,
  author    = {Xiang, Jingyang and Li, Siqi and Chen, Junhao and Chen, Zhuangzhi and Huang, Tianxin and Peng, Linpeng and Liu, Yong},
  title     = {{MaxQ}: Multi-Axis Query for {N:M} Sparsity Network},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15845--15854},
}
Part-aware Unified Representation of Language and Skeleton for Zero-shot Action Recognition: Anqi Zhu,

Qiuhong Ke,

Mingming Gong,

James Bailey; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2024_CVPR,
  author    = {Zhu, Anqi and Ke, Qiuhong and Gong, Mingming and Bailey, James},
  title     = {Part-aware Unified Representation of Language and Skeleton for Zero-shot Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18761--18770},
}
Composing Object Relations and Attributes for Image-Text Matching: Khoi Pham,

Chuong Huynh,

Ser-Nam Lim,

Abhinav Shrivastava; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pham_2024_CVPR,
  author    = {Pham, Khoi and Huynh, Chuong and Lim, Ser-Nam and Shrivastava, Abhinav},
  title     = {Composing Object Relations and Attributes for Image-Text Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14354--14363},
}
Previously on ... From Recaps to Story Summarization: Aditya Kumar Singh,

Dhruv Srivastava,

Makarand Tapaswi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Singh_2024_CVPR,
  author    = {Singh, Aditya Kumar and Srivastava, Dhruv and Tapaswi, Makarand},
  title     = {Previously on ... From Recaps to Story Summarization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13635--13646},
}
mPLUG-Owl2: Revolutionizing Multi-modal Large Language Model with Modality Collaboration: Qinghao Ye,

Haiyang Xu,

Jiabo Ye,

Ming Yan,

Anwen Hu,

Haowei Liu,

Qi Qian,

Ji Zhang,

Fei Huang; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Ye_2024_CVPR is not unique in this file; disambiguate before citing.
@InProceedings{Ye_2024_CVPR,
  author    = {Ye, Qinghao and Xu, Haiyang and Ye, Jiabo and Yan, Ming and Hu, Anwen and Liu, Haowei and Qian, Qi and Zhang, Ji and Huang, Fei},
  title     = {{mPLUG-Owl2}: Revolutionizing Multi-modal Large Language Model with Modality Collaboration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13040--13051},
}
Learning by Correction: Efficient Tuning Task for Zero-Shot Generative Vision-Language Reasoning: Rongjie Li,

Yu Wu,

Xuming He; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Li_2024_CVPR is not unique in this file; disambiguate before citing.
@InProceedings{Li_2024_CVPR,
  author    = {Li, Rongjie and Wu, Yu and He, Xuming},
  title     = {Learning by Correction: Efficient Tuning Task for Zero-Shot Generative Vision-Language Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13428--13437},
}
Supervised Anomaly Detection for Complex Industrial Images: Aimira Baitieva,

David Hurych,

Victor Besnier,

Olivier Bernard; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Baitieva_2024_CVPR,
  author    = {Baitieva, Aimira and Hurych, David and Besnier, Victor and Bernard, Olivier},
  title     = {Supervised Anomaly Detection for Complex Industrial Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17754--17762},
}
Open3DSG: Open-Vocabulary 3D Scene Graphs from Point Clouds with Queryable Objects and Open-Set Relationships: Sebastian Koch,

Narunas Vaskevicius,

Mirco Colosi,

Pedro Hermosilla,

Timo Ropinski; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Koch_2024_CVPR,
  author    = {Koch, Sebastian and Vaskevicius, Narunas and Colosi, Mirco and Hermosilla, Pedro and Ropinski, Timo},
  title     = {{Open3DSG}: Open-Vocabulary {3D} Scene Graphs from Point Clouds with Queryable Objects and Open-Set Relationships},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14183--14193},
}
SURE: SUrvey REcipes for building reliable and robust deep networks: Yuting Li,

Yingyi Chen,

Xuanlong Yu,

Dexiong Chen,

Xi Shen; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Li_2024_CVPR is not unique in this file; disambiguate before citing.
@InProceedings{Li_2024_CVPR,
  author    = {Li, Yuting and Chen, Yingyi and Yu, Xuanlong and Chen, Dexiong and Shen, Xi},
  title     = {{SURE}: {SUrvey} {REcipes} for building reliable and robust deep networks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17500--17510},
}
PolarRec: Improving Radio Interferometric Data Reconstruction Using Polar Coordinates: Ruoqi Wang,

Zhuoyang Chen,

Jiayi Zhu,

Qiong Luo,

Feng Wang; [pdf] [supp]
[bibtex]
% NOTE(review): citation key Wang_2024_CVPR is not unique in this file; disambiguate before citing.
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Ruoqi and Chen, Zhuoyang and Zhu, Jiayi and Luo, Qiong and Wang, Feng},
  title     = {{PolarRec}: Improving Radio Interferometric Data Reconstruction Using Polar Coordinates},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12841--12850},
}
Summarize the Past to Predict the Future: Natural Language Descriptions of Context Boost Multimodal Object Interaction Anticipation: Razvan-George Pasca,

Alexey Gavryushin,

Muhammad Hamza,

Yen-Ling Kuo,

Kaichun Mo,

Luc Van Gool,

Otmar Hilliges,

Xi Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pasca_2024_CVPR,
  author    = {Pasca, Razvan-George and Gavryushin, Alexey and Hamza, Muhammad and Kuo, Yen-Ling and Mo, Kaichun and Van Gool, Luc and Hilliges, Otmar and Wang, Xi},
  title     = {Summarize the Past to Predict the Future: Natural Language Descriptions of Context Boost Multimodal Object Interaction Anticipation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18286--18296},
}
Towards CLIP-driven Language-free 3D Visual Grounding via 2D-3D Relational Enhancement and Consistency: Yuqi Zhang,

Han Luo,

Yinjie Lei; [pdf]
[bibtex]
% NOTE(review): citation key Zhang_2024_CVPR is not unique in this file; disambiguate before citing.
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Yuqi and Luo, Han and Lei, Yinjie},
  title     = {Towards {CLIP}-driven Language-free {3D} Visual Grounding via {2D-3D} Relational Enhancement and Consistency},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13063--13072},
}
Optimal Transport Aggregation for Visual Place Recognition: Sergio Izquierdo,

Javier Civera; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Izquierdo_2024_CVPR,
  author    = {Izquierdo, Sergio and Civera, Javier},
  title     = {Optimal Transport Aggregation for Visual Place Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17658--17668},
}
Aligning and Prompting Everything All at Once for Universal Visual Perception: Yunhang Shen,

Chaoyou Fu,

Peixian Chen,

Mengdan Zhang,

Ke Li,

Xing Sun,

Yunsheng Wu,

Shaohui Lin,

Rongrong Ji; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): citation key Shen_2024_CVPR is not unique in this file; disambiguate before citing.
@InProceedings{Shen_2024_CVPR,
  author    = {Shen, Yunhang and Fu, Chaoyou and Chen, Peixian and Zhang, Mengdan and Li, Ke and Sun, Xing and Wu, Yunsheng and Lin, Shaohui and Ji, Rongrong},
  title     = {Aligning and Prompting Everything All at Once for Universal Visual Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13193--13203},
}
Correlation-Decoupled Knowledge Distillation for Multimodal Sentiment Analysis with Incomplete Modalities: Mingcheng Li,

Dingkang Yang,

Xiao Zhao,

Shuaibing Wang,

Yan Wang,

Kun Yang,

Mingyang Sun,

Dongliang Kou,

Ziyun Qian,

Lihua Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
    author    = {Li, Mingcheng and Yang, Dingkang and Zhao, Xiao and Wang, Shuaibing and Wang, Yan and Yang, Kun and Sun, Mingyang and Kou, Dongliang and Qian, Ziyun and Zhang, Lihua},
    title     = {Correlation-Decoupled Knowledge Distillation for Multimodal Sentiment Analysis with Incomplete Modalities},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {12458--12468},
}
LoSh: Long-Short Text Joint Prediction Network for Referring Video Object Segmentation: Linfeng Yuan,

Miaojing Shi,

Zijie Yue,

Qijun Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2024_CVPR,
    author    = {Yuan, Linfeng and Shi, Miaojing and Yue, Zijie and Chen, Qijun},
    title     = {{LoSh}: Long-Short Text Joint Prediction Network for Referring Video Object Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {14001--14010},
}
Dual Prototype Attention for Unsupervised Video Object Segmentation: Suhwan Cho,

Minhyeok Lee,

Seunghoon Lee,

Dogyoon Lee,

Heeseung Choi,

Ig-Jae Kim,

Sangyoun Lee; [pdf] [arXiv]
[bibtex]
@InProceedings{Cho_2024_CVPR,
    author    = {Cho, Suhwan and Lee, Minhyeok and Lee, Seunghoon and Lee, Dogyoon and Choi, Heeseung and Kim, Ig-Jae and Lee, Sangyoun},
    title     = {Dual Prototype Attention for Unsupervised Video Object Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {19238--19247},
}
Navigate Beyond Shortcuts: Debiased Learning Through the Lens of Neural Collapse: Yining Wang,

Junjie Sun,

Chenyue Wang,

Mi Zhang,

Min Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
    author    = {Wang, Yining and Sun, Junjie and Wang, Chenyue and Zhang, Mi and Yang, Min},
    title     = {Navigate Beyond Shortcuts: Debiased Learning Through the Lens of Neural Collapse},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {12322--12331},
}
A Subspace-Constrained Tyler's Estimator and its Applications to Structure from Motion: Feng Yu,

Teng Zhang,

Gilad Lerman; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2024_CVPR,
    author    = {Yu, Feng and Zhang, Teng and Lerman, Gilad},
    title     = {A Subspace-Constrained {Tyler's} Estimator and its Applications to Structure from Motion},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {14575--14584},
}
CAD: Photorealistic 3D Generation via Adversarial Distillation: Ziyu Wan,

Despoina Paschalidou,

Ian Huang,

Hongyu Liu,

Bokui Shen,

Xiaoyu Xiang,

Jing Liao,

Leonidas Guibas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wan_2024_CVPR,
    author    = {Wan, Ziyu and Paschalidou, Despoina and Huang, Ian and Liu, Hongyu and Shen, Bokui and Xiang, Xiaoyu and Liao, Jing and Guibas, Leonidas},
    title     = {{CAD}: Photorealistic {3D} Generation via Adversarial Distillation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {10194--10207},
}
Enhancing Vision-Language Pre-training with Rich Supervisions: Yuan Gao,

Kunyu Shi,

Pengkai Zhu,

Edouard Belval,

Oren Nuriel,

Srikar Appalaraju,

Shabnam Ghadar,

Zhuowen Tu,

Vijay Mahadevan,

Stefano Soatto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2024_CVPR,
    author    = {Gao, Yuan and Shi, Kunyu and Zhu, Pengkai and Belval, Edouard and Nuriel, Oren and Appalaraju, Srikar and Ghadar, Shabnam and Tu, Zhuowen and Mahadevan, Vijay and Soatto, Stefano},
    title     = {Enhancing Vision-Language Pre-training with Rich Supervisions},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {13480--13491},
}
Adaptive VIO: Deep Visual-Inertial Odometry with Online Continual Learning: Youqi Pan,

Wugen Zhou,

Yingdian Cao,

Hongbin Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pan_2024_CVPR,
    author    = {Pan, Youqi and Zhou, Wugen and Cao, Yingdian and Zha, Hongbin},
    title     = {Adaptive {VIO}: Deep Visual-Inertial Odometry with Online Continual Learning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {18019--18028},
}
Generalized Large-Scale Data Condensation via Various Backbone and Statistical Matching: Shitong Shao,

Zeyuan Yin,

Muxin Zhou,

Xindong Zhang,

Zhiqiang Shen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2024_CVPR,
    author    = {Shao, Shitong and Yin, Zeyuan and Zhou, Muxin and Zhang, Xindong and Shen, Zhiqiang},
    title     = {Generalized Large-Scale Data Condensation via Various Backbone and Statistical Matching},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {16709--16718},
}
On Train-Test Class Overlap and Detection for Image Retrieval: Chull Hwan Song,

Jooyoung Yoon,

Taebaek Hwang,

Shunghyun Choi,

Yeong Hyeon Gu,

Yannis Avrithis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2024_CVPR,
    author    = {Song, Chull Hwan and Yoon, Jooyoung and Hwang, Taebaek and Choi, Shunghyun and Gu, Yeong Hyeon and Avrithis, Yannis},
    title     = {On Train-Test Class Overlap and Detection for Image Retrieval},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {17375--17384},
}
AttriHuman-3D: Editable 3D Human Avatar Generation with Attribute Decomposition and Indexing: Fan Yang,

Tianyi Chen,

Xiaosheng He,

Zhongang Cai,

Lei Yang,

Si Wu,

Guosheng Lin; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2024_CVPR,
    author    = {Yang, Fan and Chen, Tianyi and He, Xiaosheng and Cai, Zhongang and Yang, Lei and Wu, Si and Lin, Guosheng},
    title     = {{AttriHuman-3D}: Editable {3D} Human Avatar Generation with Attribute Decomposition and Indexing},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {10596--10605},
}
Learning Object State Changes in Videos: An Open-World Perspective: Zihui Xue,

Kumar Ashutosh,

Kristen Grauman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2024_CVPR,
    author    = {Xue, Zihui and Ashutosh, Kumar and Grauman, Kristen},
    title     = {Learning Object State Changes in Videos: An Open-World Perspective},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {18493--18503},
}
SCoFT: Self-Contrastive Fine-Tuning for Equitable Image Generation: Zhixuan Liu,

Peter Schaldenbrand,

Beverley-Claire Okogwu,

Wenxuan Peng,

Youngsik Yun,

Andrew Hundt,

Jihie Kim,

Jean Oh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
    author    = {Liu, Zhixuan and Schaldenbrand, Peter and Okogwu, Beverley-Claire and Peng, Wenxuan and Yun, Youngsik and Hundt, Andrew and Kim, Jihie and Oh, Jean},
    title     = {{SCoFT}: Self-Contrastive Fine-Tuning for Equitable Image Generation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {10822--10832},
}
Iterated Learning Improves Compositionality in Large Vision-Language Models: Chenhao Zheng,

Jieyu Zhang,

Aniruddha Kembhavi,

Ranjay Krishna; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2024_CVPR,
    author    = {Zheng, Chenhao and Zhang, Jieyu and Kembhavi, Aniruddha and Krishna, Ranjay},
    title     = {Iterated Learning Improves Compositionality in Large Vision-Language Models},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {13785--13795},
}
Event Stream-based Visual Object Tracking: A High-Resolution Benchmark Dataset and A Novel Baseline: Xiao Wang,

Shiao Wang,

Chuanming Tang,

Lin Zhu,

Bo Jiang,

Yonghong Tian,

Jin Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
    author    = {Wang, Xiao and Wang, Shiao and Tang, Chuanming and Zhu, Lin and Jiang, Bo and Tian, Yonghong and Tang, Jin},
    title     = {Event Stream-based Visual Object Tracking: A High-Resolution Benchmark Dataset and A Novel Baseline},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {19248--19257},
}
Dual DETRs for Multi-Label Temporal Action Detection: Yuhan Zhu,

Guozhen Zhang,

Jing Tan,

Gangshan Wu,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2024_CVPR,
    author    = {Zhu, Yuhan and Zhang, Guozhen and Tan, Jing and Wu, Gangshan and Wang, Limin},
    title     = {Dual {DETRs} for Multi-Label Temporal Action Detection},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {18559--18569},
}
Virtual Immunohistochemistry Staining for Histological Images Assisted by Weakly-supervised Learning: Jiahan Li,

Jiuyang Dong,

Shenjin Huang,

Xi Li,

Junjun Jiang,

Xiaopeng Fan,

Yongbing Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2024_CVPR,
    author    = {Li, Jiahan and Dong, Jiuyang and Huang, Shenjin and Li, Xi and Jiang, Junjun and Fan, Xiaopeng and Zhang, Yongbing},
    title     = {Virtual Immunohistochemistry Staining for Histological Images Assisted by Weakly-supervised Learning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {11259--11268},
}
DeCoTR: Enhancing Depth Completion with 2D and 3D Attentions: Yunxiao Shi,

Manish Kumar Singh,

Hong Cai,

Fatih Porikli; [pdf] [arXiv]
[bibtex]
@InProceedings{Shi_2024_CVPR,
    author    = {Shi, Yunxiao and Singh, Manish Kumar and Cai, Hong and Porikli, Fatih},
    title     = {{DeCoTR}: Enhancing Depth Completion with {2D} and {3D} Attentions},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {10736--10746},
}
Utility-Fairness Trade-Offs and How to Find Them: Sepehr Dehdashtian,

Bashir Sadeghi,

Vishnu Naresh Boddeti; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dehdashtian_2024_CVPR,
    author    = {Dehdashtian, Sepehr and Sadeghi, Bashir and Boddeti, Vishnu Naresh},
    title     = {Utility-Fairness Trade-Offs and How to Find Them},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {12037--12046},
}
SAOR: Single-View Articulated Object Reconstruction: Mehmet Aygun,

Oisin Mac Aodha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Aygun_2024_CVPR,
    author    = {Aygun, Mehmet and Mac Aodha, Oisin},
    title     = {{SAOR}: Single-View Articulated Object Reconstruction},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {10382--10391},
}
A Theory of Joint Light and Heat Transport for Lambertian Scenes: Mani Ramanagopal,

Sriram Narayanan,

Aswin C. Sankaranarayanan,

Srinivasa G. Narasimhan; [pdf] [supp]
[bibtex]
@InProceedings{Ramanagopal_2024_CVPR,
    author    = {Ramanagopal, Mani and Narayanan, Sriram and Sankaranarayanan, Aswin C. and Narasimhan, Srinivasa G.},
    title     = {A Theory of Joint Light and Heat Transport for {Lambertian} Scenes},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {11924--11933},
}
iKUN: Speak to Trackers without Retraining: Yunhao Du,

Cheng Lei,

Zhicheng Zhao,

Fei Su; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2024_CVPR,
    author    = {Du, Yunhao and Lei, Cheng and Zhao, Zhicheng and Su, Fei},
    title     = {{iKUN}: Speak to Trackers without Retraining},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {19135--19144},
}
Facial Identity Anonymization via Intrinsic and Extrinsic Attention Distraction: Zhenzhong Kuang,

Xiaochen Yang,

Yingjie Shen,

Chao Hu,

Jun Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kuang_2024_CVPR,
    author    = {Kuang, Zhenzhong and Yang, Xiaochen and Shen, Yingjie and Hu, Chao and Yu, Jun},
    title     = {Facial Identity Anonymization via Intrinsic and Extrinsic Attention Distraction},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {12406--12415},
}
3D-SceneDreamer: Text-Driven 3D-Consistent Scene Generation: Songchun Zhang,

Yibo Zhang,

Quan Zheng,

Rui Ma,

Wei Hua,

Hujun Bao,

Weiwei Xu,

Changqing Zou; [pdf]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Songchun and Zhang, Yibo and Zheng, Quan and Ma, Rui and Hua, Wei and Bao, Hujun and Xu, Weiwei and Zou, Changqing},
    title     = {{3D-SceneDreamer}: Text-Driven {3D}-Consistent Scene Generation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {10170--10180},
}
VMINer: Versatile Multi-view Inverse Rendering with Near- and Far-field Light Sources: Fan Fei,

Jiajun Tang,

Ping Tan,

Boxin Shi; [pdf] [supp]
[bibtex]
@InProceedings{Fei_2024_CVPR,
    author    = {Fei, Fan and Tang, Jiajun and Tan, Ping and Shi, Boxin},
    title     = {{VMINer}: Versatile Multi-view Inverse Rendering with Near- and Far-field Light Sources},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {11800--11809},
}
RoHM: Robust Human Motion Reconstruction via Diffusion: Siwei Zhang,

Bharat Lal Bhatnagar,

Yuanlu Xu,

Alexander Winkler,

Petr Kadlecek,

Siyu Tang,

Federica Bogo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Siwei and Bhatnagar, Bharat Lal and Xu, Yuanlu and Winkler, Alexander and Kadlecek, Petr and Tang, Siyu and Bogo, Federica},
    title     = {{RoHM}: Robust Human Motion Reconstruction via Diffusion},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {14606--14617},
}
Do You Remember? Dense Video Captioning with Cross-Modal Memory Retrieval: Minkuk Kim,

Hyeon Bae Kim,

Jinyoung Moon,

Jinwoo Choi,

Seong Tae Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR,
    author    = {Kim, Minkuk and Kim, Hyeon Bae and Moon, Jinyoung and Choi, Jinwoo and Kim, Seong Tae},
    title     = {Do You Remember? {Dense} Video Captioning with Cross-Modal Memory Retrieval},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {13894--13904},
}
SPAD: Spatially Aware Multi-View Diffusers: Yash Kant,

Aliaksandr Siarohin,

Ziyi Wu,

Michael Vasilkovsky,

Guocheng Qian,

Jian Ren,

Riza Alp Guler,

Bernard Ghanem,

Sergey Tulyakov,

Igor Gilitschenski; [pdf] [supp]
[bibtex]
@InProceedings{Kant_2024_CVPR,
    author    = {Kant, Yash and Siarohin, Aliaksandr and Wu, Ziyi and Vasilkovsky, Michael and Qian, Guocheng and Ren, Jian and Guler, Riza Alp and Ghanem, Bernard and Tulyakov, Sergey and Gilitschenski, Igor},
    title     = {{SPAD}: Spatially Aware Multi-View Diffusers},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {10026--10038},
}
Gradient Reweighting: Towards Imbalanced Class-Incremental Learning: Jiangpeng He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2024_CVPR,
    author    = {He, Jiangpeng},
    title     = {Gradient Reweighting: Towards Imbalanced Class-Incremental Learning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {16668--16677},
}
Gaussian Splatting SLAM: Hidenobu Matsuki,

Riku Murai,

Paul H.J. Kelly,

Andrew J. Davison; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Matsuki_2024_CVPR,
    author    = {Matsuki, Hidenobu and Murai, Riku and Kelly, Paul H. J. and Davison, Andrew J.},
    title     = {{Gaussian} Splatting {SLAM}},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {18039--18048},
}
Not All Classes Stand on Same Embeddings: Calibrating a Semantic Distance with Metric Tensor: Jae Hyeon Park,

Gyoomin Lee,

Seunggi Park,

Sung In Cho; [pdf] [supp]
[bibtex]
@InProceedings{Park_2024_CVPR,
    author    = {Park, Jae Hyeon and Lee, Gyoomin and Park, Seunggi and Cho, Sung In},
    title     = {Not All Classes Stand on Same Embeddings: Calibrating a Semantic Distance with Metric Tensor},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {17722--17731},
}
A Simple Recipe for Contrastively Pre-training Video-First Encoders Beyond 16 Frames: Pinelopi Papalampidi,

Skanda Koppula,

Shreya Pathak,

Justin Chiu,

Joe Heyward,

Viorica Patraucean,

Jiajun Shen,

Antoine Miech,

Andrew Zisserman,

Aida Nematzdeh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Papalampidi_2024_CVPR,
    author    = {Papalampidi, Pinelopi and Koppula, Skanda and Pathak, Shreya and Chiu, Justin and Heyward, Joe and Patraucean, Viorica and Shen, Jiajun and Miech, Antoine and Zisserman, Andrew and Nematzdeh, Aida},
    title     = {A Simple Recipe for Contrastively Pre-training Video-First Encoders Beyond 16 Frames},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {14386--14397},
}
Hierarchical Diffusion Policy for Kinematics-Aware Multi-Task Robotic Manipulation: Xiao Ma,

Sumit Patidar,

Iain Haughton,

Stephen James; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2024_CVPR,
    author    = {Ma, Xiao and Patidar, Sumit and Haughton, Iain and James, Stephen},
    title     = {Hierarchical Diffusion Policy for Kinematics-Aware Multi-Task Robotic Manipulation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {18081--18090},
}
Benchmarking the Robustness of Temporal Action Detection Models Against Temporal Corruptions: Runhao Zeng,

Xiaoyong Chen,

Jiaming Liang,

Huisi Wu,

Guangzhong Cao,

Yong Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2024_CVPR,
    author    = {Zeng, Runhao and Chen, Xiaoyong and Liang, Jiaming and Wu, Huisi and Cao, Guangzhong and Guo, Yong},
    title     = {Benchmarking the Robustness of Temporal Action Detection Models Against Temporal Corruptions},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {18263--18274},
}
Open-World Human-Object Interaction Detection via Multi-modal Prompts: Jie Yang,

Bingliang Li,

Ailing Zeng,

Lei Zhang,

Ruimao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR,
    author    = {Yang, Jie and Li, Bingliang and Zeng, Ailing and Zhang, Lei and Zhang, Ruimao},
    title     = {Open-World Human-Object Interaction Detection via Multi-modal Prompts},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {16954--16964},
}
UniMODE: Unified Monocular 3D Object Detection: Zhuoling Li,

Xiaogang Xu,

SerNam Lim,

Hengshuang Zhao; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
    author    = {Li, Zhuoling and Xu, Xiaogang and Lim, SerNam and Zhao, Hengshuang},
    title     = {{UniMODE}: Unified Monocular {3D} Object Detection},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {16561--16570},
}
Multi-agent Collaborative Perception via Motion-aware Robust Communication Network: Shixin Hong,

Yu Liu,

Zhi Li,

Shaohui Li,

You He; [pdf]
[bibtex]
@InProceedings{Hong_2024_CVPR,
    author    = {Hong, Shixin and Liu, Yu and Li, Zhi and Li, Shaohui and He, You},
    title     = {Multi-agent Collaborative Perception via Motion-aware Robust Communication Network},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {15301--15310},
}
The Manga Whisperer: Automatically Generating Transcriptions for Comics: Ragav Sachdeva,

Andrew Zisserman; [pdf] [arXiv]
[bibtex]
@InProceedings{Sachdeva_2024_CVPR,
    author    = {Sachdeva, Ragav and Zisserman, Andrew},
    title     = {The Manga Whisperer: Automatically Generating Transcriptions for Comics},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {12967--12976},
}
Exploring Region-Word Alignment in Built-in Detector for Open-Vocabulary Object Detection: Heng Zhang,

Qiuyu Zhao,

Linyu Zheng,

Hao Zeng,

Zhiwei Ge,

Tianhao Li,

Sulong Xu; [pdf]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Heng and Zhao, Qiuyu and Zheng, Linyu and Zeng, Hao and Ge, Zhiwei and Li, Tianhao and Xu, Sulong},
    title     = {Exploring Region-Word Alignment in Built-in Detector for Open-Vocabulary Object Detection},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {16975--16984},
}
MovieChat: From Dense Token to Sparse Memory for Long Video Understanding: Enxin Song,

Wenhao Chai,

Guanhong Wang,

Yucheng Zhang,

Haoyang Zhou,

Feiyang Wu,

Haozhe Chi,

Xun Guo,

Tian Ye,

Yanting Zhang,

Yan Lu,

Jenq-Neng Hwang,

Gaoang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2024_CVPR,
    author    = {Song, Enxin and Chai, Wenhao and Wang, Guanhong and Zhang, Yucheng and Zhou, Haoyang and Wu, Feiyang and Chi, Haozhe and Guo, Xun and Ye, Tian and Zhang, Yanting and Lu, Yan and Hwang, Jenq-Neng and Wang, Gaoang},
    title     = {{MovieChat}: From Dense Token to Sparse Memory for Long Video Understanding},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {18221--18232},
}
Comparing the Decision-Making Mechanisms by Transformers and CNNs via Explanation Methods: Mingqi Jiang,

Saeed Khorram,

Li Fuxin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2024_CVPR,
    author    = {Jiang, Mingqi and Khorram, Saeed and Li, Fuxin},
    title     = {Comparing the Decision-Making Mechanisms by Transformers and {CNNs} via Explanation Methods},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {9546--9555},
}
Atlantis: Enabling Underwater Depth Estimation with Stable Diffusion: Fan Zhang,

Shaodi You,

Yu Li,

Ying Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Fan and You, Shaodi and Li, Yu and Fu, Ying},
    title     = {{Atlantis}: Enabling Underwater Depth Estimation with {Stable Diffusion}},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {11852--11861},
}
Matching Anything by Segmenting Anything: Siyuan Li,

Lei Ke,

Martin Danelljan,

Luigi Piccinelli,

Mattia Segu,

Luc Van Gool,

Fisher Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
    author    = {Li, Siyuan and Ke, Lei and Danelljan, Martin and Piccinelli, Luigi and Segu, Mattia and Van Gool, Luc and Yu, Fisher},
    title     = {Matching Anything by Segmenting Anything},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {18963--18973},
}
Decoupled Pseudo-labeling for Semi-Supervised Monocular 3D Object Detection: Jiacheng Zhang,

Jiaming Li,

Xiangru Lin,

Wei Zhang,

Xiao Tan,

Junyu Han,

Errui Ding,

Jingdong Wang,

Guanbin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Jiacheng and Li, Jiaming and Lin, Xiangru and Zhang, Wei and Tan, Xiao and Han, Junyu and Ding, Errui and Wang, Jingdong and Li, Guanbin},
    title     = {Decoupled Pseudo-labeling for Semi-Supervised Monocular {3D} Object Detection},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {16923--16932},
}
Temporally Consistent Unbalanced Optimal Transport for Unsupervised Action Segmentation: Ming Xu,

Stephen Gould; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
    author    = {Xu, Ming and Gould, Stephen},
    title     = {Temporally Consistent Unbalanced Optimal Transport for Unsupervised Action Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {14618--14627},
}
Learning Transferable Negative Prompts for Out-of-Distribution Detection: Tianqi Li,

Guansong Pang,

Xiao Bai,

Wenjun Miao,

Jin Zheng; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
    author    = {Li, Tianqi and Pang, Guansong and Bai, Xiao and Miao, Wenjun and Zheng, Jin},
    title     = {Learning Transferable Negative Prompts for Out-of-Distribution Detection},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {17584--17594},
}
Holistic Features are almost Sufficient for Text-to-Video Retrieval: Kaibin Tian,

Ruixiang Zhao,

Zijie Xin,

Bangxiang Lan,

Xirong Li; [pdf]
[bibtex]
@InProceedings{Tian_2024_CVPR,
    author    = {Tian, Kaibin and Zhao, Ruixiang and Xin, Zijie and Lan, Bangxiang and Li, Xirong},
    title     = {Holistic Features are almost Sufficient for Text-to-Video Retrieval},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {17138--17147},
}
Uncertainty-aware Action Decoupling Transformer for Action Anticipation: Hongji Guo,

Nakul Agarwal,

Shao-Yuan Lo,

Kwonjoon Lee,

Qiang Ji; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2024_CVPR,
    author    = {Guo, Hongji and Agarwal, Nakul and Lo, Shao-Yuan and Lee, Kwonjoon and Ji, Qiang},
    title     = {Uncertainty-aware Action Decoupling Transformer for Action Anticipation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {18644--18654},
}
One-Prompt to Segment All Medical Images: Junde Wu,

Min Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
    author    = {Wu, Junde and Xu, Min},
    title     = {One-Prompt to Segment All Medical Images},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {11302--11312},
}
GROUNDHOG: Grounding Large Language Models to Holistic Segmentation: Yichi Zhang,

Ziqiao Ma,

Xiaofeng Gao,

Suhaila Shakiah,

Qiaozi Gao,

Joyce Chai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Yichi and Ma, Ziqiao and Gao, Xiaofeng and Shakiah, Suhaila and Gao, Qiaozi and Chai, Joyce},
    title     = {{GROUNDHOG}: Grounding Large Language Models to Holistic Segmentation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {14227--14238},
}
Omni-SMoLA: Boosting Generalist Multimodal Models with Soft Mixture of Low-rank Experts: Jialin Wu,

Xia Hu,

Yaqing Wang,

Bo Pang,

Radu Soricut; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2024_CVPR,
    author    = {Wu, Jialin and Hu, Xia and Wang, Yaqing and Pang, Bo and Soricut, Radu},
    title     = {{Omni-SMoLA}: Boosting Generalist Multimodal Models with Soft Mixture of Low-rank Experts},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {14205--14215},
}
SeMoLi: What Moves Together Belongs Together: Jenny Seidenschwarz,

Aljosa Osep,

Francesco Ferroni,

Simon Lucey,

Laura Leal-Taixe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seidenschwarz_2024_CVPR,
    author    = {Seidenschwarz, Jenny and Osep, Aljosa and Ferroni, Francesco and Lucey, Simon and Leal-Taixe, Laura},
    title     = {{SeMoLi}: What Moves Together Belongs Together},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {14685--14694},
}
Context-Guided Spatio-Temporal Video Grounding: Xin Gu,

Heng Fan,

Yan Huang,

Tiejian Luo,

Libo Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2024_CVPR,
    author    = {Gu, Xin and Fan, Heng and Huang, Yan and Luo, Tiejian and Zhang, Libo},
    title     = {Context-Guided Spatio-Temporal Video Grounding},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {18330--18339},
}
Explaining the Implicit Neural Canvas: Connecting Pixels to Neurons by Tracing their Contributions: Namitha Padmanabhan,

Matthew Gwilliam,

Pulkit Kumar,

Shishira R Maiya,

Max Ehrlich,

Abhinav Shrivastava; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Padmanabhan_2024_CVPR,
    author    = {Padmanabhan, Namitha and Gwilliam, Matthew and Kumar, Pulkit and Maiya, Shishira R and Ehrlich, Max and Shrivastava, Abhinav},
    title     = {Explaining the Implicit Neural Canvas: Connecting Pixels to Neurons by Tracing their Contributions},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {10957--10967},
}
Adapting to Length Shift: FlexiLength Network for Trajectory Prediction: Yi Xu,

Yun Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
    author    = {Xu, Yi and Fu, Yun},
    title     = {Adapting to Length Shift: {FlexiLength} Network for Trajectory Prediction},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {15226--15237},
}
WorDepth: Variational Language Prior for Monocular Depth Estimation: Ziyao Zeng,

Daniel Wang,

Fengyu Yang,

Hyoungseob Park,

Stefano Soatto,

Dong Lao,

Alex Wong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zeng_2024_CVPR,
    author    = {Zeng, Ziyao and Wang, Daniel and Yang, Fengyu and Park, Hyoungseob and Soatto, Stefano and Lao, Dong and Wong, Alex},
    title     = {{WorDepth}: Variational Language Prior for Monocular Depth Estimation},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {9708--9719},
}
A Unified Framework for Microscopy Defocus Deblur with Multi-Pyramid Transformer and Contrastive Learning: Yuelin Zhang,

Pengyu Zheng,

Wanquan Yan,

Chengyu Fang,

Shing Shin Cheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Yuelin and Zheng, Pengyu and Yan, Wanquan and Fang, Chengyu and Cheng, Shing Shin},
    title     = {A Unified Framework for Microscopy Defocus Deblur with Multi-Pyramid Transformer and Contrastive Learning},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {11125--11136},
}
Frozen Feature Augmentation for Few-Shot Image Classification: Andreas Bär,

Neil Houlsby,

Mostafa Dehghani,

Manoj Kumar; [pdf] [supp]
[bibtex]
@InProceedings{Bar_2024_CVPR,
    author    = {B{\"a}r, Andreas and Houlsby, Neil and Dehghani, Mostafa and Kumar, Manoj},
    title     = {Frozen Feature Augmentation for Few-Shot Image Classification},
    booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
    month     = jun,
    year      = {2024},
    pages     = {16046--16057},
}
Incorporating Geo-Diverse Knowledge into Prompting for Increased Geographical Robustness in Object Recognition: Kyle Buettner,

Sina Malakouti,

Xiang Lorraine Li,

Adriana Kovashka; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Buettner_2024_CVPR,
  author    = {Buettner, Kyle and Malakouti, Sina and Li, Xiang Lorraine and Kovashka, Adriana},
  title     = {Incorporating Geo-Diverse Knowledge into Prompting for Increased Geographical Robustness in Object Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13515--13524},
}
PIN: Positional Insert Unlocks Object Localisation Abilities in VLMs: Michael Dorkenwald,

Nimrod Barazani,

Cees G. M. Snoek,

Yuki M. Asano; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dorkenwald_2024_CVPR,
  author    = {Dorkenwald, Michael and Barazani, Nimrod and Snoek, Cees G. M. and Asano, Yuki M.},
  title     = {{PIN}: Positional Insert Unlocks Object Localisation Abilities in {VLMs}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13548--13558},
}
UniGarmentManip: A Unified Framework for Category-Level Garment Manipulation via Dense Visual Correspondence: Ruihai Wu,

Haoran Lu,

Yiyan Wang,

Yubo Wang,

Hao Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Ruihai and Lu, Haoran and Wang, Yiyan and Wang, Yubo and Dong, Hao},
  title     = {{UniGarmentManip}: A Unified Framework for Category-Level Garment Manipulation via Dense Visual Correspondence},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16340--16350},
}
Multi-Attribute Interactions Matter for 3D Visual Grounding: Can Xu,

Yuehui Han,

Rui Xu,

Le Hui,

Jin Xie,

Jian Yang; [pdf]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Can and Han, Yuehui and Xu, Rui and Hui, Le and Xie, Jin and Yang, Jian},
  title     = {Multi-Attribute Interactions Matter for {3D} Visual Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17253--17262},
}
SCINeRF: Neural Radiance Fields from a Snapshot Compressive Image: Yunhao Li,

Xiaodong Wang,

Ping Wang,

Xin Yuan,

Peidong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Yunhao and Wang, Xiaodong and Wang, Ping and Yuan, Xin and Liu, Peidong},
  title     = {{SCINeRF}: Neural Radiance Fields from a Snapshot Compressive Image},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10542--10552},
}
Improved Visual Grounding through Self-Consistent Explanations: Ruozhen He,

Paola Cascante-Bonilla,

Ziyan Yang,

Alexander C. Berg,

Vicente Ordonez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2024_CVPR,
  author    = {He, Ruozhen and Cascante-Bonilla, Paola and Yang, Ziyan and Berg, Alexander C. and Ordonez, Vicente},
  title     = {Improved Visual Grounding through Self-Consistent Explanations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13095--13105},
}
DifFlow3D: Toward Robust Uncertainty-Aware Scene Flow Estimation with Iterative Diffusion-Based Refinement: Jiuming Liu,

Guangming Wang,

Weicai Ye,

Chaokang Jiang,

Jinru Han,

Zhe Liu,

Guofeng Zhang,

Dalong Du,

Hesheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Jiuming and Wang, Guangming and Ye, Weicai and Jiang, Chaokang and Han, Jinru and Liu, Zhe and Zhang, Guofeng and Du, Dalong and Wang, Hesheng},
  title     = {{DifFlow3D}: Toward Robust Uncertainty-Aware Scene Flow Estimation with Iterative Diffusion-Based Refinement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15109--15119},
}
FlashEval: Towards Fast and Accurate Evaluation of Text-to-image Diffusion Generative Models: Lin Zhao,

Tianchen Zhao,

Zinan Lin,

Xuefei Ning,

Guohao Dai,

Huazhong Yang,

Yu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Lin and Zhao, Tianchen and Lin, Zinan and Ning, Xuefei and Dai, Guohao and Yang, Huazhong and Wang, Yu},
  title     = {{FlashEval}: Towards Fast and Accurate Evaluation of Text-to-image Diffusion Generative Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16122--16131},
}
View From Above: Orthogonal-View aware Cross-view Localization: Shan Wang,

Chuong Nguyen,

Jiawei Liu,

Yanhao Zhang,

Sundaram Muthu,

Fahira Afzal Maken,

Kaihao Zhang,

Hongdong Li; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Shan and Nguyen, Chuong and Liu, Jiawei and Zhang, Yanhao and Muthu, Sundaram and Maken, Fahira Afzal and Zhang, Kaihao and Li, Hongdong},
  title     = {View From Above: Orthogonal-View aware Cross-view Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14843--14852},
}
PeVL: Pose-Enhanced Vision-Language Model for Fine-Grained Human Action Recognition: Haosong Zhang,

Mei Chee Leong,

Liyuan Li,

Weisi Lin; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Haosong and Leong, Mei Chee and Li, Liyuan and Lin, Weisi},
  title     = {{PeVL}: Pose-Enhanced Vision-Language Model for Fine-Grained Human Action Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18857--18867},
}
DeepCache: Accelerating Diffusion Models for Free: Xinyin Ma,

Gongfan Fang,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Xinyin and Fang, Gongfan and Wang, Xinchao},
  title     = {{DeepCache}: Accelerating Diffusion Models for Free},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15762--15772},
}
Learning Correlation Structures for Vision Transformers: Manjin Kim,

Paul Hongsuck Seo,

Cordelia Schmid,

Minsu Cho; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Manjin and Seo, Paul Hongsuck and Schmid, Cordelia and Cho, Minsu},
  title     = {Learning Correlation Structures for Vision Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18941--18951},
}
PrPSeg: Universal Proposition Learning for Panoramic Renal Pathology Segmentation: Ruining Deng,

Quan Liu,

Can Cui,

Tianyuan Yao,

Jialin Yue,

Juming Xiong,

Lining Yu,

Yifei Wu,

Mengmeng Yin,

Yu Wang,

Shilin Zhao,

Yucheng Tang,

Haichun Yang,

Yuankai Huo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2024_CVPR,
  author    = {Deng, Ruining and Liu, Quan and Cui, Can and Yao, Tianyuan and Yue, Jialin and Xiong, Juming and Yu, Lining and Wu, Yifei and Yin, Mengmeng and Wang, Yu and Zhao, Shilin and Tang, Yucheng and Yang, Haichun and Huo, Yuankai},
  title     = {{PrPSeg}: Universal Proposition Learning for Panoramic Renal Pathology Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11736--11746},
}
Weakly-Supervised Audio-Visual Video Parsing with Prototype-based Pseudo-Labeling: Kranthi Kumar Rachavarapu,

Kalyan Ramakrishnan,

Rajagopalan A. N.; [pdf] [supp]
[bibtex]
@InProceedings{Rachavarapu_2024_CVPR,
  author    = {Rachavarapu, Kranthi Kumar and Ramakrishnan, Kalyan and Rajagopalan, A. N.},
  title     = {Weakly-Supervised Audio-Visual Video Parsing with Prototype-based Pseudo-Labeling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18952--18962},
}
Intraoperative 2D/3D Image Registration via Differentiable X-ray Rendering: Vivek Gopalakrishnan,

Neel Dey,

Polina Golland; [pdf] [supp]
[bibtex]
@InProceedings{Gopalakrishnan_2024_CVPR,
  author    = {Gopalakrishnan, Vivek and Dey, Neel and Golland, Polina},
  title     = {Intraoperative {2D/3D} Image Registration via Differentiable {X-ray} Rendering},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11662--11672},
}
MICap: A Unified Model for Identity-Aware Movie Descriptions: Haran Raajesh,

Naveen Reddy Desanur,

Zeeshan Khan,

Makarand Tapaswi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Raajesh_2024_CVPR,
  author    = {Raajesh, Haran and Desanur, Naveen Reddy and Khan, Zeeshan and Tapaswi, Makarand},
  title     = {{MICap}: A Unified Model for Identity-Aware Movie Descriptions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14011--14021},
}
MonoDiff: Monocular 3D Object Detection and Pose Estimation with Diffusion Models: Yasiru Ranasinghe,

Deepti Hegde,

Vishal M. Patel; [pdf] [supp]
[bibtex]
@InProceedings{Ranasinghe_2024_CVPR,
  author    = {Ranasinghe, Yasiru and Hegde, Deepti and Patel, Vishal M.},
  title     = {{MonoDiff}: Monocular {3D} Object Detection and Pose Estimation with Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10659--10670},
}
An Upload-Efficient Scheme for Transferring Knowledge From a Server-Side Pre-trained Generator to Clients in Heterogeneous Federated Learning: Jianqing Zhang,

Yang Liu,

Yang Hua,

Jian Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Jianqing and Liu, Yang and Hua, Yang and Cao, Jian},
  title     = {An Upload-Efficient Scheme for Transferring Knowledge From a Server-Side Pre-trained Generator to Clients in Heterogeneous Federated Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12109--12119},
}
Instance-aware Exploration-Verification-Exploitation for Instance ImageGoal Navigation: Xiaohan Lei,

Min Wang,

Wengang Zhou,

Li Li,

Houqiang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2024_CVPR,
  author    = {Lei, Xiaohan and Wang, Min and Zhou, Wengang and Li, Li and Li, Houqiang},
  title     = {Instance-aware Exploration-Verification-Exploitation for Instance {ImageGoal} Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16329--16339},
}
One-2-3-45++: Fast Single Image to 3D Objects with Consistent Multi-View Generation and 3D Diffusion: Minghua Liu,

Ruoxi Shi,

Linghao Chen,

Zhuoyang Zhang,

Chao Xu,

Xinyue Wei,

Hansheng Chen,

Chong Zeng,

Jiayuan Gu,

Hao Su; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Minghua and Shi, Ruoxi and Chen, Linghao and Zhang, Zhuoyang and Xu, Chao and Wei, Xinyue and Chen, Hansheng and Zeng, Chong and Gu, Jiayuan and Su, Hao},
  title     = {One-2-3-45++: Fast Single Image to {3D} Objects with Consistent Multi-View Generation and {3D} Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10072--10083},
}
Let's Think Outside the Box: Exploring Leap-of-Thought in Large Language Models with Creative Humor Generation: Shanshan Zhong,

Zhongzhan Huang,

Shanghua Gao,

Wushao Wen,

Liang Lin,

Marinka Zitnik,

Pan Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Zhong_2024_CVPR,
  author    = {Zhong, Shanshan and Huang, Zhongzhan and Gao, Shanghua and Wen, Wushao and Lin, Liang and Zitnik, Marinka and Zhou, Pan},
  title     = {Let's Think Outside the Box: Exploring Leap-of-Thought in Large Language Models with Creative Humor Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13246--13257},
}
SceneFun3D: Fine-Grained Functionality and Affordance Understanding in 3D Scenes: Alexandros Delitzas,

Ayca Takmaz,

Federico Tombari,

Robert Sumner,

Marc Pollefeys,

Francis Engelmann; [pdf] [supp]
[bibtex]
@InProceedings{Delitzas_2024_CVPR,
  author    = {Delitzas, Alexandros and Takmaz, Ayca and Tombari, Federico and Sumner, Robert and Pollefeys, Marc and Engelmann, Francis},
  title     = {{SceneFun3D}: Fine-Grained Functionality and Affordance Understanding in {3D} Scenes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14531--14542},
}
Enhanced Motion-Text Alignment for Image-to-Video Transfer Learning: Wei Zhang,

Chaoqun Wan,

Tongliang Liu,

Xinmei Tian,

Xu Shen,

Jieping Ye; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Wei and Wan, Chaoqun and Liu, Tongliang and Tian, Xinmei and Shen, Xu and Ye, Jieping},
  title     = {Enhanced Motion-Text Alignment for Image-to-Video Transfer Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18504--18515},
}
UV-IDM: Identity-Conditioned Latent Diffusion Model for Face UV-Texture Generation: Hong Li,

Yutang Feng,

Song Xue,

Xuhui Liu,

Bohan Zeng,

Shanglin Li,

Boyu Liu,

Jianzhuang Liu,

Shumin Han,

Baochang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Hong and Feng, Yutang and Xue, Song and Liu, Xuhui and Zeng, Bohan and Li, Shanglin and Liu, Boyu and Liu, Jianzhuang and Han, Shumin and Zhang, Baochang},
  title     = {{UV-IDM}: Identity-Conditioned Latent Diffusion Model for Face {UV-Texture} Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10585--10595},
}
A Pedestrian is Worth One Prompt: Towards Language Guidance Person Re-Identification: Zexian Yang,

Dayan Wu,

Chenming Wu,

Zheng Lin,

Jingzi Gu,

Weiping Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Zexian and Wu, Dayan and Wu, Chenming and Lin, Zheng and Gu, Jingzi and Wang, Weiping},
  title     = {A Pedestrian is Worth One Prompt: Towards Language Guidance Person Re-Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17343--17353},
}
NetTrack: Tracking Highly Dynamic Objects with a Net: Guangze Zheng,

Shijie Lin,

Haobo Zuo,

Changhong Fu,

Jia Pan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2024_CVPR,
  author    = {Zheng, Guangze and Lin, Shijie and Zuo, Haobo and Fu, Changhong and Pan, Jia},
  title     = {{NetTrack}: Tracking Highly Dynamic Objects with a Net},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19145--19155},
}
Grounded Question-Answering in Long Egocentric Videos: Shangzhe Di,

Weidi Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Di_2024_CVPR,
  author    = {Di, Shangzhe and Xie, Weidi},
  title     = {Grounded Question-Answering in Long Egocentric Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12934--12943},
}
HPNet: Dynamic Trajectory Forecasting with Historical Prediction Attention: Xiaolong Tang,

Meina Kan,

Shiguang Shan,

Zhilong Ji,

Jinfeng Bai,

Xilin Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2024_CVPR,
  author    = {Tang, Xiaolong and Kan, Meina and Shan, Shiguang and Ji, Zhilong and Bai, Jinfeng and Chen, Xilin},
  title     = {{HPNet}: Dynamic Trajectory Forecasting with Historical Prediction Attention},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15261--15270},
}
SI-MIL: Taming Deep MIL for Self-Interpretability in Gigapixel Histopathology: Saarthak Kapse,

Pushpak Pati,

Srijan Das,

Jingwei Zhang,

Chao Chen,

Maria Vakalopoulou,

Joel Saltz,

Dimitris Samaras,

Rajarsi R. Gupta,

Prateek Prasanna; [pdf] [supp]
[bibtex]
@InProceedings{Kapse_2024_CVPR,
  author    = {Kapse, Saarthak and Pati, Pushpak and Das, Srijan and Zhang, Jingwei and Chen, Chao and Vakalopoulou, Maria and Saltz, Joel and Samaras, Dimitris and Gupta, Rajarsi R. and Prasanna, Prateek},
  title     = {{SI-MIL}: Taming Deep {MIL} for Self-Interpretability in Gigapixel Histopathology},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11226--11237},
}
LayoutFormer: Hierarchical Text Detection Towards Scene Text Understanding: Min Liang,

Jia-Wei Ma,

Xiaobin Zhu,

Jingyan Qin,

Xu-Cheng Yin; [pdf]
[bibtex]
@InProceedings{Liang_2024_CVPR,
  author    = {Liang, Min and Ma, Jia-Wei and Zhu, Xiaobin and Qin, Jingyan and Yin, Xu-Cheng},
  title     = {{LayoutFormer}: Hierarchical Text Detection Towards Scene Text Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15665--15674},
}
GLOW: Global Layout Aware Attacks on Object Detection: Jun Bao,

Buyu Liu,

Kui Ren,

Jun Yu; [pdf] [supp]
[bibtex]
@InProceedings{Bao_2024_CVPR,
  author    = {Bao, Jun and Liu, Buyu and Ren, Kui and Yu, Jun},
  title     = {{GLOW}: Global Layout Aware Attacks on Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12057--12066},
}
SIRA: Scalable Inter-frame Relation and Association for Radar Perception: Ryoma Yataka,

Pu Wang,

Petros Boufounos,

Ryuhei Takahashi; [pdf] [supp]
[bibtex]
@InProceedings{Yataka_2024_CVPR,
  author    = {Yataka, Ryoma and Wang, Pu and Boufounos, Petros and Takahashi, Ryuhei},
  title     = {{SIRA}: Scalable Inter-frame Relation and Association for Radar Perception},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15024--15034},
}
VOODOO 3D: Volumetric Portrait Disentanglement For One-Shot 3D Head Reenactment: Phong Tran,

Egor Zakharov,

Long-Nhat Ho,

Anh Tuan Tran,

Liwen Hu,

Hao Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tran_2024_CVPR,
  author    = {Tran, Phong and Zakharov, Egor and Ho, Long-Nhat and Tran, Anh Tuan and Hu, Liwen and Li, Hao},
  title     = {{VOODOO 3D}: Volumetric Portrait Disentanglement For One-Shot {3D} Head Reenactment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10336--10348},
}
Visual Fact Checker: Enabling High-Fidelity Detailed Caption Generation: Yunhao Ge,

Xiaohui Zeng,

Jacob Samuel Huffman,

Tsung-Yi Lin,

Ming-Yu Liu,

Yin Cui; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ge_2024_CVPR,
  author    = {Ge, Yunhao and Zeng, Xiaohui and Huffman, Jacob Samuel and Lin, Tsung-Yi and Liu, Ming-Yu and Cui, Yin},
  title     = {Visual Fact Checker: Enabling High-Fidelity Detailed Caption Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14033--14042},
}
Communication-Efficient Collaborative Perception via Information Filling with Codebook: Yue Hu,

Juntong Peng,

Sifei Liu,

Junhao Ge,

Si Liu,

Siheng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2024_CVPR,
  author    = {Hu, Yue and Peng, Juntong and Liu, Sifei and Ge, Junhao and Liu, Si and Chen, Siheng},
  title     = {Communication-Efficient Collaborative Perception via Information Filling with Codebook},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15481--15490},
}
MVD-Fusion: Single-view 3D via Depth-consistent Multi-view Generation: Hanzhe Hu,

Zhizhuo Zhou,

Varun Jampani,

Shubham Tulsiani; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2024_CVPR,
  author    = {Hu, Hanzhe and Zhou, Zhizhuo and Jampani, Varun and Tulsiani, Shubham},
  title     = {{MVD-Fusion}: Single-view {3D} via Depth-consistent Multi-view Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9698--9707},
}
Effective Video Mirror Detection with Inconsistent Motion Cues: Alex Warren,

Ke Xu,

Jiaying Lin,

Gary K.L. Tam,

Rynson W.H. Lau; [pdf] [supp]
[bibtex]
@InProceedings{Warren_2024_CVPR,
  author    = {Warren, Alex and Xu, Ke and Lin, Jiaying and Tam, Gary K. L. and Lau, Rynson W. H.},
  title     = {Effective Video Mirror Detection with Inconsistent Motion Cues},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17244--17252},
}
DiffLoc: Diffusion Model for Outdoor LiDAR Localization: Wen Li,

Yuyang Yang,

Shangshu Yu,

Guosheng Hu,

Chenglu Wen,

Ming Cheng,

Cheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Wen and Yang, Yuyang and Yu, Shangshu and Hu, Guosheng and Wen, Chenglu and Cheng, Ming and Wang, Cheng},
  title     = {{DiffLoc}: Diffusion Model for Outdoor {LiDAR} Localization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15045--15054},
}
On Scaling Up a Multilingual Vision and Language Model: Xi Chen,

Josip Djolonga,

Piotr Padlewski,

Basil Mustafa,

Soravit Changpinyo,

Jialin Wu,

Carlos Riquelme Ruiz,

Sebastian Goodman,

Xiao Wang,

Yi Tay,

Siamak Shakeri,

Mostafa Dehghani,

Daniel Salz,

Mario Lucic,

Michael Tschannen,

Arsha Nagrani,

Hexiang Hu,

Mandar Joshi,

Bo Pang,

Ceslee Montgomery,

Paulina Pietrzyk,

Marvin Ritter,

AJ Piergiovanni,

Matthias Minderer,

Filip Pavetic,

Austin Waters,

Gang Li,

Ibrahim Alabdulmohsin,

Lucas Beyer,

Julien Amelot,

Kenton Lee,

Andreas Peter Steiner,

Yang Li,

Daniel Keysers,

Anurag Arnab,

Yuanzhong Xu,

Keran Rong,

Alexander Kolesnikov,

Mojtaba Seyedhosseini,

Anelia Angelova,

Xiaohua Zhai,

Neil Houlsby,

Radu Soricut; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Xi and Djolonga, Josip and Padlewski, Piotr and Mustafa, Basil and Changpinyo, Soravit and Wu, Jialin and Riquelme Ruiz, Carlos and Goodman, Sebastian and Wang, Xiao and Tay, Yi and Shakeri, Siamak and Dehghani, Mostafa and Salz, Daniel and Lucic, Mario and Tschannen, Michael and Nagrani, Arsha and Hu, Hexiang and Joshi, Mandar and Pang, Bo and Montgomery, Ceslee and Pietrzyk, Paulina and Ritter, Marvin and Piergiovanni, AJ and Minderer, Matthias and Pavetic, Filip and Waters, Austin and Li, Gang and Alabdulmohsin, Ibrahim and Beyer, Lucas and Amelot, Julien and Lee, Kenton and Steiner, Andreas Peter and Li, Yang and Keysers, Daniel and Arnab, Anurag and Xu, Yuanzhong and Rong, Keran and Kolesnikov, Alexander and Seyedhosseini, Mojtaba and Angelova, Anelia and Zhai, Xiaohua and Houlsby, Neil and Soricut, Radu},
  title     = {On Scaling Up a Multilingual Vision and Language Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14432--14444},
}
Day-Night Cross-domain Vehicle Re-identification: Hongchao Li,

Jingong Chen,

Aihua Zheng,

Yong Wu,

Yonglong Luo; [pdf]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Hongchao and Chen, Jingong and Zheng, Aihua and Wu, Yong and Luo, Yonglong},
  title     = {Day-Night Cross-domain Vehicle Re-identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12626--12635},
}
Holodeck: Language Guided Generation of 3D Embodied AI Environments: Yue Yang,

Fan-Yun Sun,

Luca Weihs,

Eli VanderBilt,

Alvaro Herrasti,

Winson Han,

Jiajun Wu,

Nick Haber,

Ranjay Krishna,

Lingjie Liu,

Chris Callison-Burch,

Mark Yatskar,

Aniruddha Kembhavi,

Christopher Clark; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Yue and Sun, Fan-Yun and Weihs, Luca and VanderBilt, Eli and Herrasti, Alvaro and Han, Winson and Wu, Jiajun and Haber, Nick and Krishna, Ranjay and Liu, Lingjie and Callison-Burch, Chris and Yatskar, Mark and Kembhavi, Aniruddha and Clark, Christopher},
  title     = {Holodeck: Language Guided Generation of {3D} Embodied {AI} Environments},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16227--16237},
}
Distilled Datamodel with Reverse Gradient Matching: Jingwen Ye,

Ruonan Yu,

Songhua Liu,

Xinchao Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2024_CVPR,
  author    = {Ye, Jingwen and Yu, Ruonan and Liu, Songhua and Wang, Xinchao},
  title     = {Distilled Datamodel with Reverse Gradient Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11954--11963},
}
Pseudo Label Refinery for Unsupervised Domain Adaptation on Cross-dataset 3D Object Detection: Zhanwei Zhang,

Minghao Chen,

Shuai Xiao,

Liang Peng,

Hengjia Li,

Binbin Lin,

Ping Li,

Wenxiao Wang,

Boxi Wu,

Deng Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Zhanwei and Chen, Minghao and Xiao, Shuai and Peng, Liang and Li, Hengjia and Lin, Binbin and Li, Ping and Wang, Wenxiao and Wu, Boxi and Cai, Deng},
  title     = {Pseudo Label Refinery for Unsupervised Domain Adaptation on Cross-dataset {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15291--15300},
}
Reconstructing Hands in 3D with Transformers: Georgios Pavlakos,

Dandan Shan,

Ilija Radosavovic,

Angjoo Kanazawa,

David Fouhey,

Jitendra Malik; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pavlakos_2024_CVPR,
  author    = {Pavlakos, Georgios and Shan, Dandan and Radosavovic, Ilija and Kanazawa, Angjoo and Fouhey, David and Malik, Jitendra},
  title     = {Reconstructing Hands in {3D} with Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9826--9836},
}
PELA: Learning Parameter-Efficient Models with Low-Rank Approximation: Yangyang Guo,

Guangzhi Wang,

Mohan Kankanhalli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2024_CVPR,
  author    = {Guo, Yangyang and Wang, Guangzhi and Kankanhalli, Mohan},
  title     = {{PELA}: Learning Parameter-Efficient Models with Low-Rank Approximation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15699--15709},
}
Auto-Train-Once: Controller Network Guided Automatic Network Pruning from Scratch: Xidong Wu,

Shangqian Gao,

Zeyu Zhang,

Zhenzhen Li,

Runxue Bao,

Yanfu Zhang,

Xiaoqian Wang,

Heng Huang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Xidong and Gao, Shangqian and Zhang, Zeyu and Li, Zhenzhen and Bao, Runxue and Zhang, Yanfu and Wang, Xiaoqian and Huang, Heng},
  title     = {Auto-Train-Once: Controller Network Guided Automatic Network Pruning from Scratch},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16163--16173},
}
Constructing and Exploring Intermediate Domains in Mixed Domain Semi-supervised Medical Image Segmentation: Qinghe Ma,

Jian Zhang,

Lei Qi,

Qian Yu,

Yinghuan Shi,

Yang Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Qinghe and Zhang, Jian and Qi, Lei and Yu, Qian and Shi, Yinghuan and Gao, Yang},
  title     = {Constructing and Exploring Intermediate Domains in Mixed Domain Semi-supervised Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11642--11651},
}
From Isolated Islands to Pangea: Unifying Semantic Space for Human Action Understanding: Yong-Lu Li,

Xiaoqian Wu,

Xinpeng Liu,

Zehao Wang,

Yiming Dou,

Yikun Ji,

Junyi Zhang,

Yixing Li,

Xudong Lu,

Jingru Tan,

Cewu Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Yong-Lu and Wu, Xiaoqian and Liu, Xinpeng and Wang, Zehao and Dou, Yiming and Ji, Yikun and Zhang, Junyi and Li, Yixing and Lu, Xudong and Tan, Jingru and Lu, Cewu},
  title     = {From Isolated Islands to {Pangea}: Unifying Semantic Space for Human Action Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16582--16592},
}
Bootstrapping Autonomous Driving Radars with Self-Supervised Learning: Yiduo Hao,

Sohrab Madani,

Junfeng Guan,

Mohammed Alloulah,

Saurabh Gupta,

Haitham Hassanieh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hao_2024_CVPR,
  author    = {Hao, Yiduo and Madani, Sohrab and Guan, Junfeng and Alloulah, Mohammed and Gupta, Saurabh and Hassanieh, Haitham},
  title     = {Bootstrapping Autonomous Driving Radars with Self-Supervised Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15012--15023},
}
Weakly Supervised Monocular 3D Detection with a Single-View Image: Xueying Jiang,

Sheng Jin,

Lewei Lu,

Xiaoqin Zhang,

Shijian Lu; [pdf] [arXiv]
[bibtex]
@InProceedings{Jiang_2024_CVPR,
  author    = {Jiang, Xueying and Jin, Sheng and Lu, Lewei and Zhang, Xiaoqin and Lu, Shijian},
  title     = {Weakly Supervised Monocular {3D} Detection with a Single-View Image},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10508--10518},
}
Blind Image Quality Assessment Based on Geometric Order Learning: Nyeong-Ho Shin,

Seon-Ho Lee,

Chang-Su Kim; [pdf] [supp]
[bibtex]
@InProceedings{Shin_2024_CVPR,
  author    = {Shin, Nyeong-Ho and Lee, Seon-Ho and Kim, Chang-Su},
  title     = {Blind Image Quality Assessment Based on Geometric Order Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12799--12808},
}
Generalizable Whole Slide Image Classification with Fine-Grained Visual-Semantic Interaction: Hao Li,

Ying Chen,

Yifei Chen,

Rongshan Yu,

Wenxian Yang,

Liansheng Wang,

Bowen Ding,

Yuchen Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Hao and Chen, Ying and Chen, Yifei and Yu, Rongshan and Yang, Wenxian and Wang, Liansheng and Ding, Bowen and Han, Yuchen},
  title     = {Generalizable Whole Slide Image Classification with Fine-Grained Visual-Semantic Interaction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11398--11407},
}
Generalizing 6-DoF Grasp Detection via Domain Prior Knowledge: Haoxiang Ma,

Modi Shi,

Boyang Gao,

Di Huang; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Haoxiang and Shi, Modi and Gao, Boyang and Huang, Di},
  title     = {Generalizing {6-DoF} Grasp Detection via Domain Prior Knowledge},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18102--18111},
}
RadSimReal: Bridging the Gap Between Synthetic and Real Data in Radar Object Detection With Simulation: Oded Bialer,

Yuval Haitman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bialer_2024_CVPR, author = {Bialer, Oded and Haitman, Yuval}, title = {RadSimReal: Bridging the Gap Between Synthetic and Real Data in Radar Object Detection With Simulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15407-15416} }
3DSFLabelling: Boosting 3D Scene Flow Estimation by Pseudo Auto-labelling: Chaokang Jiang,

Guangming Wang,

Jiuming Liu,

Hesheng Wang,

Zhuang Ma,

Zhenqiang Liu,

Zhujin Liang,

Yi Shan,

Dalong Du; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2024_CVPR, author = {Jiang, Chaokang and Wang, Guangming and Liu, Jiuming and Wang, Hesheng and Ma, Zhuang and Liu, Zhenqiang and Liang, Zhujin and Shan, Yi and Du, Dalong}, title = {3DSFLabelling: Boosting 3D Scene Flow Estimation by Pseudo Auto-labelling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15173-15183} }
Question Aware Vision Transformer for Multimodal Reasoning: Roy Ganz,

Yair Kittenplon,

Aviad Aberdam,

Elad Ben Avraham,

Oren Nuriel,

Shai Mazor,

Ron Litman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ganz_2024_CVPR, author = {Ganz, Roy and Kittenplon, Yair and Aberdam, Aviad and Ben Avraham, Elad and Nuriel, Oren and Mazor, Shai and Litman, Ron}, title = {Question Aware Vision Transformer for Multimodal Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13861-13871} }
OST: Refining Text Knowledge with Optimal Spatio-Temporal Descriptor for General Video Recognition: Tongjia Chen,

Hongshan Yu,

Zhengeng Yang,

Zechuan Li,

Wei Sun,

Chen Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR, author = {Chen, Tongjia and Yu, Hongshan and Yang, Zhengeng and Li, Zechuan and Sun, Wei and Chen, Chen}, title = {OST: Refining Text Knowledge with Optimal Spatio-Temporal Descriptor for General Video Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18888-18898} }
Habitat Synthetic Scenes Dataset (HSSD-200): An Analysis of 3D Scene Scale and Realism Tradeoffs for ObjectGoal Navigation: Mukul Khanna,

Yongsen Mao,

Hanxiao Jiang,

Sanjay Haresh,

Brennan Shacklett,

Dhruv Batra,

Alexander Clegg,

Eric Undersander,

Angel X. Chang,

Manolis Savva; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Khanna_2024_CVPR, author = {Khanna, Mukul and Mao, Yongsen and Jiang, Hanxiao and Haresh, Sanjay and Shacklett, Brennan and Batra, Dhruv and Clegg, Alexander and Undersander, Eric and Chang, Angel X. and Savva, Manolis}, title = {Habitat Synthetic Scenes Dataset (HSSD-200): An Analysis of 3D Scene Scale and Realism Tradeoffs for ObjectGoal Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16384-16393} }
NViST: In the Wild New View Synthesis from a Single Image with Transformers: Wonbong Jang,

Lourdes Agapito; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2024_CVPR, author = {Jang, Wonbong and Agapito, Lourdes}, title = {NViST: In the Wild New View Synthesis from a Single Image with Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10181-10193} }
Step Differences in Instructional Video: Tushar Nagarajan,

Lorenzo Torresani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nagarajan_2024_CVPR, author = {Nagarajan, Tushar and Torresani, Lorenzo}, title = {Step Differences in Instructional Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18740-18750} }
Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data: Lihe Yang,

Bingyi Kang,

Zilong Huang,

Xiaogang Xu,

Jiashi Feng,

Hengshuang Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR, author = {Yang, Lihe and Kang, Bingyi and Huang, Zilong and Xu, Xiaogang and Feng, Jiashi and Zhao, Hengshuang}, title = {Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10371-10381} }
MPOD123: One Image to 3D Content Generation Using Mask-enhanced Progressive Outline-to-Detail Optimization: Jimin Xu,

Tianbao Wang,

Tao Jin,

Shengyu Zhang,

Dongjie Fu,

Zhe Wang,

Jiangjing Lyu,

Chengfei Lv,

Chaoyue Niu,

Zhou Yu,

Zhou Zhao,

Fei Wu; [pdf]
[bibtex]
@InProceedings{Xu_2024_CVPR, author = {Xu, Jimin and Wang, Tianbao and Jin, Tao and Zhang, Shengyu and Fu, Dongjie and Wang, Zhe and Lyu, Jiangjing and Lv, Chengfei and Niu, Chaoyue and Yu, Zhou and Zhao, Zhou and Wu, Fei}, title = {MPOD123: One Image to 3D Content Generation Using Mask-enhanced Progressive Outline-to-Detail Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10682-10692} }
UnionFormer: Unified-Learning Transformer with Multi-View Representation for Image Manipulation Detection and Localization: Shuaibo Li,

Wei Ma,

Jianwei Guo,

Shibiao Xu,

Benchong Li,

Xiaopeng Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2024_CVPR, author = {Li, Shuaibo and Ma, Wei and Guo, Jianwei and Xu, Shibiao and Li, Benchong and Zhang, Xiaopeng}, title = {UnionFormer: Unified-Learning Transformer with Multi-View Representation for Image Manipulation Detection and Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12523-12533} }
Situational Awareness Matters in 3D Vision Language Reasoning: Yunze Man,

Liang-Yan Gui,

Yu-Xiong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Man_2024_CVPR, author = {Man, Yunze and Gui, Liang-Yan and Wang, Yu-Xiong}, title = {Situational Awareness Matters in 3D Vision Language Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13678-13688} }
RCBEVDet: Radar-camera Fusion in Bird's Eye View for 3D Object Detection: Zhiwei Lin,

Zhe Liu,

Zhongyu Xia,

Xinhao Wang,

Yongtao Wang,

Shengxiang Qi,

Yang Dong,

Nan Dong,

Le Zhang,

Ce Zhu; [pdf]
[bibtex]
@InProceedings{Lin_2024_CVPR, author = {Lin, Zhiwei and Liu, Zhe and Xia, Zhongyu and Wang, Xinhao and Wang, Yongtao and Qi, Shengxiang and Dong, Yang and Dong, Nan and Zhang, Le and Zhu, Ce}, title = {RCBEVDet: Radar-camera Fusion in Bird's Eye View for 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14928-14937} }
Adaptive Softassign via Hadamard-Equipped Sinkhorn: Binrui Shen,

Qiang Niu,

Shengxin Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shen_2024_CVPR, author = {Shen, Binrui and Niu, Qiang and Zhu, Shengxin}, title = {Adaptive Softassign via Hadamard-Equipped Sinkhorn}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17638-17647} }
Re-thinking Data Availability Attacks Against Deep Neural Networks: Bin Fang,

Bo Li,

Shuang Wu,

Shouhong Ding,

Ran Yi,

Lizhuang Ma; [pdf] [supp]
[bibtex]
@InProceedings{Fang_2024_CVPR, author = {Fang, Bin and Li, Bo and Wu, Shuang and Ding, Shouhong and Yi, Ran and Ma, Lizhuang}, title = {Re-thinking Data Availability Attacks Against Deep Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12215-12224} }
SHiNe: Semantic Hierarchy Nexus for Open-vocabulary Object Detection: Mingxuan Liu,

Tyler L. Hayes,

Elisa Ricci,

Gabriela Csurka,

Riccardo Volpi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR, author = {Liu, Mingxuan and Hayes, Tyler L. and Ricci, Elisa and Csurka, Gabriela and Volpi, Riccardo}, title = {SHiNe: Semantic Hierarchy Nexus for Open-vocabulary Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16634-16644} }
Ranking Distillation for Open-Ended Video Question Answering with Insufficient Labels: Tianming Liang,

Chaolei Tan,

Beihao Xia,

Wei-Shi Zheng,

Jian-Fang Hu; [pdf] [arXiv]
[bibtex]
@InProceedings{Liang_2024_CVPR, author = {Liang, Tianming and Tan, Chaolei and Xia, Beihao and Zheng, Wei-Shi and Hu, Jian-Fang}, title = {Ranking Distillation for Open-Ended Video Question Answering with Insufficient Labels}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13161-13170} }
Depth-Aware Concealed Crop Detection in Dense Agricultural Scenes: Liqiong Wang,

Jinyu Yang,

Yanfu Zhang,

Fangyi Wang,

Feng Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2024_CVPR, author = {Wang, Liqiong and Yang, Jinyu and Zhang, Yanfu and Wang, Fangyi and Zheng, Feng}, title = {Depth-Aware Concealed Crop Detection in Dense Agricultural Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17201-17211} }
Solving the Catastrophic Forgetting Problem in Generalized Category Discovery: Xinzi Cao,

Xiawu Zheng,

Guanhong Wang,

Weijiang Yu,

Yunhang Shen,

Ke Li,

Yutong Lu,

Yonghong Tian; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2024_CVPR, author = {Cao, Xinzi and Zheng, Xiawu and Wang, Guanhong and Yu, Weijiang and Shen, Yunhang and Li, Ke and Lu, Yutong and Tian, Yonghong}, title = {Solving the Catastrophic Forgetting Problem in Generalized Category Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16880-16889} }
Data-Efficient Unsupervised Interpolation Without Any Intermediate Frame for 4D Medical Images: JungEun Kim,

Hangyul Yoon,

Geondo Park,

Kyungsu Kim,

Eunho Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR, author = {Kim, JungEun and Yoon, Hangyul and Park, Geondo and Kim, Kyungsu and Yang, Eunho}, title = {Data-Efficient Unsupervised Interpolation Without Any Intermediate Frame for 4D Medical Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11353-11364} }
Learning the 3D Fauna of the Web: Zizhang Li,

Dor Litvak,

Ruining Li,

Yunzhi Zhang,

Tomas Jakab,

Christian Rupprecht,

Shangzhe Wu,

Andrea Vedaldi,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR, author = {Li, Zizhang and Litvak, Dor and Li, Ruining and Zhang, Yunzhi and Jakab, Tomas and Rupprecht, Christian and Wu, Shangzhe and Vedaldi, Andrea and Wu, Jiajun}, title = {Learning the 3D Fauna of the Web}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9752-9762} }
LISA: Reasoning Segmentation via Large Language Model: Xin Lai,

Zhuotao Tian,

Yukang Chen,

Yanwei Li,

Yuhui Yuan,

Shu Liu,

Jiaya Jia; [pdf] [arXiv]
[bibtex]
@InProceedings{Lai_2024_CVPR, author = {Lai, Xin and Tian, Zhuotao and Chen, Yukang and Li, Yanwei and Yuan, Yuhui and Liu, Shu and Jia, Jiaya}, title = {LISA: Reasoning Segmentation via Large Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9579-9589} }
Bridging the Gap: A Unified Video Comprehension Framework for Moment Retrieval and Highlight Detection: Yicheng Xiao,

Zhuoyan Luo,

Yong Liu,

Yue Ma,

Hengwei Bian,

Yatai Ji,

Yujiu Yang,

Xiu Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2024_CVPR, author = {Xiao, Yicheng and Luo, Zhuoyan and Liu, Yong and Ma, Yue and Bian, Hengwei and Ji, Yatai and Yang, Yujiu and Li, Xiu}, title = {Bridging the Gap: A Unified Video Comprehension Framework for Moment Retrieval and Highlight Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18709-18719} }
MuseChat: A Conversational Music Recommendation System for Videos: Zhikang Dong,

Xiulong Liu,

Bin Chen,

Pawel Polak,

Peng Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2024_CVPR, author = {Dong, Zhikang and Liu, Xiulong and Chen, Bin and Polak, Pawel and Zhang, Peng}, title = {MuseChat: A Conversational Music Recommendation System for Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12775-12785} }
Device-Wise Federated Network Pruning: Shangqian Gao,

Junyi Li,

Zeyu Zhang,

Yanfu Zhang,

Weidong Cai,

Heng Huang; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2024_CVPR, author = {Gao, Shangqian and Li, Junyi and Zhang, Zeyu and Zhang, Yanfu and Cai, Weidong and Huang, Heng}, title = {Device-Wise Federated Network Pruning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12342-12352} }
MoReVQA: Exploring Modular Reasoning Models for Video Question Answering: Juhong Min,

Shyamal Buch,

Arsha Nagrani,

Minsu Cho,

Cordelia Schmid; [pdf] [arXiv]
[bibtex]
@InProceedings{Min_2024_CVPR, author = {Min, Juhong and Buch, Shyamal and Nagrani, Arsha and Cho, Minsu and Schmid, Cordelia}, title = {MoReVQA: Exploring Modular Reasoning Models for Video Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13235-13245} }
Low-Rank Rescaled Vision Transformer Fine-Tuning: A Residual Design Approach: Wei Dong,

Xing Zhang,

Bihui Chen,

Dawei Yan,

Zhijun Lin,

Qingsen Yan,

Peng Wang,

Yang Yang; [pdf] [supp]
[bibtex]
@InProceedings{Dong_2024_CVPR, author = {Dong, Wei and Zhang, Xing and Chen, Bihui and Yan, Dawei and Lin, Zhijun and Yan, Qingsen and Wang, Peng and Yang, Yang}, title = {Low-Rank Rescaled Vision Transformer Fine-Tuning: A Residual Design Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16101-16110} }
Distribution-aware Knowledge Prototyping for Non-exemplar Lifelong Person Re-identification: Kunlun Xu,

Xu Zou,

Yuxin Peng,

Jiahuan Zhou; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2024_CVPR, author = {Xu, Kunlun and Zou, Xu and Peng, Yuxin and Zhou, Jiahuan}, title = {Distribution-aware Knowledge Prototyping for Non-exemplar Lifelong Person Re-identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16604-16613} }
Generating Enhanced Negatives for Training Language-Based Object Detectors: Shiyu Zhao,

Long Zhao,

Vijay Kumar B G,

Yumin Suh,

Dimitris N. Metaxas,

Manmohan Chandraker,

Samuel Schulter; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2024_CVPR, author = {Zhao, Shiyu and Zhao, Long and G, Vijay Kumar B and Suh, Yumin and Metaxas, Dimitris N. and Chandraker, Manmohan and Schulter, Samuel}, title = {Generating Enhanced Negatives for Training Language-Based Object Detectors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13592-13602} }
FedAS: Bridging Inconsistency in Personalized Federated Learning: Xiyuan Yang,

Wenke Huang,

Mang Ye; [pdf]
[bibtex]
@InProceedings{Yang_2024_CVPR, author = {Yang, Xiyuan and Huang, Wenke and Ye, Mang}, title = {FedAS: Bridging Inconsistency in Personalized Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11986-11995} }
MoST: Multi-Modality Scene Tokenization for Motion Prediction: Norman Mu,

Jingwei Ji,

Zhenpei Yang,

Nate Harada,

Haotian Tang,

Kan Chen,

Charles R. Qi,

Runzhou Ge,

Kratarth Goel,

Zoey Yang,

Scott Ettinger,

Rami Al-Rfou,

Dragomir Anguelov,

Yin Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mu_2024_CVPR, author = {Mu, Norman and Ji, Jingwei and Yang, Zhenpei and Harada, Nate and Tang, Haotian and Chen, Kan and Qi, Charles R. and Ge, Runzhou and Goel, Kratarth and Yang, Zoey and Ettinger, Scott and Al-Rfou, Rami and Anguelov, Dragomir and Zhou, Yin}, title = {MoST: Multi-Modality Scene Tokenization for Motion Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14988-14999} }
PIGEON: Predicting Image Geolocations: Lukas Haas,

Michal Skreta,

Silas Alberti,

Chelsea Finn; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Haas_2024_CVPR, author = {Haas, Lukas and Skreta, Michal and Alberti, Silas and Finn, Chelsea}, title = {PIGEON: Predicting Image Geolocations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12893-12902} }
Flow-Guided Online Stereo Rectification for Wide Baseline Stereo: Anush Kumar,

Fahim Mannan,

Omid Hosseini Jafari,

Shile Li,

Felix Heide; [pdf] [supp]
[bibtex]
@InProceedings{Kumar_2024_CVPR, author = {Kumar, Anush and Mannan, Fahim and Jafari, Omid Hosseini and Li, Shile and Heide, Felix}, title = {Flow-Guided Online Stereo Rectification for Wide Baseline Stereo}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15375-15385} }
Driving Everywhere with Large Language Model Policy Adaptation: Boyi Li,

Yue Wang,

Jiageng Mao,

Boris Ivanovic,

Sushant Veer,

Karen Leung,

Marco Pavone; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR, author = {Li, Boyi and Wang, Yue and Mao, Jiageng and Ivanovic, Boris and Veer, Sushant and Leung, Karen and Pavone, Marco}, title = {Driving Everywhere with Large Language Model Policy Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14948-14957} }
Koala: Key Frame-Conditioned Long Video-LLM: Reuben Tan,

Ximeng Sun,

Ping Hu,

Jui-hsien Wang,

Hanieh Deilamsalehy,

Bryan A. Plummer,

Bryan Russell,

Kate Saenko; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2024_CVPR, author = {Tan, Reuben and Sun, Ximeng and Hu, Ping and Wang, Jui-hsien and Deilamsalehy, Hanieh and Plummer, Bryan A. and Russell, Bryan and Saenko, Kate}, title = {Koala: Key Frame-Conditioned Long Video-LLM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13581-13591} }
HallusionBench: An Advanced Diagnostic Suite for Entangled Language Hallucination and Visual Illusion in Large Vision-Language Models: Tianrui Guan,

Fuxiao Liu,

Xiyang Wu,

Ruiqi Xian,

Zongxia Li,

Xiaoyu Liu,

Xijun Wang,

Lichang Chen,

Furong Huang,

Yaser Yacoob,

Dinesh Manocha,

Tianyi Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guan_2024_CVPR, author = {Guan, Tianrui and Liu, Fuxiao and Wu, Xiyang and Xian, Ruiqi and Li, Zongxia and Liu, Xiaoyu and Wang, Xijun and Chen, Lichang and Huang, Furong and Yacoob, Yaser and Manocha, Dinesh and Zhou, Tianyi}, title = {HallusionBench: An Advanced Diagnostic Suite for Entangled Language Hallucination and Visual Illusion in Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14375-14385} }
ID-like Prompt Learning for Few-Shot Out-of-Distribution Detection: Yichen Bai,

Zongbo Han,

Bing Cao,

Xiaoheng Jiang,

Qinghua Hu,

Changqing Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bai_2024_CVPR, author = {Bai, Yichen and Han, Zongbo and Cao, Bing and Jiang, Xiaoheng and Hu, Qinghua and Zhang, Changqing}, title = {ID-like Prompt Learning for Few-Shot Out-of-Distribution Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17480-17489} }
Jack of All Tasks Master of Many: Designing General-Purpose Coarse-to-Fine Vision-Language Model: Shraman Pramanick,

Guangxing Han,

Rui Hou,

Sayan Nag,

Ser-Nam Lim,

Nicolas Ballas,

Qifan Wang,

Rama Chellappa,

Amjad Almahairi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pramanick_2024_CVPR, author = {Pramanick, Shraman and Han, Guangxing and Hou, Rui and Nag, Sayan and Lim, Ser-Nam and Ballas, Nicolas and Wang, Qifan and Chellappa, Rama and Almahairi, Amjad}, title = {Jack of All Tasks Master of Many: Designing General-Purpose Coarse-to-Fine Vision-Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14076-14088} }
SchurVINS: Schur Complement-Based Lightweight Visual Inertial Navigation System: Yunfei Fan,

Tianyu Zhao,

Guidong Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Fan_2024_CVPR, author = {Fan, Yunfei and Zhao, Tianyu and Wang, Guidong}, title = {SchurVINS: Schur Complement-Based Lightweight Visual Inertial Navigation System}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17964-17973} }
ViP-LLaVA: Making Large Multimodal Models Understand Arbitrary Visual Prompts: Mu Cai,

Haotian Liu,

Siva Karthik Mustikovela,

Gregory P. Meyer,

Yuning Chai,

Dennis Park,

Yong Jae Lee; [pdf] [supp]
[bibtex]
@InProceedings{Cai_2024_CVPR, author = {Cai, Mu and Liu, Haotian and Mustikovela, Siva Karthik and Meyer, Gregory P. and Chai, Yuning and Park, Dennis and Lee, Yong Jae}, title = {ViP-LLaVA: Making Large Multimodal Models Understand Arbitrary Visual Prompts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12914-12923} }
OVER-NAV: Elevating Iterative Vision-and-Language Navigation with Open-Vocabulary Detection and StructurEd Representation: Ganlong Zhao,

Guanbin Li,

Weikai Chen,

Yizhou Yu; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2024_CVPR, author = {Zhao, Ganlong and Li, Guanbin and Chen, Weikai and Yu, Yizhou}, title = {OVER-NAV: Elevating Iterative Vision-and-Language Navigation with Open-Vocabulary Detection and StructurEd Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16296-16306} }
All Rivers Run to the Sea: Private Learning with Asymmetric Flows: Yue Niu,

Ramy E. Ali,

Saurav Prakash,

Salman Avestimehr; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Niu_2024_CVPR, author = {Niu, Yue and Ali, Ramy E. and Prakash, Saurav and Avestimehr, Salman}, title = {All Rivers Run to the Sea: Private Learning with Asymmetric Flows}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12353-12362} }
HandBooster: Boosting 3D Hand-Mesh Reconstruction by Conditional Synthesis and Sampling of Hand-Object Interactions: Hao Xu,

Haipeng Li,

Yinqiao Wang,

Shuaicheng Liu,

Chi-Wing Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR, author = {Xu, Hao and Li, Haipeng and Wang, Yinqiao and Liu, Shuaicheng and Fu, Chi-Wing}, title = {HandBooster: Boosting 3D Hand-Mesh Reconstruction by Conditional Synthesis and Sampling of Hand-Object Interactions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10159-10169} }
A-Teacher: Asymmetric Network for 3D Semi-Supervised Object Detection: Hanshi Wang,

Zhipeng Zhang,

Jin Gao,

Weiming Hu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2024_CVPR, author = {Wang, Hanshi and Zhang, Zhipeng and Gao, Jin and Hu, Weiming}, title = {A-Teacher: Asymmetric Network for 3D Semi-Supervised Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14978-14987} }
Visual Objectification in Films: Towards a New AI Task for Video Interpretation: Julie Tores,

Lucile Sassatelli,

Hui-Yin Wu,

Clement Bergman,

Léa Andolfi,

Victor Ecrement,

Frédéric Precioso,

Thierry Devars,

Magali Guaresi,

Virginie Julliard,

Sarah Lecossais; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tores_2024_CVPR, author = {Tores, Julie and Sassatelli, Lucile and Wu, Hui-Yin and Bergman, Clement and Andolfi, L\'ea and Ecrement, Victor and Precioso, Fr\'ed\'eric and Devars, Thierry and Guaresi, Magali and Julliard, Virginie and Lecossais, Sarah}, title = {Visual Objectification in Films: Towards a New AI Task for Video Interpretation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10864-10874} }
BiTT: Bi-directional Texture Reconstruction of Interacting Two Hands from a Single Image: Minje Kim,

Tae-Kyun Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR, author = {Kim, Minje and Kim, Tae-Kyun}, title = {BiTT: Bi-directional Texture Reconstruction of Interacting Two Hands from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10726-10735} }
Learning to Localize Objects Improves Spatial Reasoning in Visual-LLMs: Kanchana Ranasinghe,

Satya Narayan Shukla,

Omid Poursaeed,

Michael S. Ryoo,

Tsung-Yu Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ranasinghe_2024_CVPR, author = {Ranasinghe, Kanchana and Shukla, Satya Narayan and Poursaeed, Omid and Ryoo, Michael S. and Lin, Tsung-Yu}, title = {Learning to Localize Objects Improves Spatial Reasoning in Visual-LLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12977-12987} }
Self-Distilled Masked Auto-Encoders are Efficient Video Anomaly Detectors: Nicolae-Cătălin Ristea,

Florinel-Alin Croitoru,

Radu Tudor Ionescu,

Marius Popescu,

Fahad Shahbaz Khan,

Mubarak Shah; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ristea_2024_CVPR, author = {Ristea, Nicolae-C{\u{a}}t{\u{a}}lin and Croitoru, Florinel-Alin and Ionescu, Radu Tudor and Popescu, Marius and Khan, Fahad Shahbaz and Shah, Mubarak}, title = {Self-Distilled Masked Auto-Encoders are Efficient Video Anomaly Detectors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15984-15995} }
Distilling Vision-Language Models on Millions of Videos: Yue Zhao,

Long Zhao,

Xingyi Zhou,

Jialin Wu,

Chun-Te Chu,

Hui Miao,

Florian Schroff,

Hartwig Adam,

Ting Liu,

Boqing Gong,

Philipp Krahenbuhl,

Liangzhe Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2024_CVPR, author = {Zhao, Yue and Zhao, Long and Zhou, Xingyi and Wu, Jialin and Chu, Chun-Te and Miao, Hui and Schroff, Florian and Adam, Hartwig and Liu, Ting and Gong, Boqing and Krahenbuhl, Philipp and Yuan, Liangzhe}, title = {Distilling Vision-Language Models on Millions of Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13106-13116} }
Generalized Predictive Model for Autonomous Driving: Jiazhi Yang,

Shenyuan Gao,

Yihang Qiu,

Li Chen,

Tianyu Li,

Bo Dai,

Kashyap Chitta,

Penghao Wu,

Jia Zeng,

Ping Luo,

Jun Zhang,

Andreas Geiger,

Yu Qiao,

Hongyang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR, author = {Yang, Jiazhi and Gao, Shenyuan and Qiu, Yihang and Chen, Li and Li, Tianyu and Dai, Bo and Chitta, Kashyap and Wu, Penghao and Zeng, Jia and Luo, Ping and Zhang, Jun and Geiger, Andreas and Qiao, Yu and Li, Hongyang}, title = {Generalized Predictive Model for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14662-14672} }
FACT: Frame-Action Cross-Attention Temporal Modeling for Efficient Action Segmentation: Zijia Lu,

Ehsan Elhamifar; [pdf] [supp]
[bibtex]
@InProceedings{Lu_2024_CVPR, author = {Lu, Zijia and Elhamifar, Ehsan}, title = {FACT: Frame-Action Cross-Attention Temporal Modeling for Efficient Action Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18175-18185} }
Test-Time Zero-Shot Temporal Action Localization: Benedetta Liberatori,

Alessandro Conti,

Paolo Rota,

Yiming Wang,

Elisa Ricci; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liberatori_2024_CVPR, author = {Liberatori, Benedetta and Conti, Alessandro and Rota, Paolo and Wang, Yiming and Ricci, Elisa}, title = {Test-Time Zero-Shot Temporal Action Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18720-18729} }
AM-RADIO: Agglomerative Vision Foundation Model Reduce All Domains Into One: Mike Ranzinger,

Greg Heinrich,

Jan Kautz,

Pavlo Molchanov; [pdf] [supp]
[bibtex]
@InProceedings{Ranzinger_2024_CVPR, author = {Ranzinger, Mike and Heinrich, Greg and Kautz, Jan and Molchanov, Pavlo}, title = {AM-RADIO: Agglomerative Vision Foundation Model Reduce All Domains Into One}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12490-12500} }
FastMAC: Stochastic Spectral Sampling of Correspondence Graph: Yifei Zhang,

Hao Zhao,

Hongyang Li,

Siheng Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yifei and Zhao, Hao and Li, Hongyang and Chen, Siheng}, title = {FastMAC: Stochastic Spectral Sampling of Correspondence Graph}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17857-17867} }
FedSOL: Stabilized Orthogonal Learning with Proximal Restrictions in Federated Learning: Gihun Lee,

Minchan Jeong,

Sangmook Kim,

Jaehoon Oh,

Se-Young Yun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2024_CVPR, author = {Lee, Gihun and Jeong, Minchan and Kim, Sangmook and Oh, Jaehoon and Yun, Se-Young}, title = {FedSOL: Stabilized Orthogonal Learning with Proximal Restrictions in Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12512-12522} }
A Category Agnostic Model for Visual Rearrangement: Yuyi Liu,

Xinhang Song,

Weijie Li,

Xiaohan Wang,

Shuqiang Jiang; [pdf]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Yuyi and Song, Xinhang and Li, Weijie and Wang, Xiaohan and Jiang, Shuqiang},
  title     = {A Category Agnostic Model for Visual Rearrangment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16457--16466},
}
Representing Part-Whole Hierarchies in Foundation Models by Learning Localizability Composability and Decomposability from Anatomy via Self Supervision: Mohammad Reza Hosseinzadeh Taher,

Michael B. Gotway,

Jianming Liang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Taher_2024_CVPR,
  author    = {Taher, Mohammad Reza Hosseinzadeh and Gotway, Michael B. and Liang, Jianming},
  title     = {Representing Part-Whole Hierarchies in Foundation Models by Learning Localizability Composability and Decomposability from Anatomy via Self Supervision},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11269--11281},
}
Efficient Test-Time Adaptation of Vision-Language Models: Adilbek Karmanov,

Dayan Guan,

Shijian Lu,

Abdulmotaleb El Saddik,

Eric Xing; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Karmanov_2024_CVPR,
  author    = {Karmanov, Adilbek and Guan, Dayan and Lu, Shijian and El Saddik, Abdulmotaleb and Xing, Eric},
  title     = {Efficient Test-Time Adaptation of Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14162--14171},
}
Eyes Wide Shut? Exploring the Visual Shortcomings of Multimodal LLMs: Shengbang Tong,

Zhuang Liu,

Yuexiang Zhai,

Yi Ma,

Yann LeCun,

Saining Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tong_2024_CVPR,
  author    = {Tong, Shengbang and Liu, Zhuang and Zhai, Yuexiang and Ma, Yi and LeCun, Yann and Xie, Saining},
  title     = {Eyes Wide Shut? {Exploring} the Visual Shortcomings of Multimodal {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9568--9578},
}
Mind Marginal Non-Crack Regions: Clustering-Inspired Representation Learning for Crack Segmentation: Zhuangzhuang Chen,

Zhuonan Lai,

Jie Chen,

Jianqiang Li; [pdf]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Zhuangzhuang and Lai, Zhuonan and Chen, Jie and Li, Jianqiang},
  title     = {Mind Marginal Non-Crack Regions: Clustering-Inspired Representation Learning for Crack Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12698--12708},
}
RegionGPT: Towards Region Understanding Vision Language Model: Qiushan Guo,

Shalini De Mello,

Hongxu Yin,

Wonmin Byeon,

Ka Chun Cheung,

Yizhou Yu,

Ping Luo,

Sifei Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Guo_2024_CVPR,
  author    = {Guo, Qiushan and De Mello, Shalini and Yin, Hongxu and Byeon, Wonmin and Cheung, Ka Chun and Yu, Yizhou and Luo, Ping and Liu, Sifei},
  title     = {{RegionGPT}: Towards Region Understanding Vision Language Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13796--13806},
}
Error Detection in Egocentric Procedural Task Videos: Shih-Po Lee,

Zijia Lu,

Zekun Zhang,

Minh Hoai,

Ehsan Elhamifar; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Shih-Po and Lu, Zijia and Zhang, Zekun and Hoai, Minh and Elhamifar, Ehsan},
  title     = {Error Detection in Egocentric Procedural Task Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18655--18666},
}
Uncertainty-Guided Never-Ending Learning to Drive: Lei Lai,

Eshed Ohn-Bar,

Sanjay Arora,

John Seon Keun Yi; [pdf]
[bibtex]
@InProceedings{Lai_2024_CVPR,
  author    = {Lai, Lei and Ohn-Bar, Eshed and Arora, Sanjay and Yi, John Seon Keun},
  title     = {Uncertainty-Guided Never-Ending Learning to Drive},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15088--15098},
}
FakeInversion: Learning to Detect Images from Unseen Text-to-Image Models by Inverting Stable Diffusion: George Cazenavette,

Avneesh Sud,

Thomas Leung,

Ben Usman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cazenavette_2024_CVPR,
  author    = {Cazenavette, George and Sud, Avneesh and Leung, Thomas and Usman, Ben},
  title     = {{FakeInversion}: Learning to Detect Images from Unseen Text-to-Image Models by Inverting {Stable Diffusion}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10759--10769},
}
Attribute-Guided Pedestrian Retrieval: Bridging Person Re-ID with Internal Attribute Variability: Yan Huang,

Zhang Zhang,

Qiang Wu,

Yi Zhong,

Liang Wang; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Yan and Zhang, Zhang and Wu, Qiang and Zhong, Yi and Wang, Liang},
  title     = {Attribute-Guided Pedestrian Retrieval: Bridging Person {Re-ID} with Internal Attribute Variability},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17689--17699},
}
Text Is MASS: Modeling as Stochastic Embedding for Text-Video Retrieval: Jiamian Wang,

Guohao Sun,

Pichao Wang,

Dongfang Liu,

Sohail Dianat,

Majid Rabbani,

Raghuveer Rao,

Zhiqiang Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Jiamian and Sun, Guohao and Wang, Pichao and Liu, Dongfang and Dianat, Sohail and Rabbani, Majid and Rao, Raghuveer and Tao, Zhiqiang},
  title     = {Text Is {MASS}: Modeling as Stochastic Embedding for Text-Video Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16551--16560},
}
Know Your Neighbors: Improving Single-View Reconstruction via Spatial Vision-Language Reasoning: Rui Li,

Tobias Fischer,

Mattia Segu,

Marc Pollefeys,

Luc Van Gool,

Federico Tombari; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Rui and Fischer, Tobias and Segu, Mattia and Pollefeys, Marc and Van Gool, Luc and Tombari, Federico},
  title     = {Know Your Neighbors: Improving Single-View Reconstruction via Spatial Vision-Language Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9848--9858},
}
Preserving Fairness Generalization in Deepfake Detection: Li Lin,

Xinan He,

Yan Ju,

Xin Wang,

Feng Ding,

Shu Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lin_2024_CVPR,
  author    = {Lin, Li and He, Xinan and Ju, Yan and Wang, Xin and Ding, Feng and Hu, Shu},
  title     = {Preserving Fairness Generalization in Deepfake Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16815--16825},
}
Structure-Aware Sparse-View X-ray 3D Reconstruction: Yuanhao Cai,

Jiahao Wang,

Alan Yuille,

Zongwei Zhou,

Angtian Wang; [pdf]
[bibtex]
@InProceedings{Cai_2024_CVPR,
  author    = {Cai, Yuanhao and Wang, Jiahao and Yuille, Alan and Zhou, Zongwei and Wang, Angtian},
  title     = {Structure-Aware Sparse-View {X-ray} {3D} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11174--11183},
}
Dexterous Grasp Transformer: Guo-Hao Xu,

Yi-Lin Wei,

Dian Zheng,

Xiao-Ming Wu,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Guo-Hao and Wei, Yi-Lin and Zheng, Dian and Wu, Xiao-Ming and Zheng, Wei-Shi},
  title     = {Dexterous Grasp Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17933--17942},
}
EgoThink: Evaluating First-Person Perspective Thinking Capability of Vision-Language Models: Sijie Cheng,

Zhicheng Guo,

Jingwen Wu,

Kechen Fang,

Peng Li,

Huaping Liu,

Yang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2024_CVPR,
  author    = {Cheng, Sijie and Guo, Zhicheng and Wu, Jingwen and Fang, Kechen and Li, Peng and Liu, Huaping and Liu, Yang},
  title     = {{EgoThink}: Evaluating First-Person Perspective Thinking Capability of Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14291--14302},
}
Hearing Anything Anywhere: Mason Long Wang,

Ryosuke Sawata,

Samuel Clarke,

Ruohan Gao,

Shangzhe Wu,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Mason Long and Sawata, Ryosuke and Clarke, Samuel and Gao, Ruohan and Wu, Shangzhe and Wu, Jiajun},
  title     = {Hearing Anything Anywhere},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11790--11799},
}
PatchFusion: An End-to-End Tile-Based Framework for High-Resolution Monocular Metric Depth Estimation: Zhenyu Li,

Shariq Farooq Bhat,

Peter Wonka; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Zhenyu and Bhat, Shariq Farooq and Wonka, Peter},
  title     = {{PatchFusion}: An End-to-End Tile-Based Framework for High-Resolution Monocular Metric Depth Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10016--10025},
}
Retrieval-Augmented Egocentric Video Captioning: Jilan Xu,

Yifei Huang,

Junlin Hou,

Guo Chen,

Yuejie Zhang,

Rui Feng,

Weidi Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Jilan and Huang, Yifei and Hou, Junlin and Chen, Guo and Zhang, Yuejie and Feng, Rui and Xie, Weidi},
  title     = {Retrieval-Augmented Egocentric Video Captioning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13525--13536},
}
SkillDiffuser: Interpretable Hierarchical Planning via Skill Abstractions in Diffusion-Based Task Execution: Zhixuan Liang,

Yao Mu,

Hengbo Ma,

Masayoshi Tomizuka,

Mingyu Ding,

Ping Luo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2024_CVPR,
  author    = {Liang, Zhixuan and Mu, Yao and Ma, Hengbo and Tomizuka, Masayoshi and Ding, Mingyu and Luo, Ping},
  title     = {{SkillDiffuser}: Interpretable Hierarchical Planning via Skill Abstractions in Diffusion-Based Task Execution},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16467--16476},
}
TE-TAD: Towards Full End-to-End Temporal Action Detection via Time-Aligned Coordinate Expression: Ho-Joong Kim,

Jung-Ho Hong,

Heejo Kong,

Seong-Whan Lee; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Ho-Joong and Hong, Jung-Ho and Kong, Heejo and Lee, Seong-Whan},
  title     = {{TE-TAD}: Towards Full End-to-End Temporal Action Detection via Time-Aligned Coordinate Expression},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18837--18846},
}
PointBeV: A Sparse Approach for BeV Predictions: Loick Chambon,

Eloi Zablocki,

Mickaël Chen,

Florent Bartoccioni,

Patrick Pérez,

Matthieu Cord; [pdf] [supp]
[bibtex]
@InProceedings{Chambon_2024_CVPR,
  author    = {Chambon, Loick and Zablocki, Eloi and Chen, Micka{\"e}l and Bartoccioni, Florent and P{\'e}rez, Patrick and Cord, Matthieu},
  title     = {{PointBeV}: A Sparse Approach for {BeV} Predictions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15195--15204},
}
From-Ground-To-Objects: Coarse-to-Fine Self-supervised Monocular Depth Estimation of Dynamic Objects with Ground Contact Prior: Jaeho Moon,

Juan Luis Gonzalez Bello,

Byeongjun Kwon,

Munchurl Kim; [pdf] [supp]
[bibtex]
@InProceedings{Moon_2024_CVPR,
  author    = {Moon, Jaeho and Bello, Juan Luis Gonzalez and Kwon, Byeongjun and Kim, Munchurl},
  title     = {From-Ground-To-Objects: Coarse-to-Fine Self-supervised Monocular Depth Estimation of Dynamic Objects with Ground Contact Prior},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10519--10529},
}
SRTube: Video-Language Pre-Training with Action-Centric Video Tube Features and Semantic Role Labeling: Ju-Hee Lee,

Je-Won Kang; [pdf] [supp]
[bibtex]
@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Ju-Hee and Kang, Je-Won},
  title     = {{SRTube}: Video-Language Pre-Training with Action-Centric Video Tube Features and Semantic Role Labeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13689--13699},
}
Prompt Highlighter: Interactive Control for Multi-Modal LLMs: Yuechen Zhang,

Shengju Qian,

Bohao Peng,

Shu Liu,

Jiaya Jia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Yuechen and Qian, Shengju and Peng, Bohao and Liu, Shu and Jia, Jiaya},
  title     = {Prompt Highlighter: Interactive Control for Multi-Modal {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13215--13224},
}
Continual Learning for Motion Prediction Model via Meta-Representation Learning and Optimal Memory Buffer Retention Strategy: DaeJun Kang,

Dongsuk Kum,

Sanmin Kim; [pdf]
[bibtex]
@InProceedings{Kang_2024_CVPR,
  author    = {Kang, DaeJun and Kum, Dongsuk and Kim, Sanmin},
  title     = {Continual Learning for Motion Prediction Model via Meta-Representation Learning and Optimal Memory Buffer Retention Strategy},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15438--15448},
}
EditGuard: Versatile Image Watermarking for Tamper Localization and Copyright Protection: Xuanyu Zhang,

Runyi Li,

Jiwen Yu,

Youmin Xu,

Weiqi Li,

Jian Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Xuanyu and Li, Runyi and Yu, Jiwen and Xu, Youmin and Li, Weiqi and Zhang, Jian},
  title     = {{EditGuard}: Versatile Image Watermarking for Tamper Localization and Copyright Protection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11964--11974},
}
FairRAG: Fair Human Generation via Fair Retrieval Augmentation: Robik Shrestha,

Yang Zou,

Qiuyu Chen,

Zhiheng Li,

Yusheng Xie,

Siqi Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shrestha_2024_CVPR,
  author    = {Shrestha, Robik and Zou, Yang and Chen, Qiuyu and Li, Zhiheng and Xie, Yusheng and Deng, Siqi},
  title     = {{FairRAG}: Fair Human Generation via Fair Retrieval Augmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11996--12005},
}
Template Free Reconstruction of Human-object Interaction with Procedural Interaction Generation: Xianghui Xie,

Bharat Lal Bhatnagar,

Jan Eric Lenssen,

Gerard Pons-Moll; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Xianghui and Bhatnagar, Bharat Lal and Lenssen, Jan Eric and Pons-Moll, Gerard},
  title     = {Template Free Reconstruction of Human-object Interaction with Procedural Interaction Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10003--10015},
}
Open-Vocabulary Video Anomaly Detection: Peng Wu,

Xuerong Zhou,

Guansong Pang,

Yujia Sun,

Jing Liu,

Peng Wang,

Yanning Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Peng and Zhou, Xuerong and Pang, Guansong and Sun, Yujia and Liu, Jing and Wang, Peng and Zhang, Yanning},
  title     = {Open-Vocabulary Video Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18297--18307},
}
ODM: A Text-Image Further Alignment Pre-training Approach for Scene Text Detection and Spotting: Chen Duan,

Pei Fu,

Shan Guo,

Qianyi Jiang,

Xiaoming Wei; [pdf] [arXiv]
[bibtex]
@InProceedings{Duan_2024_CVPR,
  author    = {Duan, Chen and Fu, Pei and Guo, Shan and Jiang, Qianyi and Wei, Xiaoming},
  title     = {{ODM}: A Text-Image Further Alignment Pre-training Approach for Scene Text Detection and Spotting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15587--15597},
}
Epistemic Uncertainty Quantification For Pre-Trained Neural Networks: Hanjing Wang,

Qiang Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Hanjing and Ji, Qiang},
  title     = {Epistemic Uncertainty Quantification For Pre-Trained Neural Networks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11052--11061},
}
Diffusion-ES: Gradient-free Planning with Diffusion for Autonomous and Instruction-guided Driving: Brian Yang,

Huangyuan Su,

Nikolaos Gkanatsios,

Tsung-Wei Ke,

Ayush Jain,

Jeff Schneider,

Katerina Fragkiadaki; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Brian and Su, Huangyuan and Gkanatsios, Nikolaos and Ke, Tsung-Wei and Jain, Ayush and Schneider, Jeff and Fragkiadaki, Katerina},
  title     = {{Diffusion-ES}: Gradient-free Planning with Diffusion for Autonomous and Instruction-guided Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15342--15353},
}
MRC-Net: 6-DoF Pose Estimation with MultiScale Residual Correlation: Yuelong Li,

Yafei Mao,

Raja Bala,

Sunil Hadap; [pdf] [supp]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Yuelong and Mao, Yafei and Bala, Raja and Hadap, Sunil},
  title     = {{MRC-Net}: {6-DoF} Pose Estimation with MultiScale Residual Correlation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10476--10486},
}
MonoCD: Monocular 3D Object Detection with Complementary Depths: Longfei Yan,

Pei Yan,

Shengzhou Xiong,

Xuanyu Xiang,

Yihua Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2024_CVPR,
  author    = {Yan, Longfei and Yan, Pei and Xiong, Shengzhou and Xiang, Xuanyu and Tan, Yihua},
  title     = {{MonoCD}: Monocular {3D} Object Detection with Complementary Depths},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10248--10257},
}
Consistent3D: Towards Consistent High-Fidelity Text-to-3D Generation with Deterministic Sampling Prior: Zike Wu,

Pan Zhou,

Xuanyu Yi,

Xiaoding Yuan,

Hanwang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Zike and Zhou, Pan and Yi, Xuanyu and Yuan, Xiaoding and Zhang, Hanwang},
  title     = {{Consistent3D}: Towards Consistent High-Fidelity Text-to-{3D} Generation with Deterministic Sampling Prior},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9892--9902},
}
ManipLLM: Embodied Multimodal Large Language Model for Object-Centric Robotic Manipulation: Xiaoqi Li,

Mingxu Zhang,

Yiran Geng,

Haoran Geng,

Yuxing Long,

Yan Shen,

Renrui Zhang,

Jiaming Liu,

Hao Dong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Xiaoqi and Zhang, Mingxu and Geng, Yiran and Geng, Haoran and Long, Yuxing and Shen, Yan and Zhang, Renrui and Liu, Jiaming and Dong, Hao},
  title     = {{ManipLLM}: Embodied Multimodal Large Language Model for Object-Centric Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18061--18070},
}
GLaMM: Pixel Grounding Large Multimodal Model: Hanoona Rasheed,

Muhammad Maaz,

Sahal Shaji,

Abdelrahman Shaker,

Salman Khan,

Hisham Cholakkal,

Rao M. Anwer,

Eric Xing,

Ming-Hsuan Yang,

Fahad S. Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rasheed_2024_CVPR,
  author    = {Rasheed, Hanoona and Maaz, Muhammad and Shaji, Sahal and Shaker, Abdelrahman and Khan, Salman and Cholakkal, Hisham and Anwer, Rao M. and Xing, Eric and Yang, Ming-Hsuan and Khan, Fahad S.},
  title     = {{GLaMM}: Pixel Grounding Large Multimodal Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13009--13018},
}
Incremental Residual Concept Bottleneck Models: Chenming Shang,

Shiji Zhou,

Hengyuan Zhang,

Xinzhe Ni,

Yujiu Yang,

Yuwang Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shang_2024_CVPR,
  author    = {Shang, Chenming and Zhou, Shiji and Zhang, Hengyuan and Ni, Xinzhe and Yang, Yujiu and Wang, Yuwang},
  title     = {Incremental Residual Concept Bottleneck Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11030--11040},
}
SPOC: Imitating Shortest Paths in Simulation Enables Effective Navigation and Manipulation in the Real World: Kiana Ehsani,

Tanmay Gupta,

Rose Hendrix,

Jordi Salvador,

Luca Weihs,

Kuo-Hao Zeng,

Kunal Pratap Singh,

Yejin Kim,

Winson Han,

Alvaro Herrasti,

Ranjay Krishna,

Dustin Schwenk,

Eli VanderBilt,

Aniruddha Kembhavi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ehsani_2024_CVPR,
  author    = {Ehsani, Kiana and Gupta, Tanmay and Hendrix, Rose and Salvador, Jordi and Weihs, Luca and Zeng, Kuo-Hao and Singh, Kunal Pratap and Kim, Yejin and Han, Winson and Herrasti, Alvaro and Krishna, Ranjay and Schwenk, Dustin and VanderBilt, Eli and Kembhavi, Aniruddha},
  title     = {{SPOC}: Imitating Shortest Paths in Simulation Enables Effective Navigation and Manipulation in the Real World},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16238--16250},
}
LoCoNet: Long-Short Context Network for Active Speaker Detection: Xizi Wang,

Feng Cheng,

Gedas Bertasius; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Xizi and Cheng, Feng and Bertasius, Gedas},
  title     = {{LoCoNet}: Long-Short Context Network for Active Speaker Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18462--18472},
}
D3still: Decoupled Differential Distillation for Asymmetric Image Retrieval: Yi Xie,

Yihong Lin,

Wenjie Cai,

Xuemiao Xu,

Huaidong Zhang,

Yong Du,

Shengfeng He; [pdf]
[bibtex]
@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Yi and Lin, Yihong and Cai, Wenjie and Xu, Xuemiao and Zhang, Huaidong and Du, Yong and He, Shengfeng},
  title     = {D3still: Decoupled Differential Distillation for Asymmetric Image Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17181--17190},
}
Learning Triangular Distribution in Visual World: Ping Chen,

Xingpeng Zhang,

Chengtao Zhou,

Dichao Fan,

Peng Tu,

Le Zhang,

Yanlin Qian; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Ping and Zhang, Xingpeng and Zhou, Chengtao and Fan, Dichao and Tu, Peng and Zhang, Le and Qian, Yanlin},
  title     = {Learning Triangular Distribution in Visual World},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11019--11029},
}
DiaLoc: An Iterative Approach to Embodied Dialog Localization: Chao Zhang,

Mohan Li,

Ignas Budvytis,

Stephan Liwicki; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Chao and Li, Mohan and Budvytis, Ignas and Liwicki, Stephan},
  title     = {{DiaLoc}: An Iterative Approach to Embodied Dialog Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12585--12593},
}
Self-Training Large Language Models for Improved Visual Program Synthesis With Visual Reinforcement: Zaid Khan,

Vijay Kumar BG,

Samuel Schulter,

Yun Fu,

Manmohan Chandraker; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Khan_2024_CVPR,
  author    = {Khan, Zaid and BG, Vijay Kumar and Schulter, Samuel and Fu, Yun and Chandraker, Manmohan},
  title     = {Self-Training Large Language Models for Improved Visual Program Synthesis With Visual Reinforcement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14344--14353},
}
MLIP: Enhancing Medical Visual Representation with Divergence Encoder and Knowledge-guided Contrastive Learning: Zhe Li,

Laurence T. Yang,

Bocheng Ren,

Xin Nie,

Zhangyang Gao,

Cheng Tan,

Stan Z. Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Zhe and Yang, Laurence T. and Ren, Bocheng and Nie, Xin and Gao, Zhangyang and Tan, Cheng and Li, Stan Z.},
  title     = {{MLIP}: Enhancing Medical Visual Representation with Divergence Encoder and Knowledge-guided Contrastive Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11704--11714},
}
Collaborative Learning of Anomalies with Privacy (CLAP) for Unsupervised Video Anomaly Detection: A New Baseline: Anas Al-lahham,

Muhammad Zaigham Zaheer,

Nurbek Tastan,

Karthik Nandakumar; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Al-lahham_2024_CVPR,
  author    = {Al-lahham, Anas and Zaheer, Muhammad Zaigham and Tastan, Nurbek and Nandakumar, Karthik},
  title     = {Collaborative Learning of Anomalies with Privacy ({CLAP}) for Unsupervised Video Anomaly Detection: A New Baseline},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12416--12425},
}
Resource-Efficient Transformer Pruning for Finetuning of Large Models: Fatih Ilhan,

Gong Su,

Selim Furkan Tekin,

Tiansheng Huang,

Sihao Hu,

Ling Liu; [pdf] [supp]
[bibtex]
@InProceedings{Ilhan_2024_CVPR,
  author    = {Ilhan, Fatih and Su, Gong and Tekin, Selim Furkan and Huang, Tiansheng and Hu, Sihao and Liu, Ling},
  title     = {Resource-Efficient Transformer Pruning for Finetuning of Large Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16206--16215},
}
Multimodal Industrial Anomaly Detection by Crossmodal Feature Mapping: Alex Costanzino,

Pierluigi Zama Ramirez,

Giuseppe Lisanti,

Luigi Di Stefano; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Costanzino_2024_CVPR,
  author    = {Costanzino, Alex and Ramirez, Pierluigi Zama and Lisanti, Giuseppe and Di Stefano, Luigi},
  title     = {Multimodal Industrial Anomaly Detection by Crossmodal Feature Mapping},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17234--17243},
}
FFF: Fixing Flawed Foundations in Contrastive Pre-Training Results in Very Strong Vision-Language Models: Adrian Bulat,

Yassine Ouali,

Georgios Tzimiropoulos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bulat_2024_CVPR,
  author    = {Bulat, Adrian and Ouali, Yassine and Tzimiropoulos, Georgios},
  title     = {{FFF}: Fixing Flawed Foundations in Contrastive Pre-Training Results in Very Strong Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14172--14182},
}
Low-power Continuous Remote Behavioral Localization with Event Cameras: Friedhelm Hamann,

Suman Ghosh,

Ignacio Juarez Martinez,

Tom Hart,

Alex Kacelnik,

Guillermo Gallego; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hamann_2024_CVPR,
  author    = {Hamann, Friedhelm and Ghosh, Suman and Martinez, Ignacio Juarez and Hart, Tom and Kacelnik, Alex and Gallego, Guillermo},
  title     = {Low-power Continuous Remote Behavioral Localization with Event Cameras},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18612--18621},
}
SportsHHI: A Dataset for Human-Human Interaction Detection in Sports Videos: Tao Wu,

Runyu He,

Gangshan Wu,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Tao and He, Runyu and Wu, Gangshan and Wang, Limin},
  title     = {{SportsHHI}: A Dataset for Human-Human Interaction Detection in Sports Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18537--18546},
}
CrowdDiff: Multi-hypothesis Crowd Density Estimation using Diffusion Models: Yasiru Ranasinghe,

Nithin Gopalakrishnan Nair,

Wele Gedara Chaminda Bandara,

Vishal M. Patel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ranasinghe_2024_CVPR,
  author    = {Ranasinghe, Yasiru and Nair, Nithin Gopalakrishnan and Bandara, Wele Gedara Chaminda and Patel, Vishal M.},
  title     = {{CrowdDiff}: Multi-hypothesis Crowd Density Estimation using Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12809--12819},
}
Diffusion-FOF: Single-View Clothed Human Reconstruction via Diffusion-Based Fourier Occupancy Field: Yuanzhen Li,

Fei Luo,

Chunxia Xiao; [pdf] [supp]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Yuanzhen and Luo, Fei and Xiao, Chunxia},
  title     = {{Diffusion-FOF}: Single-View Clothed Human Reconstruction via Diffusion-Based {Fourier} Occupancy Field},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9525--9534},
}
ToNNO: Tomographic Reconstruction of a Neural Network's Output for Weakly Supervised Segmentation of 3D Medical Images: Marius Schmidt-Mengin,

Alexis Benichoux,

Shibeshih Belachew,

Nikos Komodakis,

Nikos Paragios; [pdf] [supp]
[bibtex]
@InProceedings{Schmidt-Mengin_2024_CVPR,
  author    = {Schmidt-Mengin, Marius and Benichoux, Alexis and Belachew, Shibeshih and Komodakis, Nikos and Paragios, Nikos},
  title     = {{ToNNO}: Tomographic Reconstruction of a Neural Network's Output for Weakly Supervised Segmentation of {3D} Medical Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11428--11438},
}
Learning to Navigate Efficiently and Precisely in Real Environments: Guillaume Bono,

Hervé Poirier,

Leonid Antsfeld,

Gianluca Monaci,

Boris Chidlovskii,

Christian Wolf; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bono_2024_CVPR,
  author    = {Bono, Guillaume and Poirier, Herv{\'e} and Antsfeld, Leonid and Monaci, Gianluca and Chidlovskii, Boris and Wolf, Christian},
  title     = {Learning to Navigate Efficiently and Precisely in Real Environments},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17837--17846},
}
VkD: Improving Knowledge Distillation using Orthogonal Projections: Roy Miles,

Ismail Elezi,

Jiankang Deng; [pdf] [supp]
[bibtex]
@InProceedings{Miles_2024_CVPR,
  author    = {Miles, Roy and Elezi, Ismail and Deng, Jiankang},
  title     = {{VkD}: Improving Knowledge Distillation using Orthogonal Projections},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15720--15730},
}
LaRE^2: Latent Reconstruction Error Based Method for Diffusion-Generated Image Detection: Yunpeng Luo,

Junlong Du,

Ke Yan,

Shouhong Ding; [pdf] [supp]
[bibtex]
@InProceedings{Luo_2024_CVPR,
  author    = {Luo, Yunpeng and Du, Junlong and Yan, Ke and Ding, Shouhong},
  title     = {{LaRE$^2$}: Latent Reconstruction Error Based Method for Diffusion-Generated Image Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17006--17015},
}
T4P: Test-Time Training of Trajectory Prediction via Masked Autoencoder and Actor-specific Token Memory: Daehee Park,

Jaeseok Jeong,

Sung-Hoon Yoon,

Jaewoo Jeong,

Kuk-Jin Yoon; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2024_CVPR,
  author    = {Park, Daehee and Jeong, Jaeseok and Yoon, Sung-Hoon and Jeong, Jaewoo and Yoon, Kuk-Jin},
  title     = {{T4P}: Test-Time Training of Trajectory Prediction via Masked Autoencoder and Actor-specific Token Memory},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15065--15076},
}
InstaGen: Enhancing Object Detection by Training on Synthetic Dataset: Chengjian Feng,

Yujie Zhong,

Zequn Jie,

Weidi Xie,

Lin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2024_CVPR,
  author    = {Feng, Chengjian and Zhong, Yujie and Jie, Zequn and Xie, Weidi and Ma, Lin},
  title     = {{InstaGen}: Enhancing Object Detection by Training on Synthetic Dataset},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14121--14130},
}
Visual Point Cloud Forecasting enables Scalable Autonomous Driving: Zetong Yang,

Li Chen,

Yanan Sun,

Hongyang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Zetong and Chen, Li and Sun, Yanan and Li, Hongyang},
  title     = {Visual Point Cloud Forecasting enables Scalable Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14673--14684},
}
Synthesize Step-by-Step: Tools, Templates and LLMs as Data Generators for Reasoning-Based Chart VQA: Zhuowan Li,

Bhavan Jasani,

Peng Tang,

Shabnam Ghadar; [pdf] [supp]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Zhuowan and Jasani, Bhavan and Tang, Peng and Ghadar, Shabnam},
  title     = {Synthesize Step-by-Step: Tools, Templates and {LLMs} as Data Generators for Reasoning-Based Chart {VQA}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13613--13623},
}
LayoutLLM: Layout Instruction Tuning with Large Language Models for Document Understanding: Chuwei Luo,

Yufan Shen,

Zhaoqing Zhu,

Qi Zheng,

Zhi Yu,

Cong Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2024_CVPR,
  author    = {Luo, Chuwei and Shen, Yufan and Zhu, Zhaoqing and Zheng, Qi and Yu, Zhi and Yao, Cong},
  title     = {{LayoutLLM}: Layout Instruction Tuning with Large Language Models for Document Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15630--15640},
}
ProTeCt: Prompt Tuning for Taxonomic Open Set Classification: Tz-Ying Wu,

Chih-Hui Ho,

Nuno Vasconcelos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Tz-Ying and Ho, Chih-Hui and Vasconcelos, Nuno},
  title     = {{ProTeCt}: Prompt Tuning for Taxonomic Open Set Classification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16531--16540},
}
Masked Autoencoders for Microscopy are Scalable Learners of Cellular Biology: Oren Kraus,

Kian Kenyon-Dean,

Saber Saberian,

Maryam Fallah,

Peter McLean,

Jess Leung,

Vasudev Sharma,

Ayla Khan,

Jia Balakrishnan,

Safiye Celik,

Dominique Beaini,

Maciej Sypetkowski,

Chi Vicky Cheng,

Kristen Morse,

Maureen Makes,

Ben Mabey,

Berton Earnshaw; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kraus_2024_CVPR,
  author    = {Kraus, Oren and Kenyon-Dean, Kian and Saberian, Saber and Fallah, Maryam and McLean, Peter and Leung, Jess and Sharma, Vasudev and Khan, Ayla and Balakrishnan, Jia and Celik, Safiye and Beaini, Dominique and Sypetkowski, Maciej and Cheng, Chi Vicky and Morse, Kristen and Makes, Maureen and Mabey, Ben and Earnshaw, Berton},
  title     = {Masked Autoencoders for Microscopy are Scalable Learners of Cellular Biology},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11757--11768},
}
Segment and Caption Anything: Xiaoke Huang,

Jianfeng Wang,

Yansong Tang,

Zheng Zhang,

Han Hu,

Jiwen Lu,

Lijuan Wang,

Zicheng Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Xiaoke and Wang, Jianfeng and Tang, Yansong and Zhang, Zheng and Hu, Han and Lu, Jiwen and Wang, Lijuan and Liu, Zicheng},
  title     = {Segment and Caption Anything},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13405--13417},
}
Adversarial Backdoor Attack by Naturalistic Data Poisoning on Trajectory Prediction in Autonomous Driving: Mozhgan Pourkeshavarz,

Mohammad Sabokrou,

Amir Rasouli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Pourkeshavarz_2024_CVPR,
  author    = {Pourkeshavarz, Mozhgan and Sabokrou, Mohammad and Rasouli, Amir},
  title     = {Adversarial Backdoor Attack by Naturalistic Data Poisoning on Trajectory Prediction in Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14885--14894},
}
Low-Rank Approximation for Sparse Attention in Multi-Modal LLMs: Lin Song,

Yukang Chen,

Shuai Yang,

Xiaohan Ding,

Yixiao Ge,

Ying-Cong Chen,

Ying Shan; [pdf]
[bibtex]
@InProceedings{Song_2024_CVPR,
  author    = {Song, Lin and Chen, Yukang and Yang, Shuai and Ding, Xiaohan and Ge, Yixiao and Chen, Ying-Cong and Shan, Ying},
  title     = {Low-Rank Approximation for Sparse Attention in Multi-Modal {LLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13763--13773},
}
TASeg: Temporal Aggregation Network for LiDAR Semantic Segmentation: Xiaopei Wu,

Yuenan Hou,

Xiaoshui Huang,

Binbin Lin,

Tong He,

Xinge Zhu,

Yuexin Ma,

Boxi Wu,

Haifeng Liu,

Deng Cai,

Wanli Ouyang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Xiaopei and Hou, Yuenan and Huang, Xiaoshui and Lin, Binbin and He, Tong and Zhu, Xinge and Ma, Yuexin and Wu, Boxi and Liu, Haifeng and Cai, Deng and Ouyang, Wanli},
  title     = {{TASeg}: Temporal Aggregation Network for {LiDAR} Semantic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15311--15320},
}
Bootstrapping SparseFormers from Vision Foundation Models: Ziteng Gao,

Zhan Tong,

Kevin Qinghong Lin,

Joya Chen,

Mike Zheng Shou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2024_CVPR,
  author    = {Gao, Ziteng and Tong, Zhan and Lin, Kevin Qinghong and Chen, Joya and Shou, Mike Zheng},
  title     = {Bootstrapping {SparseFormers} from Vision Foundation Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17710--17721},
}
EventPS: Real-Time Photometric Stereo Using an Event Camera: Bohan Yu,

Jieji Ren,

Jin Han,

Feishi Wang,

Jinxiu Liang,

Boxin Shi; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2024_CVPR,
  author    = {Yu, Bohan and Ren, Jieji and Han, Jin and Wang, Feishi and Liang, Jinxiu and Shi, Boxin},
  title     = {{EventPS}: Real-Time Photometric Stereo Using an Event Camera},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9602--9611},
}
On the Road to Portability: Compressing End-to-End Motion Planner for Autonomous Driving: Kaituo Feng,

Changsheng Li,

Dongchun Ren,

Ye Yuan,

Guoren Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2024_CVPR,
  author    = {Feng, Kaituo and Li, Changsheng and Ren, Dongchun and Yuan, Ye and Wang, Guoren},
  title     = {On the Road to Portability: Compressing End-to-End Motion Planner for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15099--15108},
}
PredToken: Predicting Unknown Tokens and Beyond with Coarse-to-Fine Iterative Decoding: Xuesong Nie,

Haoyuan Jin,

Yunfeng Yan,

Xi Chen,

Zhihang Zhu,

Donglian Qi; [pdf]
[bibtex]
@InProceedings{Nie_2024_CVPR,
  author    = {Nie, Xuesong and Jin, Haoyuan and Yan, Yunfeng and Chen, Xi and Zhu, Zhihang and Qi, Donglian},
  title     = {{PredToken}: Predicting Unknown Tokens and Beyond with Coarse-to-Fine Iterative Decoding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18143--18152},
}
FairCLIP: Harnessing Fairness in Vision-Language Learning: Yan Luo,

Min Shi,

Muhammad Osama Khan,

Muhammad Muneeb Afzal,

Hao Huang,

Shuaihang Yuan,

Yu Tian,

Luo Song,

Ava Kouhana,

Tobias Elze,

Yi Fang,

Mengyu Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2024_CVPR,
  author    = {Luo, Yan and Shi, Min and Khan, Muhammad Osama and Afzal, Muhammad Muneeb and Huang, Hao and Yuan, Shuaihang and Tian, Yu and Song, Luo and Kouhana, Ava and Elze, Tobias and Fang, Yi and Wang, Mengyu},
  title     = {{FairCLIP}: Harnessing Fairness in Vision-Language Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12289--12301},
}
StreamingFlow: Streaming Occupancy Forecasting with Asynchronous Multi-modal Data Streams via Neural Ordinary Differential Equation: Yining Shi,

Kun Jiang,

Ke Wang,

Jiusi Li,

Yunlong Wang,

Mengmeng Yang,

Diange Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2024_CVPR,
  author    = {Shi, Yining and Jiang, Kun and Wang, Ke and Li, Jiusi and Wang, Yunlong and Yang, Mengmeng and Yang, Diange},
  title     = {{StreamingFlow}: Streaming Occupancy Forecasting with Asynchronous Multi-modal Data Streams via Neural Ordinary Differential Equation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14833--14842},
}
Language Model Guided Interpretable Video Action Reasoning: Ning Wang,

Guangming Zhu,

HS Li,

Liang Zhang,

Syed Afaq Ali Shah,

Mohammed Bennamoun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Ning and Zhu, Guangming and Li, HS and Zhang, Liang and Shah, Syed Afaq Ali and Bennamoun, Mohammed},
  title     = {Language Model Guided Interpretable Video Action Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18878--18887},
}
See, Say, and Segment: Teaching LMMs to Overcome False Premises: Tsung-Han Wu,

Giscard Biamby,

David Chan,

Lisa Dunlap,

Ritwik Gupta,

Xudong Wang,

Joseph E. Gonzalez,

Trevor Darrell; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Tsung-Han and Biamby, Giscard and Chan, David and Dunlap, Lisa and Gupta, Ritwik and Wang, Xudong and Gonzalez, Joseph E. and Darrell, Trevor},
  title     = {See, Say, and Segment: Teaching {LMMs} to Overcome False Premises},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13459--13469},
}
Is Ego Status All You Need for Open-Loop End-to-End Autonomous Driving?: Zhiqi Li,

Zhiding Yu,

Shiyi Lan,

Jiahan Li,

Jan Kautz,

Tong Lu,

Jose M. Alvarez; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Zhiqi and Yu, Zhiding and Lan, Shiyi and Li, Jiahan and Kautz, Jan and Lu, Tong and Alvarez, Jose M.},
  title     = {Is Ego Status All You Need for Open-Loop End-to-End Autonomous Driving?},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14864--14873},
}
CGI-DM: Digital Copyright Authentication for Diffusion Models via Contrasting Gradient Inversion: Xiaoyu Wu,

Yang Hua,

Chumeng Liang,

Jiaru Zhang,

Hao Wang,

Tao Song,

Haibing Guan; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Xiaoyu and Hua, Yang and Liang, Chumeng and Zhang, Jiaru and Wang, Hao and Song, Tao and Guan, Haibing},
  title     = {{CGI-DM}: Digital Copyright Authentication for Diffusion Models via Contrasting Gradient Inversion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10812--10821},
}
Making Visual Sense of Oracle Bones for You and Me: Runqi Qiao,

Lan Yang,

Kaiyue Pang,

Honggang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Qiao_2024_CVPR,
  author    = {Qiao, Runqi and Yang, Lan and Pang, Kaiyue and Zhang, Honggang},
  title     = {Making Visual Sense of Oracle Bones for You and Me},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12656--12665},
}
MOHO: Learning Single-view Hand-held Object Reconstruction with Multi-view Occlusion-Aware Supervision: Chenyangguang Zhang,

Guanlong Jiao,

Yan Di,

Gu Wang,

Ziqin Huang,

Ruida Zhang,

Fabian Manhardt,

Bowen Fu,

Federico Tombari,

Xiangyang Ji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Chenyangguang and Jiao, Guanlong and Di, Yan and Wang, Gu and Huang, Ziqin and Zhang, Ruida and Manhardt, Fabian and Fu, Bowen and Tombari, Federico and Ji, Xiangyang},
  title     = {{MOHO}: Learning Single-view Hand-held Object Reconstruction with Multi-view Occlusion-Aware Supervision},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9992--10002},
}
SecondPose: SE(3)-Consistent Dual-Stream Feature Fusion for Category-Level Pose Estimation: Yamei Chen,

Yan Di,

Guangyao Zhai,

Fabian Manhardt,

Chenyangguang Zhang,

Ruida Zhang,

Federico Tombari,

Nassir Navab,

Benjamin Busam; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Yamei and Di, Yan and Zhai, Guangyao and Manhardt, Fabian and Zhang, Chenyangguang and Zhang, Ruida and Tombari, Federico and Navab, Nassir and Busam, Benjamin},
  title     = {{SecondPose}: {SE(3)}-Consistent Dual-Stream Feature Fusion for Category-Level Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9959--9969},
}
EgoGen: An Egocentric Synthetic Data Generator: Gen Li,

Kaifeng Zhao,

Siwei Zhang,

Xiaozhong Lyu,

Mihai Dusmanu,

Yan Zhang,

Marc Pollefeys,

Siyu Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Gen and Zhao, Kaifeng and Zhang, Siwei and Lyu, Xiaozhong and Dusmanu, Mihai and Zhang, Yan and Pollefeys, Marc and Tang, Siyu},
  title     = {{EgoGen}: An Egocentric Synthetic Data Generator},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14497--14509},
}
Video ReCap: Recursive Captioning of Hour-Long Videos: Md Mohaiminul Islam,

Ngan Ho,

Xitong Yang,

Tushar Nagarajan,

Lorenzo Torresani,

Gedas Bertasius; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Islam_2024_CVPR,
  author    = {Islam, Md Mohaiminul and Ho, Ngan and Yang, Xitong and Nagarajan, Tushar and Torresani, Lorenzo and Bertasius, Gedas},
  title     = {Video {ReCap}: Recursive Captioning of Hour-Long Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18198--18208},
}
Towards Realistic Scene Generation with LiDAR Diffusion Models: Haoxi Ran,

Vitor Guizilini,

Yue Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ran_2024_CVPR,
  author    = {Ran, Haoxi and Guizilini, Vitor and Wang, Yue},
  title     = {Towards Realistic Scene Generation with {LiDAR} Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14738--14748},
}
Diffusion Reflectance Map: Single-Image Stochastic Inverse Rendering of Illumination and Reflectance: Yuto Enyo,

Ko Nishino; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Enyo_2024_CVPR,
  author    = {Enyo, Yuto and Nishino, Ko},
  title     = {Diffusion Reflectance Map: Single-Image Stochastic Inverse Rendering of Illumination and Reflectance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11873--11883},
}
MMMU: A Massive Multi-discipline Multimodal Understanding and Reasoning Benchmark for Expert AGI: Xiang Yue,

Yuansheng Ni,

Kai Zhang,

Tianyu Zheng,

Ruoqi Liu,

Ge Zhang,

Samuel Stevens,

Dongfu Jiang,

Weiming Ren,

Yuxuan Sun,

Cong Wei,

Botao Yu,

Ruibin Yuan,

Renliang Sun,

Ming Yin,

Boyuan Zheng,

Zhenzhu Yang,

Yibo Liu,

Wenhao Huang,

Huan Sun,

Yu Su,

Wenhu Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yue_2024_CVPR,
  author    = {Yue, Xiang and Ni, Yuansheng and Zhang, Kai and Zheng, Tianyu and Liu, Ruoqi and Zhang, Ge and Stevens, Samuel and Jiang, Dongfu and Ren, Weiming and Sun, Yuxuan and Wei, Cong and Yu, Botao and Yuan, Ruibin and Sun, Renliang and Yin, Ming and Zheng, Boyuan and Yang, Zhenzhu and Liu, Yibo and Huang, Wenhao and Sun, Huan and Su, Yu and Chen, Wenhu},
  title     = {{MMMU}: A Massive Multi-discipline Multimodal Understanding and Reasoning Benchmark for Expert {AGI}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9556--9567},
}
EarthLoc: Astronaut Photography Localization by Indexing Earth from Space: Gabriele Berton,

Alex Stoken,

Barbara Caputo,

Carlo Masone; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Berton_2024_CVPR,
  author    = {Berton, Gabriele and Stoken, Alex and Caputo, Barbara and Masone, Carlo},
  title     = {{EarthLoc}: Astronaut Photography Localization by Indexing {Earth} from Space},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12754--12764},
}
Text-Image Alignment for Diffusion-Based Perception: Neehar Kondapaneni,

Markus Marks,

Manuel Knott,

Rogerio Guimaraes,

Pietro Perona; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kondapaneni_2024_CVPR,
  author    = {Kondapaneni, Neehar and Marks, Markus and Knott, Manuel and Guimaraes, Rogerio and Perona, Pietro},
  title     = {Text-Image Alignment for Diffusion-Based Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13883--13893},
}
MemFlow: Optical Flow Estimation and Prediction with Memory: Qiaole Dong,

Yanwei Fu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dong_2024_CVPR,
  author    = {Dong, Qiaole and Fu, Yanwei},
  title     = {{MemFlow}: Optical Flow Estimation and Prediction with Memory},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {19068--19078},
}
Novel Class Discovery for Ultra-Fine-Grained Visual Categorization: Yu Liu,

Yaqi Cai,

Qi Jia,

Binglin Qiu,

Weimin Wang,

Nan Pu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Yu and Cai, Yaqi and Jia, Qi and Qiu, Binglin and Wang, Weimin and Pu, Nan},
  title     = {Novel Class Discovery for Ultra-Fine-Grained Visual Categorization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17679--17688},
}
DiffusionGAN3D: Boosting Text-guided 3D Generation and Domain Adaptation by Combining 3D GANs and Diffusion Priors: Biwen Lei,

Kai Yu,

Mengyang Feng,

Miaomiao Cui,

Xuansong Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2024_CVPR,
  author    = {Lei, Biwen and Yu, Kai and Feng, Mengyang and Cui, Miaomiao and Xie, Xuansong},
  title     = {{DiffusionGAN3D}: Boosting Text-guided {3D} Generation and Domain Adaptation by Combining {3D} {GANs} and Diffusion Priors},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10487--10497},
}
Rethinking Boundary Discontinuity Problem for Oriented Object Detection: Hang Xu,

Xinyuan Liu,

Haonan Xu,

Yike Ma,

Zunjie Zhu,

Chenggang Yan,

Feng Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Hang and Liu, Xinyuan and Xu, Haonan and Ma, Yike and Zhu, Zunjie and Yan, Chenggang and Dai, Feng},
  title     = {Rethinking Boundary Discontinuity Problem for Oriented Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17406--17415},
}
SleepVST: Sleep Staging from Near-Infrared Video Signals using Pre-Trained Transformers: Jonathan F. Carter,

João Jorge,

Oliver Gibson,

Lionel Tarassenko; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Carter_2024_CVPR,
  author    = {Carter, Jonathan F. and Jorge, Jo{\~a}o and Gibson, Oliver and Tarassenko, Lionel},
  title     = {{SleepVST}: Sleep Staging from Near-Infrared Video Signals using Pre-Trained Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12479--12489},
}
TimeChat: A Time-sensitive Multimodal Large Language Model for Long Video Understanding: Shuhuai Ren,

Linli Yao,

Shicheng Li,

Xu Sun,

Lu Hou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ren_2024_CVPR,
  author    = {Ren, Shuhuai and Yao, Linli and Li, Shicheng and Sun, Xu and Hou, Lu},
  title     = {{TimeChat}: A Time-sensitive Multimodal Large Language Model for Long Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14313--14323},
}
ManiFPT: Defining and Analyzing Fingerprints of Generative Models: Hae Jin Song,

Mahyar Khayatkhoei,

Wael AbdAlmageed; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2024_CVPR,
  author    = {Song, Hae Jin and Khayatkhoei, Mahyar and AbdAlmageed, Wael},
  title     = {{ManiFPT}: Defining and Analyzing Fingerprints of Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10791--10801},
}
Quilt-LLaVA: Visual Instruction Tuning by Extracting Localized Narratives from Open-Source Histopathology Videos: Mehmet Saygin Seyfioglu,

Wisdom O. Ikezogwo,

Fatemeh Ghezloo,

Ranjay Krishna,

Linda Shapiro; [pdf] [supp]
[bibtex]
@InProceedings{Seyfioglu_2024_CVPR,
  author    = {Seyfioglu, Mehmet Saygin and Ikezogwo, Wisdom O. and Ghezloo, Fatemeh and Krishna, Ranjay and Shapiro, Linda},
  title     = {{Quilt-LLaVA}: Visual Instruction Tuning by Extracting Localized Narratives from Open-Source Histopathology Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13183--13192},
}
E-GPS: Explainable Geometry Problem Solving via Top-Down Solver and Bottom-Up Generator: Wenjun Wu,

Lingling Zhang,

Jun Liu,

Xi Tang,

Yaxian Wang,

Shaowei Wang,

Qianying Wang; [pdf]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Wenjun and Zhang, Lingling and Liu, Jun and Tang, Xi and Wang, Yaxian and Wang, Shaowei and Wang, Qianying},
  title     = {{E-GPS}: Explainable Geometry Problem Solving via Top-Down Solver and Bottom-Up Generator},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13828--13837},
}
Driving into the Future: Multiview Visual Forecasting and Planning with World Model for Autonomous Driving: Yuqi Wang,

Jiawei He,

Lue Fan,

Hongxin Li,

Yuntao Chen,

Zhaoxiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Yuqi and He, Jiawei and Fan, Lue and Li, Hongxin and Chen, Yuntao and Zhang, Zhaoxiang},
  title     = {Driving into the Future: Multiview Visual Forecasting and Planning with World Model for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14749--14759},
}
OpenESS: Event-based Semantic Scene Understanding with Open Vocabularies: Lingdong Kong,

Youquan Liu,

Lai Xing Ng,

Benoit R. Cottereau,

Wei Tsang Ooi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kong_2024_CVPR,
  author    = {Kong, Lingdong and Liu, Youquan and Ng, Lai Xing and Cottereau, Benoit R. and Ooi, Wei Tsang},
  title     = {{OpenESS}: Event-based Semantic Scene Understanding with Open Vocabularies},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15686--15698},
}
Do Vision and Language Encoders Represent the World Similarly?: Mayug Maniparambil,

Raiymbek Akshulakov,

Yasser Abdelaziz Dahou Djilali,

Mohamed El Amine Seddik,

Sanath Narayan,

Karttikeya Mangalam,

Noel E. O'Connor; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Maniparambil_2024_CVPR,
  author    = {Maniparambil, Mayug and Akshulakov, Raiymbek and Djilali, Yasser Abdelaziz Dahou and El Amine Seddik, Mohamed and Narayan, Sanath and Mangalam, Karttikeya and O'Connor, Noel E.},
  title     = {Do Vision and Language Encoders Represent the World Similarly?},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14334--14343},
}
MGMap: Mask-Guided Learning for Online Vectorized HD Map Construction: Xiaolu Liu,

Song Wang,

Wentong Li,

Ruizi Yang,

Junbo Chen,

Jianke Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Xiaolu and Wang, Song and Li, Wentong and Yang, Ruizi and Chen, Junbo and Zhu, Jianke},
  title     = {{MGMap}: Mask-Guided Learning for Online Vectorized {HD} Map Construction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14812--14821},
}
VidLA: Video-Language Alignment at Scale: Mamshad Nayeem Rizve,

Fan Fei,

Jayakrishnan Unnikrishnan,

Son Tran,

Benjamin Z. Yao,

Belinda Zeng,

Mubarak Shah,

Trishul Chilimbi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rizve_2024_CVPR,
  author    = {Rizve, Mamshad Nayeem and Fei, Fan and Unnikrishnan, Jayakrishnan and Tran, Son and Yao, Benjamin Z. and Zeng, Belinda and Shah, Mubarak and Chilimbi, Trishul},
  title     = {{VidLA}: Video-Language Alignment at Scale},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14043--14055},
}
ERMVP: Communication-Efficient and Collaboration-Robust Multi-Vehicle Perception in Challenging Environments: Jingyu Zhang,

Kun Yang,

Yilei Wang,

Hanqi Wang,

Peng Sun,

Liang Song; [pdf]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Jingyu and Yang, Kun and Wang, Yilei and Wang, Hanqi and Sun, Peng and Song, Liang},
  title     = {{ERMVP}: Communication-Efficient and Collaboration-Robust Multi-Vehicle Perception in Challenging Environments},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12575--12584},
}
PikeLPN: Mitigating Overlooked Inefficiencies of Low-Precision Neural Networks: Marina Neseem,

Conor McCullough,

Randy Hsin,

Chas Leichner,

Shan Li,

In Suk Chong,

Andrew Howard,

Lukasz Lew,

Sherief Reda,

Ville-Mikko Rautio,

Daniele Moro; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Neseem_2024_CVPR,
  author    = {Neseem, Marina and McCullough, Conor and Hsin, Randy and Leichner, Chas and Li, Shan and Chong, In Suk and Howard, Andrew and Lew, Lukasz and Reda, Sherief and Rautio, Ville-Mikko and Moro, Daniele},
  title     = {{PikeLPN}: Mitigating Overlooked Inefficiencies of Low-Precision Neural Networks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15996--16005},
}
CAGE: Controllable Articulation GEneration: Jiayi Liu,

Hou In Ivan Tam,

Ali Mahdavi-Amiri,

Manolis Savva; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Jiayi and Tam, Hou In Ivan and Mahdavi-Amiri, Ali and Savva, Manolis},
  title     = {{CAGE}: Controllable Articulation {GEneration}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17880--17889},
}
FocusMAE: Gallbladder Cancer Detection from Ultrasound Videos with Focused Masked Autoencoders: Soumen Basu,

Mayuna Gupta,

Chetan Madan,

Pankaj Gupta,

Chetan Arora; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Basu_2024_CVPR,
  author    = {Basu, Soumen and Gupta, Mayuna and Madan, Chetan and Gupta, Pankaj and Arora, Chetan},
  title     = {{FocusMAE}: Gallbladder Cancer Detection from Ultrasound Videos with Focused Masked Autoencoders},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11715--11725},
}
Visual Concept Connectome (VCC): Open World Concept Discovery and their Interlayer Connections in Deep Models: Matthew Kowal,

Richard P. Wildes,

Konstantinos G. Derpanis; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kowal_2024_CVPR,
  author    = {Kowal, Matthew and Wildes, Richard P. and Derpanis, Konstantinos G.},
  title     = {Visual Concept Connectome ({VCC}): Open World Concept Discovery and their Interlayer Connections in Deep Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10895--10905},
}
GRAM: Global Reasoning for Multi-Page VQA: Tsachi Blau,

Sharon Fogel,

Roi Ronen,

Alona Golts,

Roy Ganz,

Elad Ben Avraham,

Aviad Aberdam,

Shahar Tsiper,

Ron Litman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Blau_2024_CVPR,
  author    = {Blau, Tsachi and Fogel, Sharon and Ronen, Roi and Golts, Alona and Ganz, Roy and Ben Avraham, Elad and Aberdam, Aviad and Tsiper, Shahar and Litman, Ron},
  title     = {{GRAM}: Global Reasoning for Multi-Page {VQA}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15598--15607},
}
MS-DETR: Efficient DETR Training with Mixed Supervision: Chuyang Zhao,

Yifan Sun,

Wenhao Wang,

Qiang Chen,

Errui Ding,

Yi Yang,

Jingdong Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Chuyang and Sun, Yifan and Wang, Wenhao and Chen, Qiang and Ding, Errui and Yang, Yi and Wang, Jingdong},
  title     = {{MS-DETR}: Efficient {DETR} Training with Mixed Supervision},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17027--17036},
}
BEVSpread: Spread Voxel Pooling for Bird's-Eye-View Representation in Vision-based Roadside 3D Object Detection: Wenjie Wang,

Yehao Lu,

Guangcong Zheng,

Shuigen Zhan,

Xiaoqing Ye,

Zichang Tan,

Jingdong Wang,

Gaoang Wang,

Xi Li; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Wenjie and Lu, Yehao and Zheng, Guangcong and Zhan, Shuigen and Ye, Xiaoqing and Tan, Zichang and Wang, Jingdong and Wang, Gaoang and Li, Xi},
  title     = {{BEVSpread}: Spread Voxel Pooling for Bird's-Eye-View Representation in Vision-based Roadside {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14718--14727},
}
DriveWorld: 4D Pre-trained Scene Understanding via World Models for Autonomous Driving: Chen Min,

Dawei Zhao,

Liang Xiao,

Jian Zhao,

Xinli Xu,

Zheng Zhu,

Lei Jin,

Jianshu Li,

Yulan Guo,

Junliang Xing,

Liping Jing,

Yiming Nie,

Bin Dai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Min_2024_CVPR,
  author    = {Min, Chen and Zhao, Dawei and Xiao, Liang and Zhao, Jian and Xu, Xinli and Zhu, Zheng and Jin, Lei and Li, Jianshu and Guo, Yulan and Xing, Junliang and Jing, Liping and Nie, Yiming and Dai, Bin},
  title     = {{DriveWorld}: {4D} Pre-trained Scene Understanding via World Models for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15522--15533},
}
Bridging the Gap Between End-to-End and Two-Step Text Spotting: Mingxin Huang,

Hongliang Li,

Yuliang Liu,

Xiang Bai,

Lianwen Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Mingxin and Li, Hongliang and Liu, Yuliang and Bai, Xiang and Jin, Lianwen},
  title     = {Bridging the Gap Between End-to-End and Two-Step Text Spotting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15608--15618},
}
SUGAR: Pre-training 3D Visual Representations for Robotics: Shizhe Chen,

Ricardo Garcia,

Ivan Laptev,

Cordelia Schmid; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Shizhe and Garcia, Ricardo and Laptev, Ivan and Schmid, Cordelia},
  title     = {{SUGAR}: Pre-training {3D} Visual Representations for Robotics},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18049--18060},
}
PairAug: What Can Augmented Image-Text Pairs Do for Radiology?: Yutong Xie,

Qi Chen,

Sinuo Wang,

Minh-Son To,

Iris Lee,

Ee Win Khoo,

Kerolos Hendy,

Daniel Koh,

Yong Xia,

Qi Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Yutong and Chen, Qi and Wang, Sinuo and To, Minh-Son and Lee, Iris and Khoo, Ee Win and Hendy, Kerolos and Koh, Daniel and Xia, Yong and Wu, Qi},
  title     = {{PairAug}: What Can Augmented Image-Text Pairs Do for Radiology?},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11652--11661},
}
Harnessing Large Language Models for Training-free Video Anomaly Detection: Luca Zanella,

Willi Menapace,

Massimiliano Mancini,

Yiming Wang,

Elisa Ricci; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zanella_2024_CVPR,
  author    = {Zanella, Luca and Menapace, Willi and Mancini, Massimiliano and Wang, Yiming and Ricci, Elisa},
  title     = {Harnessing Large Language Models for Training-free Video Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18527--18536},
}
FineParser: A Fine-grained Spatio-temporal Action Parser for Human-centric Action Quality Assessment: Jinglin Xu,

Sibo Yin,

Guohao Zhao,

Zishuo Wang,

Yuxin Peng; [pdf] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Jinglin and Yin, Sibo and Zhao, Guohao and Wang, Zishuo and Peng, Yuxin},
  title     = {{FineParser}: A Fine-grained Spatio-temporal Action Parser for Human-centric Action Quality Assessment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14628--14637},
}
Language Models as Black-Box Optimizers for Vision-Language Models: Shihong Liu,

Samuel Yu,

Zhiqiu Lin,

Deepak Pathak,

Deva Ramanan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Shihong and Yu, Samuel and Lin, Zhiqiu and Pathak, Deepak and Ramanan, Deva},
  title     = {Language Models as Black-Box Optimizers for Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12687--12697},
}
Exploring Orthogonality in Open World Object Detection: Zhicheng Sun,

Jinghan Li,

Yadong Mu; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Zhicheng and Li, Jinghan and Mu, Yadong},
  title     = {Exploring Orthogonality in Open World Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17302--17312},
}
Mitigating Object Hallucinations in Large Vision-Language Models through Visual Contrastive Decoding: Sicong Leng,

Hang Zhang,

Guanzheng Chen,

Xin Li,

Shijian Lu,

Chunyan Miao,

Lidong Bing; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Leng_2024_CVPR,
  author    = {Leng, Sicong and Zhang, Hang and Chen, Guanzheng and Li, Xin and Lu, Shijian and Miao, Chunyan and Bing, Lidong},
  title     = {Mitigating Object Hallucinations in Large Vision-Language Models through Visual Contrastive Decoding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13872--13882},
}
Sculpt3D: Multi-View Consistent Text-to-3D Generation with Sparse 3D Prior: Cheng Chen,

Xiaofeng Yang,

Fan Yang,

Chengzeng Feng,

Zhoujie Fu,

Chuan-Sheng Foo,

Guosheng Lin,

Fayao Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Cheng and Yang, Xiaofeng and Yang, Fan and Feng, Chengzeng and Fu, Zhoujie and Foo, Chuan-Sheng and Lin, Guosheng and Liu, Fayao},
  title     = {{Sculpt3D}: Multi-View Consistent Text-to-{3D} Generation with Sparse {3D} Prior},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10228--10237},
}
ScanFormer: Referring Expression Comprehension by Iteratively Scanning: Wei Su,

Peihan Miao,

Huanzhang Dou,

Xi Li; [pdf] [arXiv]
[bibtex]
@InProceedings{Su_2024_CVPR,
  author    = {Su, Wei and Miao, Peihan and Dou, Huanzhang and Li, Xi},
  title     = {{ScanFormer}: Referring Expression Comprehension by Iteratively Scanning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13449--13458},
}
Model Inversion Robustness: Can Transfer Learning Help?: Sy-Tuyen Ho,

Koh Jun Hao,

Keshigeyan Chandrasegaran,

Ngoc-Bao Nguyen,

Ngai-Man Cheung; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ho_2024_CVPR,
  author    = {Ho, Sy-Tuyen and Hao, Koh Jun and Chandrasegaran, Keshigeyan and Nguyen, Ngoc-Bao and Cheung, Ngai-Man},
  title     = {Model Inversion Robustness: Can Transfer Learning Help?},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12183--12193},
}
RLHF-V: Towards Trustworthy MLLMs via Behavior Alignment from Fine-grained Correctional Human Feedback: Tianyu Yu,

Yuan Yao,

Haoye Zhang,

Taiwen He,

Yifeng Han,

Ganqu Cui,

Jinyi Hu,

Zhiyuan Liu,

Hai-Tao Zheng,

Maosong Sun,

Tat-Seng Chua; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2024_CVPR,
  author    = {Yu, Tianyu and Yao, Yuan and Zhang, Haoye and He, Taiwen and Han, Yifeng and Cui, Ganqu and Hu, Jinyi and Liu, Zhiyuan and Zheng, Hai-Tao and Sun, Maosong and Chua, Tat-Seng},
  title     = {{RLHF-V}: Towards Trustworthy {MLLMs} via Behavior Alignment from Fine-grained Correctional Human Feedback},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13807--13816},
}
ZeroShape: Regression-based Zero-shot Shape Reconstruction: Zixuan Huang,

Stefan Stojanov,

Anh Thai,

Varun Jampani,

James M. Rehg; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Zixuan and Stojanov, Stefan and Thai, Anh and Jampani, Varun and Rehg, James M.},
  title     = {{ZeroShape}: Regression-based Zero-shot Shape Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10061--10071},
}
The STVchrono Dataset: Towards Continuous Change Recognition in Time: Yanjun Sun,

Yue Qiu,

Mariia Khan,

Fumiya Matsuzawa,

Kenji Iwata; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Yanjun and Qiu, Yue and Khan, Mariia and Matsuzawa, Fumiya and Iwata, Kenji},
  title     = {The {STVchrono} Dataset: Towards Continuous Change Recognition in Time},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14111--14120},
}
SocialCircle: Learning the Angle-based Social Interaction Representation for Pedestrian Trajectory Prediction: Conghao Wong,

Beihao Xia,

Ziqian Zou,

Yulong Wang,

Xinge You; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wong_2024_CVPR,
  author    = {Wong, Conghao and Xia, Beihao and Zou, Ziqian and Wang, Yulong and You, Xinge},
  title     = {{SocialCircle}: Learning the Angle-based Social Interaction Representation for Pedestrian Trajectory Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {19005--19015},
}
Neighbor Relations Matter in Video Scene Detection: Jiawei Tan,

Hongxing Wang,

Jiaxin Li,

Zhilong Ou,

Zhangbin Qian; [pdf] [supp]
[bibtex]
@InProceedings{Tan_2024_CVPR,
  author    = {Tan, Jiawei and Wang, Hongxing and Li, Jiaxin and Ou, Zhilong and Qian, Zhangbin},
  title     = {Neighbor Relations Matter in Video Scene Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18473--18482},
}
Text-to-Image Diffusion Models are Great Sketch-Photo Matchmakers: Subhadeep Koley,

Ayan Kumar Bhunia,

Aneeshan Sain,

Pinaki Nath Chowdhury,

Tao Xiang,

Yi-Zhe Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Koley_2024_CVPR,
  author    = {Koley, Subhadeep and Bhunia, Ayan Kumar and Sain, Aneeshan and Chowdhury, Pinaki Nath and Xiang, Tao and Song, Yi-Zhe},
  title     = {Text-to-Image Diffusion Models are Great Sketch-Photo Matchmakers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16826--16837},
}
Mudslide: A Universal Nuclear Instance Segmentation Method: Jun Wang; [pdf]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Jun},
  title     = {Mudslide: A Universal Nuclear Instance Segmentation Method},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11673--11682},
}
Modeling Multimodal Social Interactions: New Challenges and Baselines with Densely Aligned Representations: Sangmin Lee,

Bolin Lai,

Fiona Ryan,

Bikram Boote,

James M. Rehg; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Sangmin and Lai, Bolin and Ryan, Fiona and Boote, Bikram and Rehg, James M.},
  title     = {Modeling Multimodal Social Interactions: New Challenges and Baselines with Densely Aligned Representations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14585--14595},
}
Prompt-Driven Dynamic Object-Centric Learning for Single Domain Generalization: Deng Li,

Aming Wu,

Yaowei Wang,

Yahong Han; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Deng and Wu, Aming and Wang, Yaowei and Han, Yahong},
  title     = {Prompt-Driven Dynamic Object-Centric Learning for Single Domain Generalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17606--17615},
}
Dual Pose-invariant Embeddings: Learning Category and Object-specific Discriminative Representations for Recognition and Retrieval: Rohan Sarkar,

Avinash Kak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sarkar_2024_CVPR,
  author    = {Sarkar, Rohan and Kak, Avinash},
  title     = {Dual Pose-invariant Embeddings: Learning Category and Object-specific Discriminative Representations for Recognition and Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17077--17085},
}
vid-TLDR: Training Free Token Merging for Light-weight Video Transformer: Joonmyung Choi,

Sanghyeok Lee,

Jaewon Chu,

Minhyuk Choi,

Hyunwoo J. Kim; [pdf] [supp]
[bibtex]
@InProceedings{Choi_2024_CVPR,
  author    = {Choi, Joonmyung and Lee, Sanghyeok and Chu, Jaewon and Choi, Minhyuk and Kim, Hyunwoo J.},
  title     = {{vid-TLDR}: Training Free Token Merging for Light-weight Video Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18771--18781},
}
DRESS: Instructing Large Vision-Language Models to Align and Interact with Humans via Natural Language Feedback: Yangyi Chen,

Karan Sikka,

Michael Cogswell,

Heng Ji,

Ajay Divakaran; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Yangyi and Sikka, Karan and Cogswell, Michael and Ji, Heng and Divakaran, Ajay},
  title     = {{DRESS}: Instructing Large Vision-Language Models to Align and Interact with Humans via Natural Language Feedback},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14239--14250},
}
Salience DETR: Enhancing Detection Transformer with Hierarchical Salience Filtering Refinement: Xiuquan Hou,

Meiqin Liu,

Senlin Zhang,

Ping Wei,

Badong Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Hou_2024_CVPR,
  author    = {Hou, Xiuquan and Liu, Meiqin and Zhang, Senlin and Wei, Ping and Chen, Badong},
  title     = {Salience {DETR}: Enhancing Detection Transformer with Hierarchical Salience Filtering Refinement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17574--17583},
}
Towards More Unified In-context Visual Understanding: Dianmo Sheng,

Dongdong Chen,

Zhentao Tan,

Qiankun Liu,

Qi Chu,

Jianmin Bao,

Tao Gong,

Bin Liu,

Shengwei Xu,

Nenghai Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sheng_2024_CVPR,
  author    = {Sheng, Dianmo and Chen, Dongdong and Tan, Zhentao and Liu, Qiankun and Chu, Qi and Bao, Jianmin and Gong, Tao and Liu, Bin and Xu, Shengwei and Yu, Nenghai},
  title     = {Towards More Unified In-context Visual Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13362--13372},
}
F3Loc: Fusion and Filtering for Floorplan Localization: Changan Chen,

Rui Wang,

Christoph Vogel,

Marc Pollefeys; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Changan and Wang, Rui and Vogel, Christoph and Pollefeys, Marc},
  title     = {{F3Loc}: Fusion and Filtering for Floorplan Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18029--18038},
}
Multi-View Attentive Contextualization for Multi-View 3D Object Detection: Xianpeng Liu,

Ce Zheng,

Ming Qian,

Nan Xue,

Chen Chen,

Zhebin Zhang,

Chen Li,

Tianfu Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Xianpeng and Zheng, Ce and Qian, Ming and Xue, Nan and Chen, Chen and Zhang, Zhebin and Li, Chen and Wu, Tianfu},
  title     = {Multi-View Attentive Contextualization for Multi-View {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16688--16698},
}
MemSAM: Taming Segment Anything Model for Echocardiography Video Segmentation: Xiaolong Deng,

Huisi Wu,

Runhao Zeng,

Jing Qin; [pdf] [supp]
[bibtex]
@InProceedings{Deng_2024_CVPR,
  author    = {Deng, Xiaolong and Wu, Huisi and Zeng, Runhao and Qin, Jing},
  title     = {{MemSAM}: Taming Segment Anything Model for Echocardiography Video Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9622--9631},
}
Language-conditioned Detection Transformer: Jang Hyun Cho,

Philipp Krähenbühl; [pdf] [supp]
[bibtex]
@InProceedings{Cho_2024_CVPR,
  author    = {Cho, Jang Hyun and Kr{\"a}henb{\"u}hl, Philipp},
  title     = {Language-conditioned Detection Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16593--16603},
}
Improving Single Domain-Generalized Object Detection: A Focus on Diversification and Alignment: Muhammad Sohail Danish,

Muhammad Haris Khan,

Muhammad Akhtar Munir,

M. Saquib Sarfraz,

Mohsen Ali; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Danish_2024_CVPR,
  author    = {Danish, Muhammad Sohail and Khan, Muhammad Haris and Munir, Muhammad Akhtar and Sarfraz, M. Saquib and Ali, Mohsen},
  title     = {Improving Single Domain-Generalized Object Detection: A Focus on Diversification and Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17732--17742},
}
ARTrackV2: Prompting Autoregressive Tracker Where to Look and How to Describe: Yifan Bai,

Zeyang Zhao,

Yihong Gong,

Xing Wei; [pdf] [arXiv]
[bibtex]
@InProceedings{Bai_2024_CVPR,
  author    = {Bai, Yifan and Zhao, Zeyang and Gong, Yihong and Wei, Xing},
  title     = {{ARTrackV2}: Prompting Autoregressive Tracker Where to Look and How to Describe},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {19048--19057},
}
A Vision Check-up for Language Models: Pratyusha Sharma,

Tamar Rott Shaham,

Manel Baradad,

Stephanie Fu,

Adrian Rodriguez-Munoz,

Shivam Duggal,

Phillip Isola,

Antonio Torralba; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sharma_2024_CVPR,
  author    = {Sharma, Pratyusha and Shaham, Tamar Rott and Baradad, Manel and Fu, Stephanie and Rodriguez-Munoz, Adrian and Duggal, Shivam and Isola, Phillip and Torralba, Antonio},
  title     = {A Vision Check-up for Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14410--14419},
}
SyncMask: Synchronized Attentional Masking for Fashion-centric Vision-Language Pretraining: Chull Hwan Song,

Taebaek Hwang,

Jooyoung Yoon,

Shunghyun Choi,

Yeong Hyeon Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2024_CVPR,
  author    = {Song, Chull Hwan and Hwang, Taebaek and Yoon, Jooyoung and Choi, Shunghyun and Gu, Yeong Hyeon},
  title     = {{SyncMask}: Synchronized Attentional Masking for Fashion-centric Vision-Language Pretraining},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13948--13957},
}
Countering Personalized Text-to-Image Generation with Influence Watermarks: Hanwen Liu,

Zhicheng Sun,

Yadong Mu; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Hanwen and Sun, Zhicheng and Mu, Yadong},
  title     = {Countering Personalized Text-to-Image Generation with Influence Watermarks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12257--12267},
}
PromptAD: Learning Prompts with only Normal Samples for Few-Shot Anomaly Detection: Xiaofan Li,

Zhizhong Zhang,

Xin Tan,

Chengwei Chen,

Yanyun Qu,

Yuan Xie,

Lizhuang Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Xiaofan and Zhang, Zhizhong and Tan, Xin and Chen, Chengwei and Qu, Yanyun and Xie, Yuan and Ma, Lizhuang},
  title     = {{PromptAD}: Learning Prompts with only Normal Samples for Few-Shot Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16838--16848},
}
DETRs Beat YOLOs on Real-time Object Detection: Yian Zhao,

Wenyu Lv,

Shangliang Xu,

Jinman Wei,

Guanzhong Wang,

Qingqing Dang,

Yi Liu,

Jie Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Yian and Lv, Wenyu and Xu, Shangliang and Wei, Jinman and Wang, Guanzhong and Dang, Qingqing and Liu, Yi and Chen, Jie},
  title     = {{DETRs} Beat {YOLOs} on Real-time Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16965--16974},
}
An Asymmetric Augmented Self-Supervised Learning Method for Unsupervised Fine-Grained Image Hashing: Feiran Hu,

Chenlin Zhang,

Jiangliang Guo,

Xiu-Shen Wei,

Lin Zhao,

Anqi Xu,

Lingyan Gao; [pdf] [supp]
[bibtex]
@InProceedings{Hu_2024_CVPR,
  author    = {Hu, Feiran and Zhang, Chenlin and Guo, Jiangliang and Wei, Xiu-Shen and Zhao, Lin and Xu, Anqi and Gao, Lingyan},
  title     = {An Asymmetric Augmented Self-Supervised Learning Method for Unsupervised Fine-Grained Image Hashing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17648--17657},
}
Exploring Pose-Aware Human-Object Interaction via Hybrid Learning: Eastman Z Y Wu,

Yali Li,

Yuan Wang,

Shengjin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Eastman Z Y and Li, Yali and Wang, Yuan and Wang, Shengjin},
  title     = {Exploring Pose-Aware Human-Object Interaction via Hybrid Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17815--17825},
}
Density-Adaptive Model Based on Motif Matrix for Multi-Agent Trajectory Prediction: Di Wen,

Haoran Xu,

Zhaocheng He,

Zhe Wu,

Guang Tan,

Peixi Peng; [pdf] [supp]
[bibtex]
@InProceedings{Wen_2024_CVPR,
  author    = {Wen, Di and Xu, Haoran and He, Zhaocheng and Wu, Zhe and Tan, Guang and Peng, Peixi},
  title     = {Density-Adaptive Model Based on Motif Matrix for Multi-Agent Trajectory Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14822--14832},
}
Contrastive Learning for DeepFake Classification and Localization via Multi-Label Ranking: Cheng-Yao Hong,

Yen-Chi Hsu,

Tyng-Luh Liu; [pdf] [supp]
[bibtex]
@InProceedings{Hong_2024_CVPR,
  author    = {Hong, Cheng-Yao and Hsu, Yen-Chi and Liu, Tyng-Luh},
  title     = {Contrastive Learning for {DeepFake} Classification and Localization via Multi-Label Ranking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17627--17637},
}
Enhancing the Power of OOD Detection via Sample-Aware Model Selection: Feng Xue,

Zi He,

Yuan Zhang,

Chuanlong Xie,

Zhenguo Li,

Falong Tan; [pdf] [supp]
[bibtex]
@InProceedings{Xue_2024_CVPR,
  author    = {Xue, Feng and He, Zi and Zhang, Yuan and Xie, Chuanlong and Li, Zhenguo and Tan, Falong},
  title     = {Enhancing the Power of {OOD} Detection via Sample-Aware Model Selection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17148--17157},
}
Collaborative Semantic Occupancy Prediction with Hybrid Feature Fusion in Connected Automated Vehicles: Rui Song,

Chenwei Liang,

Hu Cao,

Zhiran Yan,

Walter Zimmer,

Markus Gross,

Andreas Festag,

Alois Knoll; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2024_CVPR,
  author    = {Song, Rui and Liang, Chenwei and Cao, Hu and Yan, Zhiran and Zimmer, Walter and Gross, Markus and Festag, Andreas and Knoll, Alois},
  title     = {Collaborative Semantic Occupancy Prediction with Hybrid Feature Fusion in Connected Automated Vehicles},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17996--18006},
}
Towards Generalizable Tumor Synthesis: Qi Chen,

Xiaoxi Chen,

Haorui Song,

Zhiwei Xiong,

Alan Yuille,

Chen Wei,

Zongwei Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Qi and Chen, Xiaoxi and Song, Haorui and Xiong, Zhiwei and Yuille, Alan and Wei, Chen and Zhou, Zongwei},
  title     = {Towards Generalizable Tumor Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11147--11158},
}
EpiDiff: Enhancing Multi-View Synthesis via Localized Epipolar-Constrained Diffusion: Zehuan Huang,

Hao Wen,

Junting Dong,

Yaohui Wang,

Yangguang Li,

Xinyuan Chen,

Yan-Pei Cao,

Ding Liang,

Yu Qiao,

Bo Dai,

Lu Sheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Zehuan and Wen, Hao and Dong, Junting and Wang, Yaohui and Li, Yangguang and Chen, Xinyuan and Cao, Yan-Pei and Liang, Ding and Qiao, Yu and Dai, Bo and Sheng, Lu},
  title     = {{EpiDiff}: Enhancing Multi-View Synthesis via Localized Epipolar-Constrained Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9784--9794},
}
On the Faithfulness of Vision Transformer Explanations: Junyi Wu,

Weitai Kang,

Hao Tang,

Yuan Hong,

Yan Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Junyi and Kang, Weitai and Tang, Hao and Hong, Yuan and Yan, Yan},
  title     = {On the Faithfulness of Vision Transformer Explanations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10936--10945},
}
Pixel-level Semantic Correspondence through Layout-aware Representation Learning and Multi-scale Matching Integration: Yixuan Sun,

Zhangyue Yin,

Haibo Wang,

Yan Wang,

Xipeng Qiu,

Weifeng Ge,

Wenqiang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Yixuan and Yin, Zhangyue and Wang, Haibo and Wang, Yan and Qiu, Xipeng and Ge, Weifeng and Zhang, Wenqiang},
  title     = {Pixel-level Semantic Correspondence through Layout-aware Representation Learning and Multi-scale Matching Integration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17047--17056},
}
Dynamic Graph Representation with Knowledge-aware Attention for Histopathology Whole Slide Image Analysis: Jiawen Li,

Yuxuan Chen,

Hongbo Chu,

Qiehe Sun,

Tian Guan,

Anjia Han,

Yonghong He; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Jiawen and Chen, Yuxuan and Chu, Hongbo and Sun, Qiehe and Guan, Tian and Han, Anjia and He, Yonghong},
  title     = {Dynamic Graph Representation with Knowledge-aware Attention for Histopathology Whole Slide Image Analysis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11323--11332},
}
Align Before Adapt: Leveraging Entity-to-Region Alignments for Generalizable Video Action Recognition: Yifei Chen,

Dapeng Chen,

Ruijin Liu,

Sai Zhou,

Wenyuan Xue,

Wei Peng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Yifei and Chen, Dapeng and Liu, Ruijin and Zhou, Sai and Xue, Wenyuan and Peng, Wei},
  title     = {Align Before Adapt: Leveraging Entity-to-Region Alignments for Generalizable Video Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18688--18698},
}
Towards Robust 3D Object Detection with LiDAR and 4D Radar Fusion in Various Weather Conditions: Yujeong Chae,

Hyeonseong Kim,

Kuk-Jin Yoon; [pdf] [supp]
[bibtex]
@InProceedings{Chae_2024_CVPR,
  author    = {Chae, Yujeong and Kim, Hyeonseong and Yoon, Kuk-Jin},
  title     = {Towards Robust {3D} Object Detection with {LiDAR} and {4D} Radar Fusion in Various Weather Conditions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15162--15172},
}
Enhancing 3D Fidelity of Text-to-3D using Cross-View Correspondences: Seungwook Kim,

Kejie Li,

Xueqing Deng,

Yichun Shi,

Minsu Cho,

Peng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Seungwook and Li, Kejie and Deng, Xueqing and Shi, Yichun and Cho, Minsu and Wang, Peng},
  title     = {Enhancing {3D} Fidelity of Text-to-{3D} using Cross-View Correspondences},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10649--10658},
}
Bezier Everywhere All at Once: Learning Drivable Lanes as Bezier Graphs: Hugh Blayney,

Hanlin Tian,

Hamish Scott,

Nils Goldbeck,

Chess Stetson,

Panagiotis Angeloudis; [pdf] [supp]
[bibtex]
@InProceedings{Blayney_2024_CVPR,
  author    = {Blayney, Hugh and Tian, Hanlin and Scott, Hamish and Goldbeck, Nils and Stetson, Chess and Angeloudis, Panagiotis},
  title     = {{Bezier} Everywhere All at Once: Learning Drivable Lanes as {Bezier} Graphs},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15365--15374},
}
Can I Trust Your Answer? Visually Grounded Video Question Answering: Junbin Xiao,

Angela Yao,

Yicong Li,

Tat-Seng Chua; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2024_CVPR,
  author    = {Xiao, Junbin and Yao, Angela and Li, Yicong and Chua, Tat-Seng},
  title     = {Can {I} Trust Your Answer? Visually Grounded Video Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13204--13214},
}
Polos: Multimodal Metric Learning from Human Feedback for Image Captioning: Yuiga Wada,

Kanta Kaneda,

Daichi Saito,

Komei Sugiura; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wada_2024_CVPR,
  author    = {Wada, Yuiga and Kaneda, Kanta and Saito, Daichi and Sugiura, Komei},
  title     = {Polos: Multimodal Metric Learning from Human Feedback for Image Captioning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13559--13568},
}
Detours for Navigating Instructional Videos: Kumar Ashutosh,

Zihui Xue,

Tushar Nagarajan,

Kristen Grauman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ashutosh_2024_CVPR,
  author    = {Ashutosh, Kumar and Xue, Zihui and Nagarajan, Tushar and Grauman, Kristen},
  title     = {Detours for Navigating Instructional Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18804--18815},
}
Discontinuity-preserving Normal Integration with Auxiliary Edges: Hyomin Kim,

Yucheol Jung,

Seungyong Lee; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Kim_2024_CVPR is shared with another entry in this file; keys must be made unique before both can be cited.
@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Hyomin and Jung, Yucheol and Lee, Seungyong},
  title     = {Discontinuity-preserving Normal Integration with Auxiliary Edges},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11915--11923},
}
Self-Supervised Multi-Object Tracking with Path Consistency: Zijia Lu,

Bing Shuai,

Yanbei Chen,

Zhenlin Xu,

Davide Modolo; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Lu_2024_CVPR is shared with another entry in this file; keys must be made unique before both can be cited.
@InProceedings{Lu_2024_CVPR,
  author    = {Lu, Zijia and Shuai, Bing and Chen, Yanbei and Xu, Zhenlin and Modolo, Davide},
  title     = {Self-Supervised Multi-Object Tracking with Path Consistency},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19016--19026},
}
Improving Distant 3D Object Detection Using 2D Box Supervision: Zetong Yang,

Zhiding Yu,

Chris Choy,

Renhao Wang,

Anima Anandkumar,

Jose M. Alvarez; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Yang_2024_CVPR is shared with other entries in this file; keys must be made unique before more than one can be cited.
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Zetong and Yu, Zhiding and Choy, Chris and Wang, Renhao and Anandkumar, Anima and Alvarez, Jose M.},
  title     = {Improving Distant {3D} Object Detection Using {2D} Box Supervision},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14853--14863},
}
HDQMF: Holographic Feature Decomposition Using Quantum Algorithms: Prathyush Prasanth Poduval,

Zhuowen Zou,

Mohsen Imani; [pdf] [supp]
[bibtex]
@InProceedings{Poduval_2024_CVPR,
  author    = {Poduval, Prathyush Prasanth and Zou, Zhuowen and Imani, Mohsen},
  title     = {{HDQMF}: Holographic Feature Decomposition Using Quantum Algorithms},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10978--10987},
}
UniPAD: A Universal Pre-training Paradigm for Autonomous Driving: Honghui Yang,

Sha Zhang,

Di Huang,

Xiaoyang Wu,

Haoyi Zhu,

Tong He,

Shixiang Tang,

Hengshuang Zhao,

Qibo Qiu,

Binbin Lin,

Xiaofei He,

Wanli Ouyang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Yang_2024_CVPR is shared with other entries in this file; keys must be made unique before more than one can be cited.
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Honghui and Zhang, Sha and Huang, Di and Wu, Xiaoyang and Zhu, Haoyi and He, Tong and Tang, Shixiang and Zhao, Hengshuang and Qiu, Qibo and Lin, Binbin and He, Xiaofei and Ouyang, Wanli},
  title     = {{UniPAD}: A Universal Pre-training Paradigm for Autonomous Driving},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15238--15250},
}
SocialCounterfactuals: Probing and Mitigating Intersectional Social Biases in Vision-Language Models with Counterfactual Examples: Phillip Howard,

Avinash Madasu,

Tiep Le,

Gustavo Lujan Moreno,

Anahita Bhiwandiwalla,

Vasudev Lal; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): the page lists "Gustavo Lujan Moreno"; confirm whether the surname is "Moreno" (as encoded) or the compound "Lujan Moreno".
@InProceedings{Howard_2024_CVPR,
  author    = {Howard, Phillip and Madasu, Avinash and Le, Tiep and Moreno, Gustavo Lujan and Bhiwandiwalla, Anahita and Lal, Vasudev},
  title     = {{SocialCounterfactuals}: Probing and Mitigating Intersectional Social Biases in Vision-Language Models with Counterfactual Examples},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11975--11985},
}
Efficient Privacy-Preserving Visual Localization Using 3D Ray Clouds: Heejoon Moon,

Chunghwan Lee,

Je Hyeong Hong; [pdf] [supp]
[bibtex]
@InProceedings{Moon_2024_CVPR,
  author    = {Moon, Heejoon and Lee, Chunghwan and Hong, Je Hyeong},
  title     = {Efficient Privacy-Preserving Visual Localization Using {3D} Ray Clouds},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9773--9783},
}
CNC-Net: Self-Supervised Learning for CNC Machining Operations: Mohsen Yavartanoo,

Sangmin Hong,

Reyhaneh Neshatavar,

Kyoung Mu Lee; [pdf] [supp]
[bibtex]
@InProceedings{Yavartanoo_2024_CVPR,
  author    = {Yavartanoo, Mohsen and Hong, Sangmin and Neshatavar, Reyhaneh and Lee, Kyoung Mu},
  title     = {{CNC-Net}: Self-Supervised Learning for {CNC} Machining Operations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9816--9825},
}
OPERA: Alleviating Hallucination in Multi-Modal Large Language Models via Over-Trust Penalty and Retrospection-Allocation: Qidong Huang,

Xiaoyi Dong,

Pan Zhang,

Bin Wang,

Conghui He,

Jiaqi Wang,

Dahua Lin,

Weiming Zhang,

Nenghai Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Qidong and Dong, Xiaoyi and Zhang, Pan and Wang, Bin and He, Conghui and Wang, Jiaqi and Lin, Dahua and Zhang, Weiming and Yu, Nenghai},
  title     = {{OPERA}: Alleviating Hallucination in Multi-Modal Large Language Models via Over-Trust Penalty and Retrospection-Allocation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13418--13427},
}
Volumetric Environment Representation for Vision-Language Navigation: Rui Liu,

Wenguan Wang,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Liu_2024_CVPR is shared with another entry in this file; keys must be made unique before both can be cited.
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Rui and Wang, Wenguan and Yang, Yi},
  title     = {Volumetric Environment Representation for Vision-Language Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16317--16328},
}
NeRFDeformer: NeRF Transformation from a Single View via 3D Scene Flows: Zhenggang Tang,

Zhongzheng Ren,

Xiaoming Zhao,

Bowen Wen,

Jonathan Tremblay,

Stan Birchfield,

Alexander Schwing; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2024_CVPR,
  author    = {Tang, Zhenggang and Ren, Zhongzheng and Zhao, Xiaoming and Wen, Bowen and Tremblay, Jonathan and Birchfield, Stan and Schwing, Alexander},
  title     = {{NeRFDeformer}: {NeRF} Transformation from a Single View via {3D} Scene Flows},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10293--10303},
}
DiffusionTrack: Point Set Diffusion Model for Visual Object Tracking: Fei Xie,

Zhongdao Wang,

Chao Ma; [pdf] [supp]
[bibtex]
% NOTE(review): key Xie_2024_CVPR is shared with another entry in this file; keys must be made unique before both can be cited.
@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Fei and Wang, Zhongdao and Ma, Chao},
  title     = {{DiffusionTrack}: Point Set Diffusion Model for Visual Object Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19113--19124},
}
Scaling Diffusion Models to Real-World 3D LiDAR Scene Completion: Lucas Nunes,

Rodrigo Marcuzzi,

Benedikt Mersch,

Jens Behley,

Cyrill Stachniss; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nunes_2024_CVPR,
  author    = {Nunes, Lucas and Marcuzzi, Rodrigo and Mersch, Benedikt and Behley, Jens and Stachniss, Cyrill},
  title     = {Scaling Diffusion Models to Real-World {3D} {LiDAR} Scene Completion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14770--14780},
}
Physical Backdoor: Towards Temperature-based Backdoor Attacks in the Physical World: Wen Yin,

Jian Lou,

Pan Zhou,

Yulai Xie,

Dan Feng,

Yuhua Sun,

Tailai Zhang,

Lichao Sun; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Yin_2024_CVPR is shared with another entry in this file; keys must be made unique before both can be cited.
@InProceedings{Yin_2024_CVPR,
  author    = {Yin, Wen and Lou, Jian and Zhou, Pan and Xie, Yulai and Feng, Dan and Sun, Yuhua and Zhang, Tailai and Sun, Lichao},
  title     = {Physical Backdoor: Towards Temperature-based Backdoor Attacks in the Physical World},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12733--12743},
}
Make Me a BNN: A Simple Strategy for Estimating Bayesian Uncertainty from Pre-trained Models: Gianni Franchi,

Olivier Laurent,

Maxence Leguery,

Andrei Bursuc,

Andrea Pilzer,

Angela Yao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Franchi_2024_CVPR,
  author    = {Franchi, Gianni and Laurent, Olivier and Leguery, Maxence and Bursuc, Andrei and Pilzer, Andrea and Yao, Angela},
  title     = {Make Me a {BNN}: A Simple Strategy for Estimating {Bayesian} Uncertainty from Pre-trained Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12194--12204},
}
Language-only Training of Zero-shot Composed Image Retrieval: Geonmo Gu,

Sanghyuk Chun,

Wonjae Kim,

Yoohoon Kang,

Sangdoo Yun; [pdf] [supp]
[bibtex]
@InProceedings{Gu_2024_CVPR,
  author    = {Gu, Geonmo and Chun, Sanghyuk and Kim, Wonjae and Kang, Yoohoon and Yun, Sangdoo},
  title     = {Language-only Training of Zero-shot Composed Image Retrieval},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13225--13234},
}
Efficient and Effective Weakly-Supervised Action Segmentation via Action-Transition-Aware Boundary Alignment: Angchi Xu,

Wei-Shi Zheng; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Xu_2024_CVPR is shared with other entries in this file; keys must be made unique before more than one can be cited.
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Angchi and Zheng, Wei-Shi},
  title     = {Efficient and Effective Weakly-Supervised Action Segmentation via Action-Transition-Aware Boundary Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18253--18262},
}
Pixel-Aligned Language Model: Jiarui Xu,

Xingyi Zhou,

Shen Yan,

Xiuye Gu,

Anurag Arnab,

Chen Sun,

Xiaolong Wang,

Cordelia Schmid; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Xu_2024_CVPR is shared with other entries in this file; keys must be made unique before more than one can be cited.
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Jiarui and Zhou, Xingyi and Yan, Shen and Gu, Xiuye and Arnab, Anurag and Sun, Chen and Wang, Xiaolong and Schmid, Cordelia},
  title     = {Pixel-Aligned Language Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13030--13039},
}
SNIDA: Unlocking Few-Shot Object Detection with Non-linear Semantic Decoupling Augmentation: Yanjie Wang,

Xu Zou,

Luxin Yan,

Sheng Zhong,

Jiahuan Zhou; [pdf] [supp]
[bibtex]
% NOTE(review): key Wang_2024_CVPR is shared with other entries in this file; keys must be made unique before more than one can be cited.
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Yanjie and Zou, Xu and Yan, Luxin and Zhong, Sheng and Zhou, Jiahuan},
  title     = {{SNIDA}: Unlocking Few-Shot Object Detection with Non-linear Semantic Decoupling Augmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12544--12553},
}
Not All Voxels Are Equal: Hardness-Aware Semantic Scene Completion with Self-Distillation: Song Wang,

Jiawei Yu,

Wentong Li,

Wenyu Liu,

Xiaolu Liu,

Junbo Chen,

Jianke Zhu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Wang_2024_CVPR is shared with other entries in this file; keys must be made unique before more than one can be cited.
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Song and Yu, Jiawei and Li, Wentong and Liu, Wenyu and Liu, Xiaolu and Chen, Junbo and Zhu, Jianke},
  title     = {Not All Voxels Are Equal: Hardness-Aware Semantic Scene Completion with Self-Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14792--14801},
}
3D-LFM: Lifting Foundation Model: Mosam Dabhi,

László A. Jeni,

Simon Lucey; [pdf] [supp]
[bibtex]
@InProceedings{Dabhi_2024_CVPR,
  author    = {Dabhi, Mosam and Jeni, L{\'a}szl{\'o} A. and Lucey, Simon},
  title     = {{3D-LFM}: Lifting Foundation Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10466--10475},
}
Quantifying Uncertainty in Motion Prediction with Variational Bayesian Mixture: Juanwu Lu,

Can Cui,

Yunsheng Ma,

Aniket Bera,

Ziran Wang; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Lu_2024_CVPR is shared with another entry in this file; keys must be made unique before both can be cited.
@InProceedings{Lu_2024_CVPR,
  author    = {Lu, Juanwu and Cui, Can and Ma, Yunsheng and Bera, Aniket and Wang, Ziran},
  title     = {Quantifying Uncertainty in Motion Prediction with Variational {Bayesian} Mixture},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15428--15437},
}
Explaining CLIP's Performance Disparities on Data from Blind/Low Vision Users: Daniela Massiceti,

Camilla Longden,

Agnieszka Slowik,

Samuel Wills,

Martin Grayson,

Cecily Morrison; [pdf] [supp]
[bibtex]
@InProceedings{Massiceti_2024_CVPR,
  author    = {Massiceti, Daniela and Longden, Camilla and Slowik, Agnieszka and Wills, Samuel and Grayson, Martin and Morrison, Cecily},
  title     = {Explaining {CLIP}'s Performance Disparities on Data from Blind/Low Vision Users},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12172--12182},
}
SingularTrajectory: Universal Trajectory Predictor Using Diffusion Model: Inhwan Bae,

Young-Jae Park,

Hae-Gon Jeon; [pdf] [arXiv]
[bibtex]
@InProceedings{Bae_2024_CVPR,
  author    = {Bae, Inhwan and Park, Young-Jae and Jeon, Hae-Gon},
  title     = {{SingularTrajectory}: Universal Trajectory Predictor Using Diffusion Model},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17890--17901},
}
Generating Handwritten Mathematical Expressions From Symbol Graphs: An End-to-End Pipeline: Yu Chen,

Fei Gao,

Yanguang Zhang,

Maoying Qiao,

Nannan Wang; [pdf] [supp]
[bibtex]
% NOTE(review): key Chen_2024_CVPR is shared with another entry in this file; keys must be made unique before both can be cited.
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Yu and Gao, Fei and Zhang, Yanguang and Qiao, Maoying and Wang, Nannan},
  title     = {Generating Handwritten Mathematical Expressions From Symbol Graphs: An End-to-End Pipeline},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15675--15685},
}
Why Not Use Your Textbook? Knowledge-Enhanced Procedure Planning of Instructional Videos: Kumaranage Ravindu Yasas Nagasinghe,

Honglu Zhou,

Malitha Gunawardhana,

Martin Renqiang Min,

Daniel Harari,

Muhammad Haris Khan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nagasinghe_2024_CVPR,
  author    = {Nagasinghe, Kumaranage Ravindu Yasas and Zhou, Honglu and Gunawardhana, Malitha and Min, Martin Renqiang and Harari, Daniel and Khan, Muhammad Haris},
  title     = {Why Not Use Your Textbook? {Knowledge}-Enhanced Procedure Planning of Instructional Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18816--18826},
}
FreeKD: Knowledge Distillation via Semantic Frequency Prompt: Yuan Zhang,

Tao Huang,

Jiaming Liu,

Tao Jiang,

Kuan Cheng,

Shanghang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Yuan and Huang, Tao and Liu, Jiaming and Jiang, Tao and Cheng, Kuan and Zhang, Shanghang},
  title     = {{FreeKD}: Knowledge Distillation via Semantic Frequency Prompt},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15931--15940},
}
Can't Make an Omelette Without Breaking Some Eggs: Plausible Action Anticipation Using Large Video-Language Models: Himangi Mittal,

Nakul Agarwal,

Shao-Yuan Lo,

Kwonjoon Lee; [pdf] [supp]
[bibtex]
@InProceedings{Mittal_2024_CVPR,
  author    = {Mittal, Himangi and Agarwal, Nakul and Lo, Shao-Yuan and Lee, Kwonjoon},
  title     = {Can't Make an Omelette Without Breaking Some Eggs: Plausible Action Anticipation Using Large Video-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18580--18590},
}
On the Estimation of Image-matching Uncertainty in Visual Place Recognition: Mubariz Zaffar,

Liangliang Nan,

Julian F. P. Kooij; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zaffar_2024_CVPR,
  author    = {Zaffar, Mubariz and Nan, Liangliang and Kooij, Julian F. P.},
  title     = {On the Estimation of Image-matching Uncertainty in Visual Place Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17743--17753},
}
Prompt-Enhanced Multiple Instance Learning for Weakly Supervised Video Anomaly Detection: Junxi Chen,

Liang Li,

Li Su,

Zheng-jun Zha,

Qingming Huang; [pdf] [supp]
[bibtex]
% NOTE(review): key Chen_2024_CVPR is shared with another entry in this file; keys must be made unique before both can be cited.
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Junxi and Li, Liang and Su, Li and Zha, Zheng-jun and Huang, Qingming},
  title     = {Prompt-Enhanced Multiple Instance Learning for Weakly Supervised Video Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18319--18329},
}
Non-autoregressive Sequence-to-Sequence Vision-Language Models: Kunyu Shi,

Qi Dong,

Luis Goncalves,

Zhuowen Tu,

Stefano Soatto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2024_CVPR,
  author    = {Shi, Kunyu and Dong, Qi and Goncalves, Luis and Tu, Zhuowen and Soatto, Stefano},
  title     = {Non-autoregressive Sequence-to-Sequence Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13603--13612},
}
Active Object Detection with Knowledge Aggregation and Distillation from Large Models: Dejie Yang,

Yang Liu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Yang_2024_CVPR is shared with other entries in this file; keys must be made unique before more than one can be cited.
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Dejie and Liu, Yang},
  title     = {Active Object Detection with Knowledge Aggregation and Distillation from Large Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16624--16633},
}
Weak-to-Strong 3D Object Detection with X-Ray Distillation: Alexander Gambashidze,

Aleksandr Dadukin,

Maxim Golyadkin,

Maria Razzhivina,

Ilya Makarov; [pdf] [supp]
[bibtex]
@InProceedings{Gambashidze_2024_CVPR,
  author    = {Gambashidze, Alexander and Dadukin, Aleksandr and Golyadkin, Maxim and Razzhivina, Maria and Makarov, Ilya},
  title     = {Weak-to-Strong {3D} Object Detection with {X-Ray} Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15055--15064},
}
Active Open-Vocabulary Recognition: Let Intelligent Moving Mitigate CLIP Limitations: Lei Fan,

Jianxiong Zhou,

Xiaoying Xing,

Ying Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2024_CVPR,
  author    = {Fan, Lei and Zhou, Jianxiong and Xing, Xiaoying and Wu, Ying},
  title     = {Active Open-Vocabulary Recognition: Let Intelligent Moving Mitigate {CLIP} Limitations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16394--16403},
}
Efficient Meshflow and Optical Flow Estimation from Event Cameras: Xinglong Luo,

Ao Luo,

Zhengning Wang,

Chunyu Lin,

Bing Zeng,

Shuaicheng Liu; [pdf]
[bibtex]
@InProceedings{Luo_2024_CVPR,
  author    = {Luo, Xinglong and Luo, Ao and Wang, Zhengning and Lin, Chunyu and Zeng, Bing and Liu, Shuaicheng},
  title     = {Efficient Meshflow and Optical Flow Estimation from Event Cameras},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19198--19207},
}
Visual Program Distillation: Distilling Tools and Programmatic Reasoning into Vision-Language Models: Yushi Hu,

Otilia Stretcu,

Chun-Ta Lu,

Krishnamurthy Viswanathan,

Kenji Hata,

Enming Luo,

Ranjay Krishna,

Ariel Fuxman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hu_2024_CVPR,
  author    = {Hu, Yushi and Stretcu, Otilia and Lu, Chun-Ta and Viswanathan, Krishnamurthy and Hata, Kenji and Luo, Enming and Krishna, Ranjay and Fuxman, Ariel},
  title     = {Visual Program Distillation: Distilling Tools and Programmatic Reasoning into Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9590--9601},
}
A Backpack Full of Skills: Egocentric Video Understanding with Diverse Task Perspectives: Simone Alberto Peirone,

Francesca Pistilli,

Antonio Alliegro,

Giuseppe Averta; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peirone_2024_CVPR,
  author    = {Peirone, Simone Alberto and Pistilli, Francesca and Alliegro, Antonio and Averta, Giuseppe},
  title     = {A Backpack Full of Skills: Egocentric Video Understanding with Diverse Task Perspectives},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18275--18285},
}
Visual In-Context Prompting: Feng Li,

Qing Jiang,

Hao Zhang,

Tianhe Ren,

Shilong Liu,

Xueyan Zou,

Huaizhe Xu,

Hongyang Li,

Jianwei Yang,

Chunyuan Li,

Lei Zhang,

Jianfeng Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Feng and Jiang, Qing and Zhang, Hao and Ren, Tianhe and Liu, Shilong and Zou, Xueyan and Xu, Huaizhe and Li, Hongyang and Yang, Jianwei and Li, Chunyuan and Zhang, Lei and Gao, Jianfeng},
  title     = {Visual In-Context Prompting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12861--12871},
}
Instruct-ReID: A Multi-purpose Person Re-identification Task with Instructions: Weizhen He,

Yiheng Deng,

Shixiang Tang,

Qihao Chen,

Qingsong Xie,

Yizhou Wang,

Lei Bai,

Feng Zhu,

Rui Zhao,

Wanli Ouyang,

Donglian Qi,

Yunfeng Yan; [pdf] [supp]
[bibtex]
@InProceedings{He_2024_CVPR,
  author    = {He, Weizhen and Deng, Yiheng and Tang, Shixiang and Chen, Qihao and Xie, Qingsong and Wang, Yizhou and Bai, Lei and Zhu, Feng and Zhao, Rui and Ouyang, Wanli and Qi, Donglian and Yan, Yunfeng},
  title     = {{Instruct-ReID}: A Multi-purpose Person Re-identification Task with Instructions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17521--17531},
}
IBD-SLAM: Learning Image-Based Depth Fusion for Generalizable SLAM: Minghao Yin,

Shangzhe Wu,

Kai Han; [pdf] [supp]
[bibtex]
% NOTE(review): key Yin_2024_CVPR is shared with another entry in this file; keys must be made unique before both can be cited.
@InProceedings{Yin_2024_CVPR,
  author    = {Yin, Minghao and Wu, Shangzhe and Han, Kai},
  title     = {{IBD-SLAM}: Learning Image-Based Depth Fusion for Generalizable {SLAM}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10563--10573},
}
CPLIP: Zero-Shot Learning for Histopathology with Comprehensive Vision-Language Alignment: Sajid Javed,

Arif Mahmood,

Iyyakutti Iyappan Ganapathi,

Fayaz Ali Dharejo,

Naoufel Werghi,

Mohammed Bennamoun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Javed_2024_CVPR,
  author    = {Javed, Sajid and Mahmood, Arif and Ganapathi, Iyyakutti Iyappan and Dharejo, Fayaz Ali and Werghi, Naoufel and Bennamoun, Mohammed},
  title     = {{CPLIP}: Zero-Shot Learning for Histopathology with Comprehensive Vision-Language Alignment},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11450--11459},
}
Reg-PTQ: Regression-specialized Post-training Quantization for Fully Quantized Object Detector: Yifu Ding,

Weilun Feng,

Chuyan Chen,

Jinyang Guo,

Xianglong Liu; [pdf] [supp]
[bibtex]
@InProceedings{Ding_2024_CVPR,
  author    = {Ding, Yifu and Feng, Weilun and Chen, Chuyan and Guo, Jinyang and Liu, Xianglong},
  title     = {{Reg-PTQ}: Regression-specialized Post-training Quantization for Fully Quantized Object Detector},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16174--16184},
}
Action Scene Graphs for Long-Form Understanding of Egocentric Videos: Ivan Rodin,

Antonino Furnari,

Kyle Min,

Subarna Tripathi,

Giovanni Maria Farinella; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rodin_2024_CVPR,
  author    = {Rodin, Ivan and Furnari, Antonino and Min, Kyle and Tripathi, Subarna and Farinella, Giovanni Maria},
  title     = {Action Scene Graphs for Long-Form Understanding of Egocentric Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18622--18632},
}
De-confounded Data-free Knowledge Distillation for Handling Distribution Shifts: Yuzheng Wang,

Dingkang Yang,

Zhaoyu Chen,

Yang Liu,

Siao Liu,

Wenqiang Zhang,

Lihua Zhang,

Lizhe Qi; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Wang_2024_CVPR is shared with other entries in this file; keys must be made unique before more than one can be cited.
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Yuzheng and Yang, Dingkang and Chen, Zhaoyu and Liu, Yang and Liu, Siao and Zhang, Wenqiang and Zhang, Lihua and Qi, Lizhe},
  title     = {De-confounded Data-free Knowledge Distillation for Handling Distribution Shifts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12615--12625},
}
Siamese Learning with Joint Alignment and Regression for Weakly-Supervised Video Paragraph Grounding: Chaolei Tan,

Jianhuang Lai,

Wei-Shi Zheng,

Jian-Fang Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2024_CVPR,
  author    = {Tan, Chaolei and Lai, Jianhuang and Zheng, Wei-Shi and Hu, Jian-Fang},
  title     = {Siamese Learning with Joint Alignment and Regression for Weakly-Supervised Video Paragraph Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13569--13580},
}
LEOD: Label-Efficient Object Detection for Event Cameras: Ziyi Wu,

Mathias Gehrig,

Qing Lyu,

Xudong Liu,

Igor Gilitschenski; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Ziyi and Gehrig, Mathias and Lyu, Qing and Liu, Xudong and Gilitschenski, Igor},
  title     = {{LEOD}: Label-Efficient Object Detection for Event Cameras},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16933--16943},
}
Morphological Prototyping for Unsupervised Slide Representation Learning in Computational Pathology: Andrew H. Song,

Richard J. Chen,

Tong Ding,

Drew F.K. Williamson,

Guillaume Jaume,

Faisal Mahmood; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Song_2024_CVPR,
  author    = {Song, Andrew H. and Chen, Richard J. and Ding, Tong and Williamson, Drew F. K. and Jaume, Guillaume and Mahmood, Faisal},
  title     = {Morphological Prototyping for Unsupervised Slide Representation Learning in Computational Pathology},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11566--11578},
}
Dense Optical Tracking: Connecting the Dots: Guillaume Le Moing,

Jean Ponce,

Cordelia Schmid; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Le_Moing_2024_CVPR,
  author    = {Le Moing, Guillaume and Ponce, Jean and Schmid, Cordelia},
  title     = {Dense Optical Tracking: Connecting the Dots},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19187--19197},
}
A Stealthy Wrongdoer: Feature-Oriented Reconstruction Attack against Split Learning: Xiaoyang Xu,

Mengda Yang,

Wenzhe Yi,

Ziang Li,

Juan Wang,

Hongxin Hu,

Yong Zhuang,

Yaxin Liu; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Xu_2024_CVPR is shared with other entries in this file; keys must be made unique before more than one can be cited.
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Xiaoyang and Yang, Mengda and Yi, Wenzhe and Li, Ziang and Wang, Juan and Hu, Hongxin and Zhuang, Yong and Liu, Yaxin},
  title     = {A Stealthy Wrongdoer: Feature-Oriented Reconstruction Attack against Split Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12130--12139},
}
TULIP: Transformer for Upsampling of LiDAR Point Clouds: Bin Yang,

Patrick Pfreundschuh,

Roland Siegwart,

Marco Hutter,

Peyman Moghadam,

Vaishakh Patil; [pdf] [supp] [arXiv]
[bibtex]
% NOTE(review): key Yang_2024_CVPR is shared with other entries in this file; keys must be made unique before more than one can be cited.
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Bin and Pfreundschuh, Patrick and Siegwart, Roland and Hutter, Marco and Moghadam, Peyman and Patil, Vaishakh},
  title     = {{TULIP}: Transformer for Upsampling of {LiDAR} Point Clouds},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15354--15364},
}
BT-Adapter: Video Conversation is Feasible Without Video Instruction Tuning: Ruyang Liu,

Chen Li,

Yixiao Ge,

Thomas H. Li,

Ying Shan,

Ge Li; [pdf] [supp]
[bibtex]
% NOTE(review): key Liu_2024_CVPR is shared with another entry in this file; keys must be made unique before both can be cited.
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Ruyang and Li, Chen and Ge, Yixiao and Li, Thomas H. and Shan, Ying and Li, Ge},
  title     = {{BT-Adapter}: Video Conversation is Feasible Without Video Instruction Tuning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13658--13667},
}
Generate Subgoal Images before Act: Unlocking the Chain-of-Thought Reasoning in Diffusion Model for Robot Manipulation with Multimodal Prompts: Fei Ni,

Jianye Hao,

Shiguang Wu,

Longxin Kou,

Jiashun Liu,

Yan Zheng,

Bin Wang,

Yuzheng Zhuang; [pdf] [supp]
[bibtex]
@InProceedings{Ni_2024_CVPR,
  author    = {Ni, Fei and Hao, Jianye and Wu, Shiguang and Kou, Longxin and Liu, Jiashun and Zheng, Yan and Wang, Bin and Zhuang, Yuzheng},
  title     = {Generate Subgoal Images before Act: Unlocking the Chain-of-Thought Reasoning in Diffusion Model for Robot Manipulation with Multimodal Prompts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13991--14000},
}
Asymmetric Masked Distillation for Pre-Training Small Foundation Models: Zhiyu Zhao,

Bingkun Huang,

Sen Xing,

Gangshan Wu,

Yu Qiao,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Zhiyu and Huang, Bingkun and Xing, Sen and Wu, Gangshan and Qiao, Yu and Wang, Limin},
  title     = {Asymmetric Masked Distillation for Pre-Training Small Foundation Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18516--18526},
}
MP5: A Multi-modal Open-ended Embodied System in Minecraft via Active Perception: Yiran Qin,

Enshen Zhou,

Qichang Liu,

Zhenfei Yin,

Lu Sheng,

Ruimao Zhang,

Yu Qiao,

Jing Shao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2024_CVPR,
  author    = {Qin, Yiran and Zhou, Enshen and Liu, Qichang and Yin, Zhenfei and Sheng, Lu and Zhang, Ruimao and Qiao, Yu and Shao, Jing},
  title     = {{MP5}: A Multi-modal Open-ended Embodied System in {Minecraft} via Active Perception},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16307--16316},
}
Uncovering What Why and How: A Comprehensive Benchmark for Causation Understanding of Video Anomaly: Hang Du,

Sicheng Zhang,

Binzhu Xie,

Guoshun Nan,

Jiayang Zhang,

Junrui Xu,

Hangyu Liu,

Sicong Leng,

Jiangming Liu,

Hehe Fan,

Dajiu Huang,

Jing Feng,

Linli Chen,

Can Zhang,

Xuhuan Li,

Hao Zhang,

Jianhang Chen,

Qimei Cui,

Xiaofeng Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2024_CVPR,
  author    = {Du, Hang and Zhang, Sicheng and Xie, Binzhu and Nan, Guoshun and Zhang, Jiayang and Xu, Junrui and Liu, Hangyu and Leng, Sicong and Liu, Jiangming and Fan, Hehe and Huang, Dajiu and Feng, Jing and Chen, Linli and Zhang, Can and Li, Xuhuan and Zhang, Hao and Chen, Jianhang and Cui, Qimei and Tao, Xiaofeng},
  title     = {Uncovering What Why and How: A Comprehensive Benchmark for Causation Understanding of Video Anomaly},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18793--18803}
}
MiKASA: Multi-Key-Anchor & Scene-Aware Transformer for 3D Visual Grounding: Chun-Peng Chang,

Shaoxiang Wang,

Alain Pagani,

Didier Stricker; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chang_2024_CVPR,
  author    = {Chang, Chun-Peng and Wang, Shaoxiang and Pagani, Alain and Stricker, Didier},
  title     = {{MiKASA}: Multi-Key-Anchor \& Scene-Aware Transformer for {3D} Visual Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14131--14140}
}
ZePT: Zero-Shot Pan-Tumor Segmentation via Query-Disentangling and Self-Prompting: Yankai Jiang,

Zhongzhen Huang,

Rongzhao Zhang,

Xiaofan Zhang,

Shaoting Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jiang_2024_CVPR,
  author    = {Jiang, Yankai and Huang, Zhongzhen and Zhang, Rongzhao and Zhang, Xiaofan and Zhang, Shaoting},
  title     = {{ZePT}: Zero-Shot Pan-Tumor Segmentation via Query-Disentangling and Self-Prompting},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11386--11397}
}
Task-Driven Exploration: Decoupling and Inter-Task Feedback for Joint Moment Retrieval and Highlight Detection: Jin Yang,

Ping Wei,

Huan Li,

Ziyang Ren; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Jin and Wei, Ping and Li, Huan and Ren, Ziyang},
  title     = {Task-Driven Exploration: Decoupling and Inter-Task Feedback for Joint Moment Retrieval and Highlight Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18308--18318}
}
MobileCLIP: Fast Image-Text Models through Multi-Modal Reinforced Training: Pavan Kumar Anasosalu Vasu,

Hadi Pouransari,

Fartash Faghri,

Raviteja Vemulapalli,

Oncel Tuzel; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vasu_2024_CVPR,
  author    = {Vasu, Pavan Kumar Anasosalu and Pouransari, Hadi and Faghri, Fartash and Vemulapalli, Raviteja and Tuzel, Oncel},
  title     = {{MobileCLIP}: Fast Image-Text Models through Multi-Modal Reinforced Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15963--15974}
}
VideoCon: Robust Video-Language Alignment via Contrast Captions: Hritik Bansal,

Yonatan Bitton,

Idan Szpektor,

Kai-Wei Chang,

Aditya Grover; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bansal_2024_CVPR,
  author    = {Bansal, Hritik and Bitton, Yonatan and Szpektor, Idan and Chang, Kai-Wei and Grover, Aditya},
  title     = {{VideoCon}: Robust Video-Language Alignment via Contrast Captions},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13927--13937}
}
Discovering and Mitigating Visual Biases through Keyword Explanation: Younghyun Kim,

Sangwoo Mo,

Minkyu Kim,

Kyungmin Lee,

Jaeho Lee,

Jinwoo Shin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Younghyun and Mo, Sangwoo and Kim, Minkyu and Lee, Kyungmin and Lee, Jaeho and Shin, Jinwoo},
  title     = {Discovering and Mitigating Visual Biases through Keyword Explanation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11082--11092}
}
Robust Emotion Recognition in Context Debiasing: Dingkang Yang,

Kun Yang,

Mingcheng Li,

Shunli Wang,

Shuaibing Wang,

Lihua Zhang; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Dingkang and Yang, Kun and Li, Mingcheng and Wang, Shunli and Wang, Shuaibing and Zhang, Lihua},
  title     = {Robust Emotion Recognition in Context Debiasing},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12447--12457}
}
CAPE: CAM as a Probabilistic Ensemble for Enhanced DNN Interpretation: Townim Faisal Chowdhury,

Kewen Liao,

Vu Minh Hieu Phan,

Minh-Son To,

Yutong Xie,

Kevin Hung,

David Ross,

Anton van den Hengel,

Johan W. Verjans,

Zhibin Liao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chowdhury_2024_CVPR,
  author    = {Chowdhury, Townim Faisal and Liao, Kewen and Phan, Vu Minh Hieu and To, Minh-Son and Xie, Yutong and Hung, Kevin and Ross, David and van den Hengel, Anton and Verjans, Johan W. and Liao, Zhibin},
  title     = {{CAPE}: {CAM} as a Probabilistic Ensemble for Enhanced {DNN} Interpretation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11072--11081}
}
Multi-Space Alignments Towards Universal LiDAR Segmentation: Youquan Liu,

Lingdong Kong,

Xiaoyang Wu,

Runnan Chen,

Xin Li,

Liang Pan,

Ziwei Liu,

Yuexin Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Youquan and Kong, Lingdong and Wu, Xiaoyang and Chen, Runnan and Li, Xin and Pan, Liang and Liu, Ziwei and Ma, Yuexin},
  title     = {Multi-Space Alignments Towards Universal {LiDAR} Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14648--14661}
}
FlowDiffuser: Advancing Optical Flow Estimation with Diffusion Models: Ao Luo,

Xin Li,

Fan Yang,

Jiangyu Liu,

Haoqiang Fan,

Shuaicheng Liu; [pdf]
[bibtex]
@InProceedings{Luo_2024_CVPR,
  author    = {Luo, Ao and Li, Xin and Yang, Fan and Liu, Jiangyu and Fan, Haoqiang and Liu, Shuaicheng},
  title     = {{FlowDiffuser}: Advancing Optical Flow Estimation with Diffusion Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19167--19176}
}
Free3D: Consistent Novel View Synthesis without 3D Representation: Chuanxia Zheng,

Andrea Vedaldi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2024_CVPR,
  author    = {Zheng, Chuanxia and Vedaldi, Andrea},
  title     = {{Free3D}: Consistent Novel View Synthesis without {3D} Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9720--9731}
}
WALT3D: Generating Realistic Training Data from Time-Lapse Imagery for Reconstructing Dynamic Objects Under Occlusion: Khiem Vuong,

N Dinesh Reddy,

Robert Tamburo,

Srinivasa G. Narasimhan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vuong_2024_CVPR,
  author    = {Vuong, Khiem and Reddy, N Dinesh and Tamburo, Robert and Narasimhan, Srinivasa G.},
  title     = {{WALT3D}: Generating Realistic Training Data from Time-Lapse Imagery for Reconstructing Dynamic Objects Under Occlusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9514--9524}
}
Towards Language-Driven Video Inpainting via Multimodal Large Language Models: Jianzong Wu,

Xiangtai Li,

Chenyang Si,

Shangchen Zhou,

Jingkang Yang,

Jiangning Zhang,

Yining Li,

Kai Chen,

Yunhai Tong,

Ziwei Liu,

Chen Change Loy; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Jianzong and Li, Xiangtai and Si, Chenyang and Zhou, Shangchen and Yang, Jingkang and Zhang, Jiangning and Li, Yining and Chen, Kai and Tong, Yunhai and Liu, Ziwei and Loy, Chen Change},
  title     = {Towards Language-Driven Video Inpainting via Multimodal Large Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12501--12511}
}
CLIP-KD: An Empirical Study of CLIP Model Distillation: Chuanguang Yang,

Zhulin An,

Libo Huang,

Junyu Bi,

Xinqiang Yu,

Han Yang,

Boyu Diao,

Yongjun Xu; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Chuanguang and An, Zhulin and Huang, Libo and Bi, Junyu and Yu, Xinqiang and Yang, Han and Diao, Boyu and Xu, Yongjun},
  title     = {{CLIP-KD}: An Empirical Study of {CLIP} Model Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15952--15962}
}
OneTracker: Unifying Visual Object Tracking with Foundation Models and Efficient Tuning: Lingyi Hong,

Shilin Yan,

Renrui Zhang,

Wanyun Li,

Xinyu Zhou,

Pinxue Guo,

Kaixun Jiang,

Yiting Chen,

Jinglun Li,

Zhaoyu Chen,

Wenqiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2024_CVPR,
  author    = {Hong, Lingyi and Yan, Shilin and Zhang, Renrui and Li, Wanyun and Zhou, Xinyu and Guo, Pinxue and Jiang, Kaixun and Chen, Yiting and Li, Jinglun and Chen, Zhaoyu and Zhang, Wenqiang},
  title     = {{OneTracker}: Unifying Visual Object Tracking with Foundation Models and Efficient Tuning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19079--19091}
}
SC-Tune: Unleashing Self-Consistent Referential Comprehension in Large Vision Language Models: Tongtian Yue,

Jie Cheng,

Longteng Guo,

Xingyuan Dai,

Zijia Zhao,

Xingjian He,

Gang Xiong,

Yisheng Lv,

Jing Liu; [pdf] [supp]
[bibtex]
@InProceedings{Yue_2024_CVPR,
  author    = {Yue, Tongtian and Cheng, Jie and Guo, Longteng and Dai, Xingyuan and Zhao, Zijia and He, Xingjian and Xiong, Gang and Lv, Yisheng and Liu, Jing},
  title     = {{SC-Tune}: Unleashing Self-Consistent Referential Comprehension in Large Vision Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13073--13083}
}
NeRSP: Neural 3D Reconstruction for Reflective Objects with Sparse Polarized Images: Yufei Han,

Heng Guo,

Koki Fukai,

Hiroaki Santo,

Boxin Shi,

Fumio Okura,

Zhanyu Ma,

Yunpeng Jia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2024_CVPR,
  author    = {Han, Yufei and Guo, Heng and Fukai, Koki and Santo, Hiroaki and Shi, Boxin and Okura, Fumio and Ma, Zhanyu and Jia, Yunpeng},
  title     = {{NeRSP}: Neural {3D} Reconstruction for Reflective Objects with Sparse Polarized Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11821--11830}
}
Retrieval-Augmented Embodied Agents: Yichen Zhu,

Zhicai Ou,

Xiaofeng Mou,

Jian Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2024_CVPR,
  author    = {Zhu, Yichen and Ou, Zhicai and Mou, Xiaofeng and Tang, Jian},
  title     = {Retrieval-Augmented Embodied Agents},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17985--17995}
}
SAFDNet: A Simple and Effective Network for Fully Sparse 3D Object Detection: Gang Zhang,

Junnan Chen,

Guohuan Gao,

Jianmin Li,

Si Liu,

Xiaolin Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Gang and Chen, Junnan and Gao, Guohuan and Li, Jianmin and Liu, Si and Hu, Xiaolin},
  title     = {{SAFDNet}: A Simple and Effective Network for Fully Sparse {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14477--14486}
}
HINTED: Hard Instance Enhanced Detector with Mixed-Density Feature Fusion for Sparsely-Supervised 3D Object Detection: Qiming Xia,

Wei Ye,

Hai Wu,

Shijia Zhao,

Leyuan Xing,

Xun Huang,

Jinhao Deng,

Xin Li,

Chenglu Wen,

Cheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Xia_2024_CVPR,
  author    = {Xia, Qiming and Ye, Wei and Wu, Hai and Zhao, Shijia and Xing, Leyuan and Huang, Xun and Deng, Jinhao and Li, Xin and Wen, Chenglu and Wang, Cheng},
  title     = {{HINTED}: Hard Instance Enhanced Detector with Mixed-Density Feature Fusion for Sparsely-Supervised {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15321--15330}
}
Structured Gradient-based Interpretations via Norm-Regularized Adversarial Training: Shizhan Gong,

Qi Dou,

Farzan Farnia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gong_2024_CVPR,
  author    = {Gong, Shizhan and Dou, Qi and Farnia, Farzan},
  title     = {Structured Gradient-based Interpretations via Norm-Regularized Adversarial Training},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11009--11018}
}
3DFIRES: Few Image 3D REconstruction for Scenes with Hidden Surfaces: Linyi Jin,

Nilesh Kulkarni,

David F. Fouhey; [pdf] [arXiv]
[bibtex]
@InProceedings{Jin_2024_CVPR,
  author    = {Jin, Linyi and Kulkarni, Nilesh and Fouhey, David F.},
  title     = {{3DFIRES}: Few Image {3D} {REconstruction} for Scenes with Hidden Surfaces},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9742--9751}
}
MCPNet: An Interpretable Classifier via Multi-Level Concept Prototypes: Bor-Shiun Wang,

Chien-Yi Wang,

Wei-Chen Chiu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Bor-Shiun and Wang, Chien-Yi and Chiu, Wei-Chen},
  title     = {{MCPNet}: An Interpretable Classifier via Multi-Level Concept Prototypes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10885--10894}
}
ALGM: Adaptive Local-then-Global Token Merging for Efficient Semantic Segmentation with Plain Vision Transformers: Narges Norouzi,

Svetlana Orlova,

Daan de Geus,

Gijs Dubbelman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Norouzi_2024_CVPR,
  author    = {Norouzi, Narges and Orlova, Svetlana and de Geus, Daan and Dubbelman, Gijs},
  title     = {{ALGM}: Adaptive Local-then-Global Token Merging for Efficient Semantic Segmentation with Plain Vision Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15773--15782}
}
Single-Model and Any-Modality for Video Object Tracking: Zongwei Wu,

Jilai Zheng,

Xiangxuan Ren,

Florin-Alexandru Vasluianu,

Chao Ma,

Danda Pani Paudel,

Luc Van Gool,

Radu Timofte; [pdf] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Zongwei and Zheng, Jilai and Ren, Xiangxuan and Vasluianu, Florin-Alexandru and Ma, Chao and Paudel, Danda Pani and Van Gool, Luc and Timofte, Radu},
  title     = {Single-Model and Any-Modality for Video Object Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19156--19166}
}
FlowTrack: Revisiting Optical Flow for Long-Range Dense Tracking: Seokju Cho,

Jiahui Huang,

Seungryong Kim,

Joon-Young Lee; [pdf]
[bibtex]
@InProceedings{Cho_2024_CVPR,
  author    = {Cho, Seokju and Huang, Jiahui and Kim, Seungryong and Lee, Joon-Young},
  title     = {{FlowTrack}: Revisiting Optical Flow for Long-Range Dense Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19268--19277}
}
Synthesize Diagnose and Optimize: Towards Fine-Grained Vision-Language Understanding: Wujian Peng,

Sicheng Xie,

Zuyao You,

Shiyi Lan,

Zuxuan Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2024_CVPR,
  author    = {Peng, Wujian and Xie, Sicheng and You, Zuyao and Lan, Shiyi and Wu, Zuxuan},
  title     = {Synthesize Diagnose and Optimize: Towards Fine-Grained Vision-Language Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13279--13288}
}
WildlifeMapper: Aerial Image Analysis for Multi-Species Detection and Identification: Satish Kumar,

Bowen Zhang,

Chandrakanth Gudavalli,

Connor Levenson,

Lacey Hughey,

Jared A. Stabach,

Irene Amoke,

Gordon Ojwang,

Joseph Mukeka,

Stephen Mwiu,

Joseph Ogutu,

Howard Frederick,

B.S. Manjunath; [pdf] [supp]
[bibtex]
@InProceedings{Kumar_2024_CVPR,
  author    = {Kumar, Satish and Zhang, Bowen and Gudavalli, Chandrakanth and Levenson, Connor and Hughey, Lacey and Stabach, Jared A. and Amoke, Irene and Ojwang, Gordon and Mukeka, Joseph and Mwiu, Stephen and Ogutu, Joseph and Frederick, Howard and Manjunath, B. S.},
  title     = {{WildlifeMapper}: Aerial Image Analysis for Multi-Species Detection and Identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12594--12604}
}
Tune-An-Ellipse: CLIP Has Potential to Find What You Want: Jinheng Xie,

Songhe Deng,

Bing Li,

Haozhe Liu,

Yawen Huang,

Yefeng Zheng,

Jurgen Schmidhuber,

Bernard Ghanem,

Linlin Shen,

Mike Zheng Shou; [pdf] [supp]
[bibtex]
@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Jinheng and Deng, Songhe and Li, Bing and Liu, Haozhe and Huang, Yawen and Zheng, Yefeng and Schmidhuber, Jurgen and Ghanem, Bernard and Shen, Linlin and Shou, Mike Zheng},
  title     = {{Tune-An-Ellipse}: {CLIP} Has Potential to Find What You Want},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13723--13732}
}
Incremental Nuclei Segmentation from Histopathological Images via Future-class Awareness and Compatibility-inspired Distillation: Huyong Wang,

Huisi Wu,

Jing Qin; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Huyong and Wu, Huisi and Qin, Jing},
  title     = {Incremental Nuclei Segmentation from Histopathological Images via Future-class Awareness and Compatibility-inspired Distillation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11408--11417}
}
DiffMOT: A Real-time Diffusion-based Multiple Object Tracker with Non-linear Prediction: Weiyi Lv,

Yuhang Huang,

Ning Zhang,

Ruei-Sung Lin,

Mei Han,

Dan Zeng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lv_2024_CVPR,
  author    = {Lv, Weiyi and Huang, Yuhang and Zhang, Ning and Lin, Ruei-Sung and Han, Mei and Zeng, Dan},
  title     = {{DiffMOT}: A Real-time Diffusion-based Multiple Object Tracker with Non-linear Prediction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {19321--19330}
}
Just Add π! Pose Induced Video Transformers for Understanding Activities of Daily Living: Dominick Reilly,

Srijan Das; [pdf] [supp]
[bibtex]
@InProceedings{Reilly_2024_CVPR,
  author    = {Reilly, Dominick and Das, Srijan},
  title     = {Just Add {$\pi$}! Pose Induced Video Transformers for Understanding Activities of Daily Living},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18340--18350}
}
ViLa-MIL: Dual-scale Vision-Language Multiple Instance Learning for Whole Slide Image Classification: Jiangbo Shi,

Chen Li,

Tieliang Gong,

Yefeng Zheng,

Huazhu Fu; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2024_CVPR,
  author    = {Shi, Jiangbo and Li, Chen and Gong, Tieliang and Zheng, Yefeng and Fu, Huazhu},
  title     = {{ViLa-MIL}: Dual-scale Vision-Language Multiple Instance Learning for Whole Slide Image Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11248--11258}
}
CapsFusion: Rethinking Image-Text Data at Scale: Qiying Yu,

Quan Sun,

Xiaosong Zhang,

Yufeng Cui,

Fan Zhang,

Yue Cao,

Xinlong Wang,

Jingjing Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2024_CVPR,
  author    = {Yu, Qiying and Sun, Quan and Zhang, Xiaosong and Cui, Yufeng and Zhang, Fan and Cao, Yue and Wang, Xinlong and Liu, Jingjing},
  title     = {{CapsFusion}: Rethinking Image-Text Data at Scale},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14022--14032}
}
Tumor Micro-environment Interactions Guided Graph Learning for Survival Analysis of Human Cancers from Whole-slide Pathological Images: Wei Shao,

YangYang Shi,

Daoqiang Zhang,

JunJie Zhou,

Peng Wan; [pdf]
[bibtex]
@InProceedings{Shao_2024_CVPR,
  author    = {Shao, Wei and Shi, YangYang and Zhang, Daoqiang and Zhou, JunJie and Wan, Peng},
  title     = {Tumor Micro-environment Interactions Guided Graph Learning for Survival Analysis of Human Cancers from Whole-slide Pathological Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11694--11703}
}
Towards Generalizable Multi-Object Tracking: Zheng Qin,

Le Wang,

Sanping Zhou,

Panpan Fu,

Gang Hua,

Wei Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qin_2024_CVPR,
  author    = {Qin, Zheng and Wang, Le and Zhou, Sanping and Fu, Panpan and Hua, Gang and Tang, Wei},
  title     = {Towards Generalizable Multi-Object Tracking},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18995--19004}
}
Slice3D: Multi-Slice Occlusion-Revealing Single View 3D Reconstruction: Yizhi Wang,

Wallace Lira,

Wenqi Wang,

Ali Mahdavi-Amiri,

Hao Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Yizhi and Lira, Wallace and Wang, Wenqi and Mahdavi-Amiri, Ali and Zhang, Hao},
  title     = {{Slice3D}: Multi-Slice Occlusion-Revealing Single View {3D} Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9881--9891}
}
IIRP-Net: Iterative Inference Residual Pyramid Network for Enhanced Image Registration: Tai Ma,

Suwei Zhang,

Jiafeng Li,

Ying Wen; [pdf] [supp]
[bibtex]
@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Tai and Zhang, Suwei and Li, Jiafeng and Wen, Ying},
  title     = {{IIRP-Net}: Iterative Inference Residual Pyramid Network for Enhanced Image Registration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11546--11555}
}
SNIFFER: Multimodal Large Language Model for Explainable Out-of-Context Misinformation Detection: Peng Qi,

Zehong Yan,

Wynne Hsu,

Mong Li Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qi_2024_CVPR,
  author    = {Qi, Peng and Yan, Zehong and Hsu, Wynne and Lee, Mong Li},
  title     = {{SNIFFER}: Multimodal Large Language Model for Explainable Out-of-Context Misinformation Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13052--13062}
}
Beyond Seen Primitive Concepts and Attribute-Object Compositional Learning: Nirat Saini,

Khoi Pham,

Abhinav Shrivastava; [pdf] [supp]
[bibtex]
@InProceedings{Saini_2024_CVPR,
  author    = {Saini, Nirat and Pham, Khoi and Shrivastava, Abhinav},
  title     = {Beyond Seen Primitive Concepts and Attribute-Object Compositional Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14466--14476}
}
Unleashing Network Potentials for Semantic Scene Completion: Fengyun Wang,

Qianru Sun,

Dong Zhang,

Jinhui Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Fengyun and Sun, Qianru and Zhang, Dong and Tang, Jinhui},
  title     = {Unleashing Network Potentials for Semantic Scene Completion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10314--10323}
}
Learning Occupancy for Monocular 3D Object Detection: Liang Peng,

Junkai Xu,

Haoran Cheng,

Zheng Yang,

Xiaopei Wu,

Wei Qian,

Wenxiao Wang,

Boxi Wu,

Deng Cai; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2024_CVPR,
  author    = {Peng, Liang and Xu, Junkai and Cheng, Haoran and Yang, Zheng and Wu, Xiaopei and Qian, Wei and Wang, Wenxiao and Wu, Boxi and Cai, Deng},
  title     = {Learning Occupancy for Monocular {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10281--10292}
}
LAA-Net: Localized Artifact Attention Network for Quality-Agnostic and Generalizable Deepfake Detection: Dat Nguyen,

Nesryne Mejri,

Inder Pal Singh,

Polina Kuleshova,

Marcella Astrid,

Anis Kacem,

Enjie Ghorbel,

Djamila Aouada; [pdf] [supp]
[bibtex]
@InProceedings{Nguyen_2024_CVPR,
  author    = {Nguyen, Dat and Mejri, Nesryne and Singh, Inder Pal and Kuleshova, Polina and Astrid, Marcella and Kacem, Anis and Ghorbel, Enjie and Aouada, Djamila},
  title     = {{LAA-Net}: Localized Artifact Attention Network for Quality-Agnostic and Generalizable Deepfake Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17395--17405}
}
Rotation-Agnostic Image Representation Learning for Digital Pathology: Saghir Alfasly,

Abubakr Shafique,

Peyman Nejat,

Jibran Khan,

Areej Alsaafin,

Ghazal Alabtah,

H.R. Tizhoosh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Alfasly_2024_CVPR,
  author    = {Alfasly, Saghir and Shafique, Abubakr and Nejat, Peyman and Khan, Jibran and Alsaafin, Areej and Alabtah, Ghazal and Tizhoosh, H. R.},
  title     = {Rotation-Agnostic Image Representation Learning for Digital Pathology},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11683--11693}
}
EASE-DETR: Easing the Competition among Object Queries: Yulu Gao,

Yifan Sun,

Xudong Ding,

Chuyang Zhao,

Si Liu; [pdf]
[bibtex]
@InProceedings{Gao_2024_CVPR,
  author    = {Gao, Yulu and Sun, Yifan and Ding, Xudong and Zhao, Chuyang and Liu, Si},
  title     = {{EASE-DETR}: Easing the Competition among Object Queries},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17282--17291}
}
Self-Discovering Interpretable Diffusion Latent Directions for Responsible Text-to-Image Generation: Hang Li,

Chengzhi Shen,

Philip Torr,

Volker Tresp,

Jindong Gu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Hang and Shen, Chengzhi and Torr, Philip and Tresp, Volker and Gu, Jindong},
  title     = {Self-Discovering Interpretable Diffusion Latent Directions for Responsible Text-to-Image Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12006--12016}
}
HiLo: Detailed and Robust 3D Clothed Human Reconstruction with High-and Low-Frequency Information of Parametric Models: Yifan Yang,

Dong Liu,

Shuhai Zhang,

Zeshuai Deng,

Zixiong Huang,

Mingkui Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Yifan and Liu, Dong and Zhang, Shuhai and Deng, Zeshuai and Huang, Zixiong and Tan, Mingkui},
  title     = {{HiLo}: Detailed and Robust {3D} Clothed Human Reconstruction with High-and Low-Frequency Information of Parametric Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10671--10681}
}
Promptable Behaviors: Personalizing Multi-Objective Rewards from Human Preferences: Minyoung Hwang,

Luca Weihs,

Chanwoo Park,

Kimin Lee,

Aniruddha Kembhavi,

Kiana Ehsani; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hwang_2024_CVPR,
  author    = {Hwang, Minyoung and Weihs, Luca and Park, Chanwoo and Lee, Kimin and Kembhavi, Aniruddha and Ehsani, Kiana},
  title     = {Promptable Behaviors: Personalizing Multi-Objective Rewards from Human Preferences},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16216--16226}
}
Neural Underwater Scene Representation: Yunkai Tang,

Chengxuan Zhu,

Renjie Wan,

Chao Xu,

Boxin Shi; [pdf] [supp]
[bibtex]
@InProceedings{Tang_2024_CVPR,
  author    = {Tang, Yunkai and Zhu, Chengxuan and Wan, Renjie and Xu, Chao and Shi, Boxin},
  title     = {Neural Underwater Scene Representation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11780--11789}
}
Progress-Aware Online Action Segmentation for Egocentric Procedural Task Videos: Yuhan Shen,

Ehsan Elhamifar; [pdf] [supp]
[bibtex]
@InProceedings{Shen_2024_CVPR,
  author    = {Shen, Yuhan and Elhamifar, Ehsan},
  title     = {Progress-Aware Online Action Segmentation for Egocentric Procedural Task Videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18186--18197}
}
Constrained Layout Generation with Factor Graphs: Mohammed Haroon Dupty,

Yanfei Dong,

Sicong Leng,

Guoji Fu,

Yong Liang Goh,

Wei Lu,

Wee Sun Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Dupty_2024_CVPR,
  author    = {Dupty, Mohammed Haroon and Dong, Yanfei and Leng, Sicong and Fu, Guoji and Goh, Yong Liang and Lu, Wei and Lee, Wee Sun},
  title     = {Constrained Layout Generation with Factor Graphs},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12851--12860}
}
SLICE: Stabilized LIME for Consistent Explanations for Image Classification: Revoti Prasad Bora,

Philipp Terhörst,

Raymond Veldhuis,

Raghavendra Ramachandra,

Kiran Raja; [pdf] [supp]
[bibtex]
@InProceedings{Bora_2024_CVPR,
  author    = {Bora, Revoti Prasad and Terh{\"o}rst, Philipp and Veldhuis, Raymond and Ramachandra, Raghavendra and Raja, Kiran},
  title     = {{SLICE}: Stabilized {LIME} for Consistent Explanations for Image Classification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10988--10996}
}
Anomaly Heterogeneity Learning for Open-set Supervised Anomaly Detection: Jiawen Zhu,

Choubo Ding,

Yu Tian,

Guansong Pang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2024_CVPR,
  author    = {Zhu, Jiawen and Ding, Choubo and Tian, Yu and Pang, Guansong},
  title     = {Anomaly Heterogeneity Learning for Open-set Supervised Anomaly Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17616--17626}
}
Revisiting Counterfactual Problems in Referring Expression Comprehension: Zhihan Yu,

Ruifan Li; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2024_CVPR,
  author    = {Yu, Zhihan and Li, Ruifan},
  title     = {Revisiting Counterfactual Problems in Referring Expression Comprehension},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13438--13448}
}
Compressed 3D Gaussian Splatting for Accelerated Novel View Synthesis: Simon Niedermayr,

Josef Stumpfegger,

Rüdiger Westermann; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Niedermayr_2024_CVPR,
  author    = {Niedermayr, Simon and Stumpfegger, Josef and Westermann, R{\"u}diger},
  title     = {Compressed {3D} {Gaussian} Splatting for Accelerated Novel View Synthesis},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10349--10358},
}
Separating the "Chirp" from the "Chat": Self-supervised Visual Grounding of Sound and Language: Mark Hamilton,

Andrew Zisserman,

John R. Hershey,

William T. Freeman; [pdf] [arXiv]
[bibtex]
@InProceedings{Hamilton_2024_CVPR,
  author    = {Hamilton, Mark and Zisserman, Andrew and Hershey, John R. and Freeman, William T.},
  title     = {Separating the {``Chirp''} from the {``Chat''}: Self-supervised Visual Grounding of Sound and Language},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13117--13127},
}
MA-LMM: Memory-Augmented Large Multimodal Model for Long-Term Video Understanding: Bo He,

Hengduo Li,

Young Kyun Jang,

Menglin Jia,

Xuefei Cao,

Ashish Shah,

Abhinav Shrivastava,

Ser-Nam Lim; [pdf] [supp]
[bibtex]
@InProceedings{He_2024_CVPR,
  author    = {He, Bo and Li, Hengduo and Jang, Young Kyun and Jia, Menglin and Cao, Xuefei and Shah, Ashish and Shrivastava, Abhinav and Lim, Ser-Nam},
  title     = {{MA-LMM}: Memory-Augmented Large Multimodal Model for Long-Term Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13504--13514},
}
Dr2Net: Dynamic Reversible Dual-Residual Networks for Memory-Efficient Finetuning: Chen Zhao,

Shuming Liu,

Karttikeya Mangalam,

Guocheng Qian,

Fatimah Zohra,

Abdulmohsen Alghannam,

Jitendra Malik,

Bernard Ghanem; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Chen and Liu, Shuming and Mangalam, Karttikeya and Qian, Guocheng and Zohra, Fatimah and Alghannam, Abdulmohsen and Malik, Jitendra and Ghanem, Bernard},
  title     = {{Dr2Net}: Dynamic Reversible Dual-Residual Networks for Memory-Efficient Finetuning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15835--15844},
}
PNeRV: Enhancing Spatial Consistency via Pyramidal Neural Representation for Videos: Qi Zhao,

M. Salman Asif,

Zhan Ma; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Qi and Asif, M. Salman and Ma, Zhan},
  title     = {{PNeRV}: Enhancing Spatial Consistency via Pyramidal Neural Representation for Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {19103--19112},
}
Point Transformer V3: Simpler Faster Stronger: Xiaoyang Wu,

Li Jiang,

Peng-Shuai Wang,

Zhijian Liu,

Xihui Liu,

Yu Qiao,

Wanli Ouyang,

Tong He,

Hengshuang Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Xiaoyang and Jiang, Li and Wang, Peng-Shuai and Liu, Zhijian and Liu, Xihui and Qiao, Yu and Ouyang, Wanli and He, Tong and Zhao, Hengshuang},
  title     = {{Point Transformer V3}: Simpler Faster Stronger},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4840--4851},
}
Mask4Align: Aligned Entity Prompting with Color Masks for Multi-Entity Localization Problems: Haoquan Zhang,

Ronggang Huang,

Yi Xie,

Huaidong Zhang; [pdf]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Haoquan and Huang, Ronggang and Xie, Yi and Zhang, Huaidong},
  title     = {{Mask4Align}: Aligned Entity Prompting with Color Masks for Multi-Entity Localization Problems},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13373--13383},
}
RCL: Reliable Continual Learning for Unified Failure Detection: Fei Zhu,

Zhen Cheng,

Xu-Yao Zhang,

Cheng-Lin Liu,

Zhaoxiang Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Zhu_2024_CVPR,
  author    = {Zhu, Fei and Cheng, Zhen and Zhang, Xu-Yao and Liu, Cheng-Lin and Zhang, Zhaoxiang},
  title     = {{RCL}: Reliable Continual Learning for Unified Failure Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12140--12150},
}
Referring Image Editing: Object-level Image Editing via Referring Expressions: Chang Liu,

Xiangtai Li,

Henghui Ding; [pdf]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Chang and Li, Xiangtai and Ding, Henghui},
  title     = {Referring Image Editing: Object-level Image Editing via Referring Expressions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13128--13138},
}
Unsupervised Video Domain Adaptation with Masked Pre-Training and Collaborative Self-Training: Arun Reddy,

William Paul,

Corban Rivera,

Ketul Shah,

Celso M. de Melo,

Rama Chellappa; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Reddy_2024_CVPR,
  author    = {Reddy, Arun and Paul, William and Rivera, Corban and Shah, Ketul and de Melo, Celso M. and Chellappa, Rama},
  title     = {Unsupervised Video Domain Adaptation with Masked Pre-Training and Collaborative Self-Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18919--18929},
}
UniDepth: Universal Monocular Metric Depth Estimation: Luigi Piccinelli,

Yung-Hsu Yang,

Christos Sakaridis,

Mattia Segu,

Siyuan Li,

Luc Van Gool,

Fisher Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Piccinelli_2024_CVPR,
  author    = {Piccinelli, Luigi and Yang, Yung-Hsu and Sakaridis, Christos and Segu, Mattia and Li, Siyuan and Van Gool, Luc and Yu, Fisher},
  title     = {{UniDepth}: Universal Monocular Metric Depth Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10106--10116},
}
NeuRAD: Neural Rendering for Autonomous Driving: Adam Tonderski,

Carl Lindström,

Georg Hess,

William Ljungbergh,

Lennart Svensson,

Christoffer Petersson; [pdf] [supp]
[bibtex]
@InProceedings{Tonderski_2024_CVPR,
  author    = {Tonderski, Adam and Lindstr{\"o}m, Carl and Hess, Georg and Ljungbergh, William and Svensson, Lennart and Petersson, Christoffer},
  title     = {{NeuRAD}: Neural Rendering for Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14895--14904},
}
Bootstrapping Chest CT Image Understanding by Distilling Knowledge from X-ray Expert Models: Weiwei Cao,

Jianpeng Zhang,

Yingda Xia,

Tony C. W. Mok,

Zi Li,

Xianghua Ye,

Le Lu,

Jian Zheng,

Yuxing Tang,

Ling Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Cao_2024_CVPR,
  author    = {Cao, Weiwei and Zhang, Jianpeng and Xia, Yingda and Mok, Tony C. W. and Li, Zi and Ye, Xianghua and Lu, Le and Zheng, Jian and Tang, Yuxing and Zhang, Ling},
  title     = {Bootstrapping Chest {CT} Image Understanding by Distilling Knowledge from {X-ray} Expert Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11238--11247},
}
Magic Tokens: Select Diverse Tokens for Multi-modal Object Re-Identification: Pingping Zhang,

Yuhao Wang,

Yang Liu,

Zhengzheng Tu,

Huchuan Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Pingping and Wang, Yuhao and Liu, Yang and Tu, Zhengzheng and Lu, Huchuan},
  title     = {Magic Tokens: Select Diverse Tokens for Multi-modal Object Re-Identification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17117--17126},
}
SignGraph: A Sign Sequence is Worth Graphs of Nodes: Shiwei Gan,

Yafeng Yin,

Zhiwei Jiang,

Hongkai Wen,

Lei Xie,

Sanglu Lu; [pdf] [supp]
[bibtex]
@InProceedings{Gan_2024_CVPR,
  author    = {Gan, Shiwei and Yin, Yafeng and Jiang, Zhiwei and Wen, Hongkai and Xie, Lei and Lu, Sanglu},
  title     = {{SignGraph}: A Sign Sequence is Worth Graphs of Nodes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13470--13479},
}
DeconfuseTrack: Dealing with Confusion for Multi-Object Tracking: Cheng Huang,

Shoudong Han,

Mengyu He,

Wenbo Zheng,

Yuhao Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Cheng and Han, Shoudong and He, Mengyu and Zheng, Wenbo and Wei, Yuhao},
  title     = {{DeconfuseTrack}: Dealing with Confusion for Multi-Object Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {19290--19299},
}
HIMap: HybrId Representation Learning for End-to-end Vectorized HD Map Construction: Yi Zhou,

Hui Zhang,

Jiaqian Yu,

Yifan Yang,

Sangil Jung,

Seung-In Park,

ByungIn Yoo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Yi and Zhang, Hui and Yu, Jiaqian and Yang, Yifan and Jung, Sangil and Park, Seung-In and Yoo, ByungIn},
  title     = {{HIMap}: {HybrId} Representation Learning for End-to-end Vectorized {HD} Map Construction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15396--15406},
}
Unleashing Unlabeled Data: A Paradigm for Cross-View Geo-Localization: Guopeng Li,

Ming Qian,

Gui-Song Xia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Guopeng and Qian, Ming and Xia, Gui-Song},
  title     = {Unleashing Unlabeled Data: A Paradigm for Cross-View Geo-Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16719--16729},
}
PanoOcc: Unified Occupancy Representation for Camera-based 3D Panoptic Segmentation: Yuqi Wang,

Yuntao Chen,

Xingyu Liao,

Lue Fan,

Zhaoxiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Yuqi and Chen, Yuntao and Liao, Xingyu and Fan, Lue and Zhang, Zhaoxiang},
  title     = {{PanoOcc}: Unified Occupancy Representation for Camera-based {3D} Panoptic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17158--17168},
}
Sparse Views Near Light: A Practical Paradigm for Uncalibrated Point-light Photometric Stereo: Mohammed Brahimi,

Bjoern Haefner,

Zhenzhang Ye,

Bastian Goldluecke,

Daniel Cremers; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Brahimi_2024_CVPR,
  author    = {Brahimi, Mohammed and Haefner, Bjoern and Ye, Zhenzhang and Goldluecke, Bastian and Cremers, Daniel},
  title     = {Sparse Views Near Light: A Practical Paradigm for Uncalibrated Point-light Photometric Stereo},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11862--11872},
}
LQMFormer: Language-aware Query Mask Transformer for Referring Image Segmentation: Nisarg A. Shah,

Vibashan VS,

Vishal M. Patel; [pdf] [supp]
[bibtex]
@InProceedings{Shah_2024_CVPR,
  author    = {Shah, Nisarg A. and VS, Vibashan and Patel, Vishal M.},
  title     = {{LQMFormer}: Language-aware Query Mask Transformer for Referring Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12903--12913},
}
Omni-Q: Omni-Directional Scene Understanding for Unsupervised Visual Grounding: Sai Wang,

Yutian Lin,

Yu Wu; [pdf]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Sai and Lin, Yutian and Wu, Yu},
  title     = {{Omni-Q}: Omni-Directional Scene Understanding for Unsupervised Visual Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14261--14270},
}
VISTA-LLAMA: Reducing Hallucination in Video Language Models via Equal Distance to Visual Tokens: Fan Ma,

Xiaojie Jin,

Heng Wang,

Yuchen Xian,

Jiashi Feng,

Yi Yang; [pdf]
[bibtex]
@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Fan and Jin, Xiaojie and Wang, Heng and Xian, Yuchen and Feng, Jiashi and Yang, Yi},
  title     = {{VISTA-LLAMA}: Reducing Hallucination in Video Language Models via Equal Distance to Visual Tokens},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13151--13160},
}
Efficient Multitask Dense Predictor via Binarization: Yuzhang Shang,

Dan Xu,

Gaowen Liu,

Ramana Rao Kompella,

Yan Yan; [pdf] [arXiv]
[bibtex]
@InProceedings{Shang_2024_CVPR,
  author    = {Shang, Yuzhang and Xu, Dan and Liu, Gaowen and Kompella, Ramana Rao and Yan, Yan},
  title     = {Efficient Multitask Dense Predictor via Binarization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15899--15908},
}
Jointly Training and Pruning CNNs via Learnable Agent Guidance and Alignment: Alireza Ganjdanesh,

Shangqian Gao,

Heng Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ganjdanesh_2024_CVPR,
  author    = {Ganjdanesh, Alireza and Gao, Shangqian and Huang, Heng},
  title     = {Jointly Training and Pruning {CNNs} via Learnable Agent Guidance and Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16058--16069},
}
Diffusion-EDFs: Bi-equivariant Denoising Generative Modeling on SE(3) for Visual Robotic Manipulation: Hyunwoo Ryu,

Jiwoo Kim,

Hyunseok An,

Junwoo Chang,

Joohwan Seo,

Taehan Kim,

Yubin Kim,

Chaewon Hwang,

Jongeun Choi,

Roberto Horowitz; [pdf] [supp]
[bibtex]
@InProceedings{Ryu_2024_CVPR,
  author    = {Ryu, Hyunwoo and Kim, Jiwoo and An, Hyunseok and Chang, Junwoo and Seo, Joohwan and Kim, Taehan and Kim, Yubin and Hwang, Chaewon and Choi, Jongeun and Horowitz, Roberto},
  title     = {{Diffusion-EDFs}: Bi-equivariant Denoising Generative Modeling on {SE(3)} for Visual Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18007--18018},
}
Contrasting Intra-Modal and Ranking Cross-Modal Hard Negatives to Enhance Visio-Linguistic Compositional Understanding: Le Zhang,

Rabiul Awal,

Aishwarya Agrawal; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Le and Awal, Rabiul and Agrawal, Aishwarya},
  title     = {Contrasting Intra-Modal and Ranking Cross-Modal Hard Negatives to Enhance Visio-Linguistic Compositional Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13774--13784},
}
CMA: A Chromaticity Map Adapter for Robust Detection of Screen-Recapture Document Images: Changsheng Chen,

Liangwei Lin,

Yongqi Chen,

Bin Li,

Jishen Zeng,

Jiwu Huang; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Changsheng and Lin, Liangwei and Chen, Yongqi and Li, Bin and Zeng, Jishen and Huang, Jiwu},
  title     = {{CMA}: A Chromaticity Map Adapter for Robust Detection of Screen-Recapture Document Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15577--15586},
}
VA3: Virtually Assured Amplification Attack on Probabilistic Copyright Protection for Text-to-Image Generative Models: Xiang Li,

Qianli Shen,

Kenji Kawaguchi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Xiang and Shen, Qianli and Kawaguchi, Kenji},
  title     = {{VA3}: Virtually Assured Amplification Attack on Probabilistic Copyright Protection for Text-to-Image Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12363--12373},
}
Light the Night: A Multi-Condition Diffusion Framework for Unpaired Low-Light Enhancement in Autonomous Driving: Jinlong Li,

Baolu Li,

Zhengzhong Tu,

Xinyu Liu,

Qing Guo,

Felix Juefei-Xu,

Runsheng Xu,

Hongkai Yu; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Jinlong and Li, Baolu and Tu, Zhengzhong and Liu, Xinyu and Guo, Qing and Juefei-Xu, Felix and Xu, Runsheng and Yu, Hongkai},
  title     = {Light the Night: A Multi-Condition Diffusion Framework for Unpaired Low-Light Enhancement in Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15205--15215},
}
Delving into the Trajectory Long-tail Distribution for Muti-object Tracking: Sijia Chen,

En Yu,

Jinyang Li,

Wenbing Tao; [pdf] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Sijia and Yu, En and Li, Jinyang and Tao, Wenbing},
  title     = {Delving into the Trajectory Long-tail Distribution for Muti-object Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {19341--19351},
}
Seg2Reg: Differentiable 2D Segmentation to 1D Regression Rendering for 360 Room Layout Reconstruction: Cheng Sun,

Wei-En Tai,

Yu-Lin Shih,

Kuan-Wei Chen,

Yong-Jing Syu,

Kent Selwyn The,

Yu-Chiang Frank Wang,

Hwann-Tzong Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Cheng and Tai, Wei-En and Shih, Yu-Lin and Chen, Kuan-Wei and Syu, Yong-Jing and The, Kent Selwyn and Wang, Yu-Chiang Frank and Chen, Hwann-Tzong},
  title     = {{Seg2Reg}: Differentiable {2D} Segmentation to {1D} Regression Rendering for 360 Room Layout Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10435--10445},
}
UniMix: Towards Domain Adaptive and Generalizable LiDAR Semantic Segmentation in Adverse Weather: Haimei Zhao,

Jing Zhang,

Zhuo Chen,

Shanshan Zhao,

Dacheng Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Haimei and Zhang, Jing and Chen, Zhuo and Zhao, Shanshan and Tao, Dacheng},
  title     = {{UniMix}: Towards Domain Adaptive and Generalizable {LiDAR} Semantic Segmentation in Adverse Weather},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14781--14791},
}
Visual Delta Generator with Large Multi-modal Models for Semi-supervised Composed Image Retrieval: Young Kyun Jang,

Donghyun Kim,

Zihang Meng,

Dat Huynh,

Ser-Nam Lim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jang_2024_CVPR,
  author    = {Jang, Young Kyun and Kim, Donghyun and Meng, Zihang and Huynh, Dat and Lim, Ser-Nam},
  title     = {Visual Delta Generator with Large Multi-modal Models for Semi-supervised Composed Image Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16805--16814},
}
Selective Interpretable and Motion Consistent Privacy Attribute Obfuscation for Action Recognition: Filip Ilic,

He Zhao,

Thomas Pock,

Richard P. Wildes; [pdf] [arXiv]
[bibtex]
@InProceedings{Ilic_2024_CVPR,
  author    = {Ilic, Filip and Zhao, He and Pock, Thomas and Wildes, Richard P.},
  title     = {Selective Interpretable and Motion Consistent Privacy Attribute Obfuscation for Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18730--18739},
}
HiPose: Hierarchical Binary Surface Encoding and Correspondence Pruning for RGB-D 6DoF Object Pose Estimation: Yongliang Lin,

Yongzhi Su,

Praveen Nathan,

Sandeep Inuganti,

Yan Di,

Martin Sundermeyer,

Fabian Manhardt,

Didier Stricker,

Jason Rambach,

Yu Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2024_CVPR,
  author    = {Lin, Yongliang and Su, Yongzhi and Nathan, Praveen and Inuganti, Sandeep and Di, Yan and Sundermeyer, Martin and Manhardt, Fabian and Stricker, Didier and Rambach, Jason and Zhang, Yu},
  title     = {{HiPose}: Hierarchical Binary Surface Encoding and Correspondence Pruning for {RGB-D} {6DoF} Object Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10148--10158},
}
DiffForensics: Leveraging Diffusion Prior to Image Forgery Detection and Localization: Zeqin Yu,

Jiangqun Ni,

Yuzhen Lin,

Haoyi Deng,

Bin Li; [pdf] [supp]
[bibtex]
@InProceedings{Yu_2024_CVPR,
  author    = {Yu, Zeqin and Ni, Jiangqun and Lin, Yuzhen and Deng, Haoyi and Li, Bin},
  title     = {{DiffForensics}: Leveraging Diffusion Prior to Image Forgery Detection and Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12765--12774},
}
Boosting Self-Supervision for Single-View Scene Completion via Knowledge Distillation: Keonhee Han,

Dominik Muhle,

Felix Wimbauer,

Daniel Cremers; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Han_2024_CVPR,
  author    = {Han, Keonhee and Muhle, Dominik and Wimbauer, Felix and Cremers, Daniel},
  title     = {Boosting Self-Supervision for Single-View Scene Completion via Knowledge Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9837--9847},
}
Sparse Global Matching for Video Frame Interpolation with Large Motion: Chunxu Liu,

Guozhen Zhang,

Rui Zhao,

Limin Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Chunxu and Zhang, Guozhen and Zhao, Rui and Wang, Limin},
  title     = {Sparse Global Matching for Video Frame Interpolation with Large Motion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {19125--19134},
}
ExtDM: Distribution Extrapolation Diffusion Model for Video Prediction: Zhicheng Zhang,

Junyao Hu,

Wentao Cheng,

Danda Paudel,

Jufeng Yang; [pdf]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Zhicheng and Hu, Junyao and Cheng, Wentao and Paudel, Danda and Yang, Jufeng},
  title     = {{ExtDM}: Distribution Extrapolation Diffusion Model for Video Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {19310--19320},
}
Point Segment and Count: A Generalized Framework for Object Counting: Zhizhong Huang,

Mingliang Dai,

Yi Zhang,

Junping Zhang,

Hongming Shan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Zhizhong and Dai, Mingliang and Zhang, Yi and Zhang, Junping and Shan, Hongming},
  title     = {Point Segment and Count: A Generalized Framework for Object Counting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17067--17076},
}
PTT: Point-Trajectory Transformer for Efficient Temporal 3D Object Detection: Kuan-Chih Huang,

Weijie Lyu,

Ming-Hsuan Yang,

Yi-Hsuan Tsai; [pdf] [arXiv]
[bibtex]
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Kuan-Chih and Lyu, Weijie and Yang, Ming-Hsuan and Tsai, Yi-Hsuan},
  title     = {{PTT}: Point-Trajectory Transformer for Efficient Temporal {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14938--14947},
}
Generative Proxemics: A Prior for 3D Social Interaction from Images: Lea Müller,

Vickie Ye,

Georgios Pavlakos,

Michael Black,

Angjoo Kanazawa; [pdf] [supp]
[bibtex]
@InProceedings{Muller_2024_CVPR,
  author    = {M{\"u}ller, Lea and Ye, Vickie and Pavlakos, Georgios and Black, Michael and Kanazawa, Angjoo},
  title     = {Generative Proxemics: A Prior for {3D} Social Interaction from Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9687--9697},
}
A Simple and Effective Point-based Network for Event Camera 6-DOFs Pose Relocalization: Hongwei Ren,

Jiadong Zhu,

Yue Zhou,

Haotian Fu,

Yulong Huang,

Bojun Cheng; [pdf]
[bibtex]
@InProceedings{Ren_2024_CVPR,
  author    = {Ren, Hongwei and Zhu, Jiadong and Zhou, Yue and Fu, Haotian and Huang, Yulong and Cheng, Bojun},
  title     = {A Simple and Effective Point-based Network for Event Camera {6-DOFs} Pose Relocalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18112--18121},
}
Region-Based Representations Revisited: Michal Shlapentokh-Rothman,

Ansel Blume,

Yao Xiao,

Yuqun Wu,

Sethuraman TV,

Heyi Tao,

Jae Yong Lee,

Wilfredo Torres,

Yu-Xiong Wang,

Derek Hoiem; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shlapentokh-Rothman_2024_CVPR,
  author    = {Shlapentokh-Rothman, Michal and Blume, Ansel and Xiao, Yao and Wu, Yuqun and TV, Sethuraman and Tao, Heyi and Lee, Jae Yong and Torres, Wilfredo and Wang, Yu-Xiong and Hoiem, Derek},
  title     = {Region-Based Representations Revisited},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17107--17116},
}
GenH2R: Learning Generalizable Human-to-Robot Handover via Scalable Simulation Demonstration and Imitation: Zifan Wang,

Junyu Chen,

Ziqing Chen,

Pengwei Xie,

Rui Chen,

Li Yi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Zifan and Chen, Junyu and Chen, Ziqing and Xie, Pengwei and Chen, Rui and Yi, Li},
  title     = {{GenH2R}: Learning Generalizable Human-to-Robot Handover via Scalable Simulation Demonstration and Imitation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16362--16372},
}
Modality-Agnostic Structural Image Representation Learning for Deformable Multi-Modality Medical Image Registration: Tony C. W. Mok,

Zi Li,

Yunhao Bai,

Jianpeng Zhang,

Wei Liu,

Yan-Jie Zhou,

Ke Yan,

Dakai Jin,

Yu Shi,

Xiaoli Yin,

Le Lu,

Ling Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mok_2024_CVPR,
  author    = {Mok, Tony C. W. and Li, Zi and Bai, Yunhao and Zhang, Jianpeng and Liu, Wei and Zhou, Yan-Jie and Yan, Ke and Jin, Dakai and Shi, Yu and Yin, Xiaoli and Lu, Le and Zhang, Ling},
  title     = {Modality-Agnostic Structural Image Representation Learning for Deformable Multi-Modality Medical Image Registration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11215--11225},
}
Any-Shift Prompting for Generalization over Distributions: Zehao Xiao,

Jiayi Shen,

Mohammad Mahdi Derakhshani,

Shengcai Liao,

Cees G. M. Snoek; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2024_CVPR,
  author    = {Xiao, Zehao and Shen, Jiayi and Derakhshani, Mohammad Mahdi and Liao, Shengcai and Snoek, Cees G. M.},
  title     = {Any-Shift Prompting for Generalization over Distributions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13849--13860},
}
CPR-Coach: Recognizing Composite Error Actions based on Single-class Training: Shunli Wang,

Shuaibing Wang,

Dingkang Yang,

Mingcheng Li,

Haopeng Kuang,

Xiao Zhao,

Liuzhen Su,

Peng Zhai,

Lihua Zhang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Shunli and Wang, Shuaibing and Yang, Dingkang and Li, Mingcheng and Kuang, Haopeng and Zhao, Xiao and Su, Liuzhen and Zhai, Peng and Zhang, Lihua},
  title     = {{CPR-Coach}: Recognizing Composite Error Actions based on Single-class Training},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18782--18792},
}
RTracker: Recoverable Tracking via PN Tree Structured Memory: Yuqing Huang,

Xin Li,

Zikun Zhou,

Yaowei Wang,

Zhenyu He,

Ming-Hsuan Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Yuqing and Li, Xin and Zhou, Zikun and Wang, Yaowei and He, Zhenyu and Yang, Ming-Hsuan},
  title     = {{RTracker}: Recoverable Tracking via {PN} Tree Structured Memory},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {19038--19047},
}
DualAD: Disentangling the Dynamic and Static World for End-to-End Driving: Simon Doll,

Niklas Hanselmann,

Lukas Schneider,

Richard Schulz,

Marius Cordts,

Markus Enzweiler,

Hendrik P. A. Lensch; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Doll_2024_CVPR,
  author    = {Doll, Simon and Hanselmann, Niklas and Schneider, Lukas and Schulz, Richard and Cordts, Marius and Enzweiler, Markus and Lensch, Hendrik P. A.},
  title     = {{DualAD}: Disentangling the Dynamic and Static World for End-to-End Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14728--14737},
}
MULDE: Multiscale Log-Density Estimation via Denoising Score Matching for Video Anomaly Detection: Jakub Micorek,

Horst Possegger,

Dominik Narnhofer,

Horst Bischof,

Mateusz Kozinski; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Micorek_2024_CVPR,
  author    = {Micorek, Jakub and Possegger, Horst and Narnhofer, Dominik and Bischof, Horst and Kozinski, Mateusz},
  title     = {{MULDE}: Multiscale Log-Density Estimation via Denoising Score Matching for Video Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18868--18877},
}
PTQ4SAM: Post-Training Quantization for Segment Anything: Chengtao Lv,

Hong Chen,

Jinyang Guo,

Yifu Ding,

Xianglong Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lv_2024_CVPR,
  author    = {Lv, Chengtao and Chen, Hong and Guo, Jinyang and Ding, Yifu and Liu, Xianglong},
  title     = {{PTQ4SAM}: Post-Training Quantization for {Segment Anything}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15941--15951},
}
Improving Bird's Eye View Semantic Segmentation by Task Decomposition: Tianhao Zhao,

Yongcan Chen,

Yu Wu,

Tianyang Liu,

Bo Du,

Peilun Xiao,

Shi Qiu,

Hongda Yang,

Guozhen Li,

Yi Yang,

Yutian Lin; [pdf]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Tianhao and Chen, Yongcan and Wu, Yu and Liu, Tianyang and Du, Bo and Xiao, Peilun and Qiu, Shi and Yang, Hongda and Li, Guozhen and Yang, Yi and Lin, Yutian},
  title     = {Improving Bird's Eye View Semantic Segmentation by Task Decomposition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15512--15521},
}
Scene Adaptive Sparse Transformer for Event-based Object Detection: Yansong Peng,

Hebei Li,

Yueyi Zhang,

Xiaoyan Sun,

Feng Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Peng_2024_CVPR,
  author    = {Peng, Yansong and Li, Hebei and Zhang, Yueyi and Sun, Xiaoyan and Wu, Feng},
  title     = {Scene Adaptive Sparse Transformer for Event-based Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16794--16804},
}
CURSOR: Scalable Mixed-Order Hypergraph Matching with CUR Decomposition: Qixuan Zheng,

Ming Zhang,

Hong Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zheng_2024_CVPR, author = {Zheng, Qixuan and Zhang, Ming and Yan, Hong}, title = {CURSOR: Scalable Mixed-Order Hypergraph Matching with CUR Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16036-16045} }
GigaTraj: Predicting Long-term Trajectories of Hundreds of Pedestrians in Gigapixel Complex Scenes: Haozhe Lin,

Chunyu Wei,

Li He,

Yuchen Guo,

Yunqi Zhao,

Shanglong Li,

Lu Fang; [pdf]
[bibtex]
@InProceedings{Lin_2024_CVPR, author = {Lin, Haozhe and Wei, Chunyu and He, Li and Guo, Yuchen and Zhao, Yunqi and Li, Shanglong and Fang, Lu}, title = {GigaTraj: Predicting Long-term Trajectories of Hundreds of Pedestrians in Gigapixel Complex Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19331-19340} }
C2KD: Bridging the Modality Gap for Cross-Modal Knowledge Distillation: Fushuo Huo,

Wenchao Xu,

Jingcai Guo,

Haozhao Wang,

Song Guo; [pdf] [supp]
[bibtex]
@InProceedings{Huo_2024_CVPR, author = {Huo, Fushuo and Xu, Wenchao and Guo, Jingcai and Wang, Haozhao and Guo, Song}, title = {C2KD: Bridging the Modality Gap for Cross-Modal Knowledge Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16006-16015} }
Traceable Federated Continual Learning: Qiang Wang,

Bingyan Liu,

Yawen Li; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2024_CVPR, author = {Wang, Qiang and Liu, Bingyan and Li, Yawen}, title = {Traceable Federated Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12872-12881} }
V*: Guided Visual Search as a Core Mechanism in Multimodal LLMs: Penghao Wu,

Saining Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR, author = {Wu, Penghao and Xie, Saining}, title = {V*: Guided Visual Search as a Core Mechanism in Multimodal LLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13084-13094} }
Uncertainty Visualization via Low-Dimensional Posterior Projections: Omer Yair,

Elias Nehme,

Tomer Michaeli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yair_2024_CVPR, author = {Yair, Omer and Nehme, Elias and Michaeli, Tomer}, title = {Uncertainty Visualization via Low-Dimensional Posterior Projections}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11041-11051} }
VSCode: General Visual Salient and Camouflaged Object Detection with 2D Prompt Learning: Ziyang Luo,

Nian Liu,

Wangbo Zhao,

Xuguang Yang,

Dingwen Zhang,

Deng-Ping Fan,

Fahad Khan,

Junwei Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2024_CVPR, author = {Luo, Ziyang and Liu, Nian and Zhao, Wangbo and Yang, Xuguang and Zhang, Dingwen and Fan, Deng-Ping and Khan, Fahad and Han, Junwei}, title = {VSCode: General Visual Salient and Camouflaged Object Detection with 2D Prompt Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17169-17180} }
PointInfinity: Resolution-Invariant Point Diffusion Models: Zixuan Huang,

Justin Johnson,

Shoubhik Debnath,

James M. Rehg,

Chao-Yuan Wu; [pdf] [arXiv]
[bibtex]
@InProceedings{Huang_2024_CVPR, author = {Huang, Zixuan and Johnson, Justin and Debnath, Shoubhik and Rehg, James M. and Wu, Chao-Yuan}, title = {PointInfinity: Resolution-Invariant Point Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10050-10060} }
Structured Model Probing: Empowering Efficient Transfer Learning by Structured Regularization: Zhi-Fan Wu,

Chaojie Mao,

Wue Wang,

Jianwen Jiang,

Yiliang Lv,

Rong Jin; [pdf] [supp]
[bibtex]
@InProceedings{Wu_2024_CVPR, author = {Wu, Zhi-Fan and Mao, Chaojie and Wang, Wue and Jiang, Jianwen and Lv, Yiliang and Jin, Rong}, title = {Structured Model Probing: Empowering Efficient Transfer Learning by Structured Regularization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16849-16858} }
Multi-Modal Proxy Learning Towards Personalized Visual Multiple Clustering: Jiawei Yao,

Qi Qian,

Juhua Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yao_2024_CVPR, author = {Yao, Jiawei and Qian, Qi and Hu, Juhua}, title = {Multi-Modal Proxy Learning Towards Personalized Visual Multiple Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14066-14075} }
ChAda-ViT : Channel Adaptive Attention for Joint Representation Learning of Heterogeneous Microscopy Images: Nicolas Bourriez,

Ihab Bendidi,

Ethan Cohen,

Gabriel Watkinson,

Maxime Sanchez,

Guillaume Bollot,

Auguste Genovesio; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bourriez_2024_CVPR, author = {Bourriez, Nicolas and Bendidi, Ihab and Cohen, Ethan and Watkinson, Gabriel and Sanchez, Maxime and Bollot, Guillaume and Genovesio, Auguste}, title = {ChAda-ViT : Channel Adaptive Attention for Joint Representation Learning of Heterogeneous Microscopy Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11556-11565} }
CARZero: Cross-Attention Alignment for Radiology Zero-Shot Classification: Haoran Lai,

Qingsong Yao,

Zihang Jiang,

Rongsheng Wang,

Zhiyang He,

Xiaodong Tao,

S. Kevin Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lai_2024_CVPR, author = {Lai, Haoran and Yao, Qingsong and Jiang, Zihang and Wang, Rongsheng and He, Zhiyang and Tao, Xiaodong and Zhou, S. Kevin}, title = {CARZero: Cross-Attention Alignment for Radiology Zero-Shot Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11137-11146} }
Multi-Modal Hallucination Control by Visual Information Grounding: Alessandro Favero,

Luca Zancato,

Matthew Trager,

Siddharth Choudhary,

Pramuditha Perera,

Alessandro Achille,

Ashwin Swaminathan,

Stefano Soatto; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Favero_2024_CVPR, author = {Favero, Alessandro and Zancato, Luca and Trager, Matthew and Choudhary, Siddharth and Perera, Pramuditha and Achille, Alessandro and Swaminathan, Ashwin and Soatto, Stefano}, title = {Multi-Modal Hallucination Control by Visual Information Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14303-14312} }
The Neglected Tails in Vision-Language Models: Shubham Parashar,

Zhiqiu Lin,

Tian Liu,

Xiangjue Dong,

Yanan Li,

Deva Ramanan,

James Caverlee,

Shu Kong; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Parashar_2024_CVPR, author = {Parashar, Shubham and Lin, Zhiqiu and Liu, Tian and Dong, Xiangjue and Li, Yanan and Ramanan, Deva and Caverlee, James and Kong, Shu}, title = {The Neglected Tails in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12988-12997} }
Learning Background Prompts to Discover Implicit Knowledge for Open Vocabulary Object Detection: Jiaming Li,

Jiacheng Zhang,

Jichang Li,

Ge Li,

Si Liu,

Liang Lin,

Guanbin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR, author = {Li, Jiaming and Zhang, Jiacheng and Li, Jichang and Li, Ge and Liu, Si and Lin, Liang and Li, Guanbin}, title = {Learning Background Prompts to Discover Implicit Knowledge for Open Vocabulary Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16678-16687} }
Towards Accurate Post-training Quantization for Diffusion Models: Changyuan Wang,

Ziwei Wang,

Xiuwei Xu,

Yansong Tang,

Jie Zhou,

Jiwen Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR, author = {Wang, Changyuan and Wang, Ziwei and Xu, Xiuwei and Tang, Yansong and Zhou, Jie and Lu, Jiwen}, title = {Towards Accurate Post-training Quantization for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16026-16035} }
GOAT-Bench: A Benchmark for Multi-Modal Lifelong Navigation: Mukul Khanna,

Ram Ramrakhya,

Gunjan Chhablani,

Sriram Yenamandra,

Theophile Gervet,

Matthew Chang,

Zsolt Kira,

Devendra Singh Chaplot,

Dhruv Batra,

Roozbeh Mottaghi; [pdf] [supp]
[bibtex]
@InProceedings{Khanna_2024_CVPR, author = {Khanna, Mukul and Ramrakhya, Ram and Chhablani, Gunjan and Yenamandra, Sriram and Gervet, Theophile and Chang, Matthew and Kira, Zsolt and Chaplot, Devendra Singh and Batra, Dhruv and Mottaghi, Roozbeh}, title = {GOAT-Bench: A Benchmark for Multi-Modal Lifelong Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16373-16383} }
Decoupling Static and Hierarchical Motion Perception for Referring Video Segmentation: Shuting He,

Henghui Ding; [pdf] [arXiv]
[bibtex]
@InProceedings{He_2024_CVPR, author = {He, Shuting and Ding, Henghui}, title = {Decoupling Static and Hierarchical Motion Perception for Referring Video Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13332-13341} }
Dense Vision Transformer Compression with Few Samples: Hanxiao Zhang,

Yifan Zhou,

Guo-Hua Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR, author = {Zhang, Hanxiao and Zhou, Yifan and Wang, Guo-Hua}, title = {Dense Vision Transformer Compression with Few Samples}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15825-15834} }
Masked AutoDecoder is Effective Multi-Task Vision Generalist: Han Qiu,

Jiaxing Huang,

Peng Gao,

Lewei Lu,

Xiaoqin Zhang,

Shijian Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qiu_2024_CVPR, author = {Qiu, Han and Huang, Jiaxing and Gao, Peng and Lu, Lewei and Zhang, Xiaoqin and Lu, Shijian}, title = {Masked AutoDecoder is Effective Multi-Task Vision Generalist}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14152-14161} }
Correlation-aware Coarse-to-fine MLPs for Deformable Medical Image Registration: Mingyuan Meng,

Dagan Feng,

Lei Bi,

Jinman Kim; [pdf] [arXiv]
[bibtex]
@InProceedings{Meng_2024_CVPR, author = {Meng, Mingyuan and Feng, Dagan and Bi, Lei and Kim, Jinman}, title = {Correlation-aware Coarse-to-fine MLPs for Deformable Medical Image Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9645-9654} }
Toward Generalist Anomaly Detection via In-context Residual Learning with Few-shot Sample Prompts: Jiawen Zhu,

Guansong Pang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhu_2024_CVPR, author = {Zhu, Jiawen and Pang, Guansong}, title = {Toward Generalist Anomaly Detection via In-context Residual Learning with Few-shot Sample Prompts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17826-17836} }
Fourier-basis Functions to Bridge Augmentation Gap: Rethinking Frequency Augmentation in Image Classification: Puru Vaish,

Shunxin Wang,

Nicola Strisciuglio; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Vaish_2024_CVPR, author = {Vaish, Puru and Wang, Shunxin and Strisciuglio, Nicola}, title = {Fourier-basis Functions to Bridge Augmentation Gap: Rethinking Frequency Augmentation in Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17763-17772} }
PlatoNeRF: 3D Reconstruction in Plato's Cave via Single-View Two-Bounce Lidar: Tzofi Klinghoffer,

Xiaoyu Xiang,

Siddharth Somasundaram,

Yuchen Fan,

Christian Richardt,

Ramesh Raskar,

Rakesh Ranjan; [pdf] [supp]
[bibtex]
@InProceedings{Klinghoffer_2024_CVPR, author = {Klinghoffer, Tzofi and Xiang, Xiaoyu and Somasundaram, Siddharth and Fan, Yuchen and Richardt, Christian and Raskar, Ramesh and Ranjan, Rakesh}, title = {PlatoNeRF: 3D Reconstruction in Plato's Cave via Single-View Two-Bounce Lidar}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14565-14574} }
An Interactive Navigation Method with Effect-oriented Affordance: Xiaohan Wang,

Yuehu Liu,

Xinhang Song,

Yuyi Liu,

Sixian Zhang,

Shuqiang Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2024_CVPR, author = {Wang, Xiaohan and Liu, Yuehu and Song, Xinhang and Liu, Yuyi and Zhang, Sixian and Jiang, Shuqiang}, title = {An Interactive Navigation Method with Effect-oriented Affordance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16446-16456} }
PREGO: Online Mistake Detection in PRocedural EGOcentric Videos: Alessandro Flaborea,

Guido Maria D'Amely di Melendugno,

Leonardo Plini,

Luca Scofano,

Edoardo De Matteis,

Antonino Furnari,

Giovanni Maria Farinella,

Fabio Galasso; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Flaborea_2024_CVPR, author = {Flaborea, Alessandro and {D'Amely di Melendugno}, Guido Maria and Plini, Leonardo and Scofano, Luca and De Matteis, Edoardo and Furnari, Antonino and Farinella, Giovanni Maria and Galasso, Fabio}, title = {PREGO: Online Mistake Detection in PRocedural EGOcentric Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18483-18492} }
Logit Standardization in Knowledge Distillation: Shangquan Sun,

Wenqi Ren,

Jingzhi Li,

Rui Wang,

Xiaochun Cao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2024_CVPR, author = {Sun, Shangquan and Ren, Wenqi and Li, Jingzhi and Wang, Rui and Cao, Xiaochun}, title = {Logit Standardization in Knowledge Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15731-15740} }
Fine-grained Prototypical Voting with Heterogeneous Mixup for Semi-supervised 2D-3D Cross-modal Retrieval: Fan Zhang,

Xian-Sheng Hua,

Chong Chen,

Xiao Luo; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2024_CVPR, author = {Zhang, Fan and Hua, Xian-Sheng and Chen, Chong and Luo, Xiao}, title = {Fine-grained Prototypical Voting with Heterogeneous Mixup for Semi-supervised 2D-3D Cross-modal Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17016-17026} }
Leak and Learn: An Attacker's Cookbook to Train Using Leaked Data from Federated Learning: Joshua C. Zhao,

Ahaan Dabholkar,

Atul Sharma,

Saurabh Bagchi; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2024_CVPR, author = {Zhao, Joshua C. and Dabholkar, Ahaan and Sharma, Atul and Bagchi, Saurabh}, title = {Leak and Learn: An Attacker's Cookbook to Train Using Leaked Data from Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12247-12256} }
OCAI: Improving Optical Flow Estimation by Occlusion and Consistency Aware Interpolation: Jisoo Jeong,

Hong Cai,

Risheek Garrepalli,

Jamie Menjay Lin,

Munawar Hayat,

Fatih Porikli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jeong_2024_CVPR, author = {Jeong, Jisoo and Cai, Hong and Garrepalli, Risheek and Lin, Jamie Menjay and Hayat, Munawar and Porikli, Fatih}, title = {OCAI: Improving Optical Flow Estimation by Occlusion and Consistency Aware Interpolation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19352-19362} }
Doodle Your 3D: From Abstract Freehand Sketches to Precise 3D Shapes: Hmrishav Bandyopadhyay,

Subhadeep Koley,

Ayan Das,

Ayan Kumar Bhunia,

Aneeshan Sain,

Pinaki Nath Chowdhury,

Tao Xiang,

Yi-Zhe Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bandyopadhyay_2024_CVPR, author = {Bandyopadhyay, Hmrishav and Koley, Subhadeep and Das, Ayan and Bhunia, Ayan Kumar and Sain, Aneeshan and Chowdhury, Pinaki Nath and Xiang, Tao and Song, Yi-Zhe}, title = {Doodle Your 3D: From Abstract Freehand Sketches to Precise 3D Shapes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9795-9805} }
Single View Refractive Index Tomography with Neural Fields: Brandon Zhao,

Aviad Levis,

Liam Connor,

Pratul P. Srinivasan,

Katherine L. Bouman; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2024_CVPR, author = {Zhao, Brandon and Levis, Aviad and Connor, Liam and Srinivasan, Pratul P. and Bouman, Katherine L.}, title = {Single View Refractive Index Tomography with Neural Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25358-25367} }
XFibrosis: Explicit Vessel-Fiber Modeling for Fibrosis Staging from Liver Pathology Images: Chong Yin,

Siqi Liu,

Fei Lyu,

Jiahao Lu,

Sune Darkner,

Vincent Wai-Sun Wong,

Pong C. Yuen; [pdf] [supp]
[bibtex]
@InProceedings{Yin_2024_CVPR, author = {Yin, Chong and Liu, Siqi and Lyu, Fei and Lu, Jiahao and Darkner, Sune and Wong, Vincent Wai-Sun and Yuen, Pong C.}, title = {XFibrosis: Explicit Vessel-Fiber Modeling for Fibrosis Staging from Liver Pathology Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11282-11291} }
UnO: Unsupervised Occupancy Fields for Perception and Forecasting: Ben Agro,

Quinlan Sykora,

Sergio Casas,

Thomas Gilles,

Raquel Urtasun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Agro_2024_CVPR, author = {Agro, Ben and Sykora, Quinlan and Casas, Sergio and Gilles, Thomas and Urtasun, Raquel}, title = {UnO: Unsupervised Occupancy Fields for Perception and Forecasting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14487-14496} }
SpatialVLM: Endowing Vision-Language Models with Spatial Reasoning Capabilities: Boyuan Chen,

Zhuo Xu,

Sean Kirmani,

Brian Ichter,

Dorsa Sadigh,

Leonidas Guibas,

Fei Xia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR, author = {Chen, Boyuan and Xu, Zhuo and Kirmani, Sean and Ichter, Brian and Sadigh, Dorsa and Guibas, Leonidas and Xia, Fei}, title = {SpatialVLM: Endowing Vision-Language Models with Spatial Reasoning Capabilities}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14455-14465} }
InstructDiffusion: A Generalist Modeling Interface for Vision Tasks: Zigang Geng,

Binxin Yang,

Tiankai Hang,

Chen Li,

Shuyang Gu,

Ting Zhang,

Jianmin Bao,

Zheng Zhang,

Houqiang Li,

Han Hu,

Dong Chen,

Baining Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Geng_2024_CVPR, author = {Geng, Zigang and Yang, Binxin and Hang, Tiankai and Li, Chen and Gu, Shuyang and Zhang, Ting and Bao, Jianmin and Zhang, Zheng and Li, Houqiang and Hu, Han and Chen, Dong and Guo, Baining}, title = {InstructDiffusion: A Generalist Modeling Interface for Vision Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12709-12720} }
Gated Fields: Learning Scene Reconstruction from Gated Videos: Andrea Ramazzina,

Stefanie Walz,

Pragyan Dahal,

Mario Bijelic,

Felix Heide; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ramazzina_2024_CVPR, author = {Ramazzina, Andrea and Walz, Stefanie and Dahal, Pragyan and Bijelic, Mario and Heide, Felix}, title = {Gated Fields: Learning Scene Reconstruction from Gated Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10530-10541} }
RadarDistill: Boosting Radar-based Object Detection Performance via Knowledge Distillation from LiDAR Features: Geonho Bang,

Kwangjin Choi,

Jisong Kim,

Dongsuk Kum,

Jun Won Choi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bang_2024_CVPR, author = {Bang, Geonho and Choi, Kwangjin and Kim, Jisong and Kum, Dongsuk and Choi, Jun Won}, title = {RadarDistill: Boosting Radar-based Object Detection Performance via Knowledge Distillation from LiDAR Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15491-15500} }
Diffusion-driven GAN Inversion for Multi-Modal Face Image Generation: Jihyun Kim,

Changjae Oh,

Hoseok Do,

Soohyun Kim,

Kwanghoon Sohn; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR, author = {Kim, Jihyun and Oh, Changjae and Do, Hoseok and Kim, Soohyun and Sohn, Kwanghoon}, title = {Diffusion-driven GAN Inversion for Multi-Modal Face Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10403-10412} }
Low-Rank Knowledge Decomposition for Medical Foundation Models: Yuhang Zhou,

Haolin Li,

Siyuan Du,

Jiangchao Yao,

Ya Zhang,

Yanfeng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2024_CVPR, author = {Zhou, Yuhang and Li, Haolin and Du, Siyuan and Yao, Jiangchao and Zhang, Ya and Wang, Yanfeng}, title = {Low-Rank Knowledge Decomposition for Medical Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11611-11620} }
Steganographic Passport: An Owner and User Verifiable Credential for Deep Model IP Protection Without Retraining: Qi Cui,

Ruohan Meng,

Chaohui Xu,

Chip-Hong Chang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cui_2024_CVPR, author = {Cui, Qi and Meng, Ruohan and Xu, Chaohui and Chang, Chip-Hong}, title = {Steganographic Passport: An Owner and User Verifiable Credential for Deep Model IP Protection Without Retraining}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12302-12311} }
En3D: An Enhanced Generative Model for Sculpting 3D Humans from 2D Synthetic Data: Yifang Men,

Biwen Lei,

Yuan Yao,

Miaomiao Cui,

Zhouhui Lian,

Xuansong Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Men_2024_CVPR, author = {Men, Yifang and Lei, Biwen and Yao, Yuan and Cui, Miaomiao and Lian, Zhouhui and Xie, Xuansong}, title = {En3D: An Enhanced Generative Model for Sculpting 3D Humans from 2D Synthetic Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9981-9991} }
Neural Visibility Field for Uncertainty-Driven Active Mapping: Shangjie Xue,

Jesse Dill,

Pranay Mathur,

Frank Dellaert,

Panagiotis Tsiotras,

Danfei Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xue_2024_CVPR, author = {Xue, Shangjie and Dill, Jesse and Mathur, Pranay and Dellaert, Frank and Tsiotras, Panagiotis and Xu, Danfei}, title = {Neural Visibility Field for Uncertainty-Driven Active Mapping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18122-18132} }
Tri-Perspective View Decomposition for Geometry-Aware Depth Completion: Zhiqiang Yan,

Yuankai Lin,

Kun Wang,

Yupeng Zheng,

Yufei Wang,

Zhenyu Zhang,

Jun Li,

Jian Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yan_2024_CVPR, author = {Yan, Zhiqiang and Lin, Yuankai and Wang, Kun and Zheng, Yupeng and Wang, Yufei and Zhang, Zhenyu and Li, Jun and Yang, Jian}, title = {Tri-Perspective View Decomposition for Geometry-Aware Depth Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4874-4884} }
Relaxed Contrastive Learning for Federated Learning: Seonguk Seo,

Jinkyu Kim,

Geeho Kim,

Bohyung Han; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Seo_2024_CVPR, author = {Seo, Seonguk and Kim, Jinkyu and Kim, Geeho and Han, Bohyung}, title = {Relaxed Contrastive Learning for Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12279-12288} }
Harnessing the Power of MLLMs for Transferable Text-to-Image Person ReID: Wentan Tan,

Changxing Ding,

Jiayu Jiang,

Fei Wang,

Yibing Zhan,

Dapeng Tao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tan_2024_CVPR, author = {Tan, Wentan and Ding, Changxing and Jiang, Jiayu and Wang, Fei and Zhan, Yibing and Tao, Dapeng}, title = {Harnessing the Power of MLLMs for Transferable Text-to-Image Person ReID}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17127-17137} }
Weakly Supervised Video Individual Counting: Xinyan Liu,

Guorong Li,

Yuankai Qi,

Ziheng Yan,

Zhenjun Han,

Anton van den Hengel,

Ming-Hsuan Yang,

Qingming Huang; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2024_CVPR, author = {Liu, Xinyan and Li, Guorong and Qi, Yuankai and Yan, Ziheng and Han, Zhenjun and van den Hengel, Anton and Yang, Ming-Hsuan and Huang, Qingming}, title = {Weakly Supervised Video Individual Counting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19228-19237} }
Gaussian Shading: Provable Performance-Lossless Image Watermarking for Diffusion Models: Zijin Yang,

Kai Zeng,

Kejiang Chen,

Han Fang,

Weiming Zhang,

Nenghai Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR, author = {Yang, Zijin and Zeng, Kai and Chen, Kejiang and Fang, Han and Zhang, Weiming and Yu, Nenghai}, title = {Gaussian Shading: Provable Performance-Lossless Image Watermarking for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12162-12171} }
DocRes: A Generalist Model Toward Unifying Document Image Restoration Tasks: Jiaxin Zhang,

Dezhi Peng,

Chongyu Liu,

Peirong Zhang,

Lianwen Jin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR, author = {Zhang, Jiaxin and Peng, Dezhi and Liu, Chongyu and Zhang, Peirong and Jin, Lianwen}, title = {DocRes: A Generalist Model Toward Unifying Document Image Restoration Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15654-15664} }
Honeybee: Locality-enhanced Projector for Multimodal LLM: Junbum Cha,

Wooyoung Kang,

Jonghwan Mun,

Byungseok Roh; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cha_2024_CVPR, author = {Cha, Junbum and Kang, Wooyoung and Mun, Jonghwan and Roh, Byungseok}, title = {Honeybee: Locality-enhanced Projector for Multimodal LLM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13817-13827} }
Learned Trajectory Embedding for Subspace Clustering: Yaroslava Lochman,

Carl Olsson,

Christopher Zach; [pdf] [supp]
[bibtex]
@InProceedings{Lochman_2024_CVPR, author = {Lochman, Yaroslava and Olsson, Carl and Zach, Christopher}, title = {Learned Trajectory Embedding for Subspace Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19092-19102} }
HarmonyView: Harmonizing Consistency and Diversity in One-Image-to-3D: Sangmin Woo,

Byeongjun Park,

Hyojun Go,

Jin-Young Kim,

Changick Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Woo_2024_CVPR, author = {Woo, Sangmin and Park, Byeongjun and Go, Hyojun and Kim, Jin-Young and Kim, Changick}, title = {HarmonyView: Harmonizing Consistency and Diversity in One-Image-to-3D}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10574-10584} }
UnSAMFlow: Unsupervised Optical Flow Guided by Segment Anything Model: Shuai Yuan,

Lei Luo,

Zhuo Hui,

Can Pu,

Xiaoyu Xiang,

Rakesh Ranjan,

Denis Demandolx; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yuan_2024_CVPR, author = {Yuan, Shuai and Luo, Lei and Hui, Zhuo and Pu, Can and Xiang, Xiaoyu and Ranjan, Rakesh and Demandolx, Denis}, title = {UnSAMFlow: Unsupervised Optical Flow Guided by Segment Anything Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19027-19037} }
Exploiting Inter-sample and Inter-feature Relations in Dataset Distillation: Wenxiao Deng,

Wenbin Li,

Tianyu Ding,

Lei Wang,

Hongguang Zhang,

Kuihua Huang,

Jing Huo,

Yang Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Deng_2024_CVPR, author = {Deng, Wenxiao and Li, Wenbin and Ding, Tianyu and Wang, Lei and Zhang, Hongguang and Huang, Kuihua and Huo, Jing and Gao, Yang}, title = {Exploiting Inter-sample and Inter-feature Relations in Dataset Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17057-17066} }
Context-based and Diversity-driven Specificity in Compositional Zero-Shot Learning: Yun Li,

Zhe Liu,

Hang Chen,

Lina Yao; [pdf] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR, author = {Li, Yun and Liu, Zhe and Chen, Hang and Yao, Lina}, title = {Context-based and Diversity-driven Specificity in Compositional Zero-Shot Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17037-17046} }
Rethinking Diffusion Model for Multi-Contrast MRI Super-Resolution: Guangyuan Li,

Chen Rao,

Juncheng Mo,

Zhanjie Zhang,

Wei Xing,

Lei Zhao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR, author = {Li, Guangyuan and Rao, Chen and Mo, Juncheng and Zhang, Zhanjie and Xing, Wei and Zhao, Lei}, title = {Rethinking Diffusion Model for Multi-Contrast MRI Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11365-11374} }
Unknown Prompt the only Lacuna: Unveiling CLIP's Potential for Open Domain Generalization: Mainak Singha,

Ankit Jha,

Shirsha Bose,

Ashwin Nair,

Moloud Abdar,

Biplab Banerjee; [pdf] [supp]
[bibtex]
@InProceedings{Singha_2024_CVPR,
    author    = {Singha, Mainak and Jha, Ankit and Bose, Shirsha and Nair, Ashwin and Abdar, Moloud and Banerjee, Biplab},
    title     = {Unknown Prompt the only Lacuna: Unveiling {CLIP}'s Potential for Open Domain Generalization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {13309--13319},
}
From Coarse to Fine-Grained Open-Set Recognition: Nico Lang,

Vésteinn Snæbjarnarson,

Elijah Cole,

Oisin Mac Aodha,

Christian Igel,

Serge Belongie; [pdf] [supp]
[bibtex]
@InProceedings{Lang_2024_CVPR,
    author    = {Lang, Nico and Sn{\ae}bjarnarson, V{\'e}steinn and Cole, Elijah and Mac Aodha, Oisin and Igel, Christian and Belongie, Serge},
    title     = {From Coarse to Fine-Grained Open-Set Recognition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17804--17814},
}
OmniViD: A Generative Framework for Universal Video Understanding: Junke Wang,

Dongdong Chen,

Chong Luo,

Bo He,

Lu Yuan,

Zuxuan Wu,

Yu-Gang Jiang; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
    author    = {Wang, Junke and Chen, Dongdong and Luo, Chong and He, Bo and Yuan, Lu and Wu, Zuxuan and Jiang, Yu-Gang},
    title     = {{OmniViD}: A Generative Framework for Universal Video Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {18209--18220},
}
Naturally Supervised 3D Visual Grounding with Language-Regularized Concept Learners: Chun Feng,

Joy Hsu,

Weiyu Liu,

Jiajun Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Feng_2024_CVPR,
    author    = {Feng, Chun and Hsu, Joy and Liu, Weiyu and Wu, Jiajun},
    title     = {Naturally Supervised {3D} Visual Grounding with Language-Regularized Concept Learners},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {13269--13278},
}
CA-Jaccard: Camera-aware Jaccard Distance for Person Re-identification: Yiyu Chen,

Zheyi Fan,

Zhaoru Chen,

Yixuan Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
    author    = {Chen, Yiyu and Fan, Zheyi and Chen, Zhaoru and Zhu, Yixuan},
    title     = {{CA-Jaccard}: Camera-aware {Jaccard} Distance for Person Re-identification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17532--17541},
}
AutoAD III: The Prequel - Back to the Pixels: Tengda Han,

Max Bain,

Arsha Nagrani,

Gül Varol,

Weidi Xie,

Andrew Zisserman; [pdf]
[bibtex]
@InProceedings{Han_2024_CVPR,
    author    = {Han, Tengda and Bain, Max and Nagrani, Arsha and Varol, G{\"u}l and Xie, Weidi and Zisserman, Andrew},
    title     = {{AutoAD III}: The Prequel - Back to the Pixels},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {18164--18174},
}
Characteristics Matching Based Hash Codes Generation for Efficient Fine-grained Image Retrieval: Zhen-Duo Chen,

Li-Jun Zhao,

Zi-Chao Zhang,

Xin Luo,

Xin-Shun Xu; [pdf]
[bibtex]
@InProceedings{Chen_2024_CVPR,
    author    = {Chen, Zhen-Duo and Zhao, Li-Jun and Zhang, Zi-Chao and Luo, Xin and Xu, Xin-Shun},
    title     = {Characteristics Matching Based Hash Codes Generation for Efficient Fine-grained Image Retrieval},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17273--17281},
}
Matching 2D Images in 3D: Metric Relative Pose from Metric Correspondences: Axel Barroso-Laguna,

Sowmya Munukutla,

Victor Adrian Prisacariu,

Eric Brachmann; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Barroso-Laguna_2024_CVPR,
    author    = {Barroso-Laguna, Axel and Munukutla, Sowmya and Prisacariu, Victor Adrian and Brachmann, Eric},
    title     = {Matching {2D} Images in {3D}: Metric Relative Pose from Metric Correspondences},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {4852--4863},
}
M3-UDA: A New Benchmark for Unsupervised Domain Adaptive Fetal Cardiac Structure Detection: Bin Pu,

Liwen Wang,

Jiewen Yang,

Guannan He,

Xingbo Dong,

Shengli Li,

Ying Tan,

Ming Chen,

Zhe Jin,

Kenli Li,

Xiaomeng Li; [pdf] [supp]
[bibtex]
@InProceedings{Pu_2024_CVPR,
    author    = {Pu, Bin and Wang, Liwen and Yang, Jiewen and He, Guannan and Dong, Xingbo and Li, Shengli and Tan, Ying and Chen, Ming and Jin, Zhe and Li, Kenli and Li, Xiaomeng},
    title     = {{M3-UDA}: A New Benchmark for Unsupervised Domain Adaptive Fetal Cardiac Structure Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {11621--11630},
}
Chat-UniVi: Unified Visual Representation Empowers Large Language Models with Image and Video Understanding: Peng Jin,

Ryuichi Takanobu,

Wancai Zhang,

Xiaochun Cao,

Li Yuan; [pdf] [supp]
[bibtex]
@InProceedings{Jin_2024_CVPR,
    author    = {Jin, Peng and Takanobu, Ryuichi and Zhang, Wancai and Cao, Xiaochun and Yuan, Li},
    title     = {{Chat-UniVi}: Unified Visual Representation Empowers Large Language Models with Image and Video Understanding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {13700--13710},
}
Token Transformation Matters: Towards Faithful Post-hoc Explanation for Vision Transformer: Junyi Wu,

Bin Duan,

Weitai Kang,

Hao Tang,

Yan Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
    author    = {Wu, Junyi and Duan, Bin and Kang, Weitai and Tang, Hao and Yan, Yan},
    title     = {Token Transformation Matters: Towards Faithful Post-hoc Explanation for Vision Transformer},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {10926--10935},
}
Bayesian Differentiable Physics for Cloth Digitalization: Deshan Gong,

Ningtao Mao,

He Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gong_2024_CVPR,
    author    = {Gong, Deshan and Mao, Ningtao and Wang, He},
    title     = {Bayesian Differentiable Physics for Cloth Digitalization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {11841--11851},
}
Higher-order Relational Reasoning for Pedestrian Trajectory Prediction: Sungjune Kim,

Hyung-gun Chi,

Hyerin Lim,

Karthik Ramani,

Jinkyu Kim,

Sangpil Kim; [pdf] [supp]
[bibtex]
@InProceedings{Kim_2024_CVPR,
    author    = {Kim, Sungjune and Chi, Hyung-gun and Lim, Hyerin and Ramani, Karthik and Kim, Jinkyu and Kim, Sangpil},
    title     = {Higher-order Relational Reasoning for Pedestrian Trajectory Prediction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {15251--15260},
}
RealNet: A Feature Selection Network with Realistic Synthetic Anomaly for Anomaly Detection: Ximiao Zhang,

Min Xu,

Xiuzhuang Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Ximiao and Xu, Min and Zhou, Xiuzhuang},
    title     = {{RealNet}: A Feature Selection Network with Realistic Synthetic Anomaly for Anomaly Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {16699--16708},
}
Multi-modal Instruction Tuned LLMs with Fine-grained Visual Perception: Junwen He,

Yifan Wang,

Lijun Wang,

Huchuan Lu,

Jun-Yan He,

Jin-Peng Lan,

Bin Luo,

Xuansong Xie; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{He_2024_CVPR,
    author    = {He, Junwen and Wang, Yifan and Wang, Lijun and Lu, Huchuan and He, Jun-Yan and Lan, Jin-Peng and Luo, Bin and Xie, Xuansong},
    title     = {Multi-modal Instruction Tuned {LLMs} with Fine-grained Visual Perception},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {13980--13990},
}
LaMPilot: An Open Benchmark Dataset for Autonomous Driving with Language Model Programs: Yunsheng Ma,

Can Cui,

Xu Cao,

Wenqian Ye,

Peiran Liu,

Juanwu Lu,

Amr Abdelraouf,

Rohit Gupta,

Kyungtae Han,

Aniket Bera,

James M. Rehg,

Ziran Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Ma_2024_CVPR,
    author    = {Ma, Yunsheng and Cui, Can and Cao, Xu and Ye, Wenqian and Liu, Peiran and Lu, Juanwu and Abdelraouf, Amr and Gupta, Rohit and Han, Kyungtae and Bera, Aniket and Rehg, James M. and Wang, Ziran},
    title     = {{LaMPilot}: An Open Benchmark Dataset for Autonomous Driving with Language Model Programs},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {15141--15151},
}
FairDeDup: Detecting and Mitigating Vision-Language Fairness Disparities in Semantic Dataset Deduplication: Eric Slyman,

Stefan Lee,

Scott Cohen,

Kushal Kafle; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Slyman_2024_CVPR,
    author    = {Slyman, Eric and Lee, Stefan and Cohen, Scott and Kafle, Kushal},
    title     = {{FairDeDup}: Detecting and Mitigating Vision-Language Fairness Disparities in Semantic Dataset Deduplication},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {13905--13916},
}
Modality-agnostic Domain Generalizable Medical Image Segmentation by Multi-Frequency in Multi-Scale Attention: Ju-Hyeon Nam,

Nur Suriza Syazwany,

Su Jung Kim,

Sang-Chul Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nam_2024_CVPR,
    author    = {Nam, Ju-Hyeon and Syazwany, Nur Suriza and Kim, Su Jung and Lee, Sang-Chul},
    title     = {Modality-agnostic Domain Generalizable Medical Image Segmentation by Multi-Frequency in Multi-Scale Attention},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {11480--11491},
}
Auto MC-Reward: Automated Dense Reward Design with Large Language Models for Minecraft: Hao Li,

Xue Yang,

Zhaokai Wang,

Xizhou Zhu,

Jie Zhou,

Yu Qiao,

Xiaogang Wang,

Hongsheng Li,

Lewei Lu,

Jifeng Dai; [pdf] [supp]
[bibtex]
@InProceedings{Li_2024_CVPR,
    author    = {Li, Hao and Yang, Xue and Wang, Zhaokai and Zhu, Xizhou and Zhou, Jie and Qiao, Yu and Wang, Xiaogang and Li, Hongsheng and Lu, Lewei and Dai, Jifeng},
    title     = {{Auto MC-Reward}: Automated Dense Reward Design with Large Language Models for {Minecraft}},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {16426--16435},
}
GenFlow: Generalizable Recurrent Flow for 6D Pose Refinement of Novel Objects: Sungphill Moon,

Hyeontae Son,

Dongcheol Hur,

Sangwook Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Moon_2024_CVPR,
    author    = {Moon, Sungphill and Son, Hyeontae and Hur, Dongcheol and Kim, Sangwook},
    title     = {{GenFlow}: Generalizable Recurrent Flow for {6D} Pose Refinement of Novel Objects},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {10039--10049},
}
Logarithmic Lenses: Exploring Log RGB Data for Image Classification: Bruce A. Maxwell,

Sumegha Singhania,

Avnish Patel,

Rahul Kumar,

Heather Fryling,

Sihan Li,

Haonan Sun,

Ping He,

Zewen Li; [pdf]
[bibtex]
@InProceedings{Maxwell_2024_CVPR,
    author    = {Maxwell, Bruce A. and Singhania, Sumegha and Patel, Avnish and Kumar, Rahul and Fryling, Heather and Li, Sihan and Sun, Haonan and He, Ping and Li, Zewen},
    title     = {Logarithmic Lenses: Exploring Log {RGB} Data for Image Classification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17470--17479},
}
Scaled Decoupled Distillation: Shicai Wei,

Chunbo Luo,

Yang Luo; [pdf] [arXiv]
[bibtex]
@InProceedings{Wei_2024_CVPR,
    author    = {Wei, Shicai and Luo, Chunbo and Luo, Yang},
    title     = {Scaled Decoupled Distillation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {15975--15983},
}
Cloud-Device Collaborative Learning for Multimodal Large Language Models: Guanqun Wang,

Jiaming Liu,

Chenxuan Li,

Yuan Zhang,

Junpeng Ma,

Xinyu Wei,

Kevin Zhang,

Maurice Chong,

Renrui Zhang,

Yijiang Liu,

Shanghang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
    author    = {Wang, Guanqun and Liu, Jiaming and Li, Chenxuan and Zhang, Yuan and Ma, Junpeng and Wei, Xinyu and Zhang, Kevin and Chong, Maurice and Zhang, Renrui and Liu, Yijiang and Zhang, Shanghang},
    title     = {Cloud-Device Collaborative Learning for Multimodal Large Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {12646--12655},
}
KD-DETR: Knowledge Distillation for Detection Transformer with Consistent Distillation Points Sampling: Yu Wang,

Xin Li,

Shengzhao Weng,

Gang Zhang,

Haixiao Yue,

Haocheng Feng,

Junyu Han,

Errui Ding; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2024_CVPR,
    author    = {Wang, Yu and Li, Xin and Weng, Shengzhao and Zhang, Gang and Yue, Haixiao and Feng, Haocheng and Han, Junyu and Ding, Errui},
    title     = {{KD-DETR}: Knowledge Distillation for Detection Transformer with Consistent Distillation Points Sampling},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {16016--16025},
}
EMCAD: Efficient Multi-scale Convolutional Attention Decoding for Medical Image Segmentation: Md Mostafijur Rahman,

Mustafa Munir,

Radu Marculescu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rahman_2024_CVPR,
    author    = {Rahman, Md Mostafijur and Munir, Mustafa and Marculescu, Radu},
    title     = {{EMCAD}: Efficient Multi-scale Convolutional Attention Decoding for Medical Image Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {11769--11779},
}
MART: Masked Affective RepresenTation Learning via Masked Temporal Distribution Distillation: Zhicheng Zhang,

Pancheng Zhao,

Eunil Park,

Jufeng Yang; [pdf]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Zhicheng and Zhao, Pancheng and Park, Eunil and Yang, Jufeng},
    title     = {{MART}: Masked Affective {RepresenTation} Learning via Masked Temporal Distribution Distillation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {12830--12840},
}
MTLoRA: Low-Rank Adaptation Approach for Efficient Multi-Task Learning: Ahmed Agiza,

Marina Neseem,

Sherief Reda; [pdf] [supp]
[bibtex]
@InProceedings{Agiza_2024_CVPR,
    author    = {Agiza, Ahmed and Neseem, Marina and Reda, Sherief},
    title     = {{MTLoRA}: Low-Rank Adaptation Approach for Efficient Multi-Task Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {16196--16205},
}
Motion Blur Decomposition with Cross-shutter Guidance: Xiang Ji,

Haiyang Jiang,

Yinqiang Zheng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ji_2024_CVPR,
    author    = {Ji, Xiang and Jiang, Haiyang and Zheng, Yinqiang},
    title     = {Motion Blur Decomposition with Cross-shutter Guidance},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {12534--12543},
}
Scene-adaptive and Region-aware Multi-modal Prompt for Open Vocabulary Object Detection: Xiaowei Zhao,

Xianglong Liu,

Duorui Wang,

Yajun Gao,

Zhide Liu; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
    author    = {Zhao, Xiaowei and Liu, Xianglong and Wang, Duorui and Gao, Yajun and Liu, Zhide},
    title     = {Scene-adaptive and Region-aware Multi-modal Prompt for Open Vocabulary Object Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {16741--16750},
}
Instance-Aware Group Quantization for Vision Transformers: Jaehyeon Moon,

Dohyung Kim,

Junyong Cheon,

Bumsub Ham; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Moon_2024_CVPR,
    author    = {Moon, Jaehyeon and Kim, Dohyung and Cheon, Junyong and Ham, Bumsub},
    title     = {Instance-Aware Group Quantization for Vision Transformers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {16132--16141},
}
A General and Efficient Training for Transformer via Token Expansion: Wenxuan Huang,

Yunhang Shen,

Jiao Xie,

Baochang Zhang,

Gaoqi He,

Ke Li,

Xing Sun,

Shaohui Lin; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huang_2024_CVPR,
    author    = {Huang, Wenxuan and Shen, Yunhang and Xie, Jiao and Zhang, Baochang and He, Gaoqi and Li, Ke and Sun, Xing and Lin, Shaohui},
    title     = {A General and Efficient Training for Transformer via Token Expansion},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {15783--15792},
}
Tyche: Stochastic In-Context Learning for Medical Image Segmentation: Marianne Rakic,

Hallee E. Wong,

Jose Javier Gonzalez Ortiz,

Beth A. Cimini,

John V. Guttag,

Adrian V. Dalca; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Rakic_2024_CVPR,
    author    = {Rakic, Marianne and Wong, Hallee E. and Ortiz, Jose Javier Gonzalez and Cimini, Beth A. and Guttag, John V. and Dalca, Adrian V.},
    title     = {Tyche: Stochastic In-Context Learning for Medical Image Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {11159--11173},
}
YOLO-World: Real-Time Open-Vocabulary Object Detection: Tianheng Cheng,

Lin Song,

Yixiao Ge,

Wenyu Liu,

Xinggang Wang,

Ying Shan; [pdf] [supp]
[bibtex]
@InProceedings{Cheng_2024_CVPR,
    author    = {Cheng, Tianheng and Song, Lin and Ge, Yixiao and Liu, Wenyu and Wang, Xinggang and Shan, Ying},
    title     = {{YOLO-World}: Real-Time Open-Vocabulary Object Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {16901--16911},
}
Cross-Dimension Affinity Distillation for 3D EM Neuron Segmentation: Xiaoyu Liu,

Miaomiao Cai,

Yinda Chen,

Yueyi Zhang,

Te Shi,

Ruobing Zhang,

Xuejin Chen,

Zhiwei Xiong; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2024_CVPR,
    author    = {Liu, Xiaoyu and Cai, Miaomiao and Chen, Yinda and Zhang, Yueyi and Shi, Te and Zhang, Ruobing and Chen, Xuejin and Xiong, Zhiwei},
    title     = {Cross-Dimension Affinity Distillation for {3D} {EM} Neuron Segmentation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {11104--11113},
}
Producing and Leveraging Online Map Uncertainty in Trajectory Prediction: Xunjiang Gu,

Guanyu Song,

Igor Gilitschenski,

Marco Pavone,

Boris Ivanovic; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gu_2024_CVPR,
    author    = {Gu, Xunjiang and Song, Guanyu and Gilitschenski, Igor and Pavone, Marco and Ivanovic, Boris},
    title     = {Producing and Leveraging Online Map Uncertainty in Trajectory Prediction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {14521--14530},
}
LASO: Language-guided Affordance Segmentation on 3D Object: Yicong Li,

Na Zhao,

Junbin Xiao,

Chun Feng,

Xiang Wang,

Tat-seng Chua; [pdf] [supp]
[bibtex]
@InProceedings{Li_2024_CVPR,
    author    = {Li, Yicong and Zhao, Na and Xiao, Junbin and Feng, Chun and Wang, Xiang and Chua, Tat-seng},
    title     = {{LASO}: Language-guided Affordance Segmentation on {3D} Object},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {14251--14260},
}
Riemannian Multinomial Logistics Regression for SPD Neural Networks: Ziheng Chen,

Yue Song,

Gaowen Liu,

Ramana Rao Kompella,

Xiao-Jun Wu,

Nicu Sebe; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
    author    = {Chen, Ziheng and Song, Yue and Liu, Gaowen and Kompella, Ramana Rao and Wu, Xiao-Jun and Sebe, Nicu},
    title     = {Riemannian Multinomial Logistics Regression for {SPD} Neural Networks},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17086--17096},
}
What Sketch Explainability Really Means for Downstream Tasks?: Hmrishav Bandyopadhyay,

Pinaki Nath Chowdhury,

Ayan Kumar Bhunia,

Aneeshan Sain,

Tao Xiang,

Yi-Zhe Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bandyopadhyay_2024_CVPR,
    author    = {Bandyopadhyay, Hmrishav and Chowdhury, Pinaki Nath and Bhunia, Ayan Kumar and Sain, Aneeshan and Xiang, Tao and Song, Yi-Zhe},
    title     = {What Sketch Explainability Really Means for Downstream Tasks?},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {10997--11008},
}
Neural Exposure Fusion for High-Dynamic Range Object Detection: Emmanuel Onzon,

Maximilian Bömer,

Fahim Mannan,

Felix Heide; [pdf] [supp]
[bibtex]
@InProceedings{Onzon_2024_CVPR,
    author    = {Onzon, Emmanuel and B{\"o}mer, Maximilian and Mannan, Fahim and Heide, Felix},
    title     = {Neural Exposure Fusion for High-Dynamic Range Object Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17564--17573},
}
SFOD: Spiking Fusion Object Detector: Yimeng Fan,

Wei Zhang,

Changsong Liu,

Mingyang Li,

Wenrui Lu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2024_CVPR,
    author    = {Fan, Yimeng and Zhang, Wei and Liu, Changsong and Li, Mingyang and Lu, Wenrui},
    title     = {{SFOD}: Spiking Fusion Object Detector},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17191--17200},
}
OpenEQA: Embodied Question Answering in the Era of Foundation Models: Arjun Majumdar,

Anurag Ajay,

Xiaohan Zhang,

Pranav Putta,

Sriram Yenamandra,

Mikael Henaff,

Sneha Silwal,

Paul Mcvay,

Oleksandr Maksymets,

Sergio Arnaud,

Karmesh Yadav,

Qiyang Li,

Ben Newman,

Mohit Sharma,

Vincent Berges,

Shiqi Zhang,

Pulkit Agrawal,

Yonatan Bisk,

Dhruv Batra,

Mrinal Kalakrishnan,

Franziska Meier,

Chris Paxton,

Alexander Sax,

Aravind Rajeswaran; [pdf] [supp]
[bibtex]
@InProceedings{Majumdar_2024_CVPR,
    author    = {Majumdar, Arjun and Ajay, Anurag and Zhang, Xiaohan and Putta, Pranav and Yenamandra, Sriram and Henaff, Mikael and Silwal, Sneha and Mcvay, Paul and Maksymets, Oleksandr and Arnaud, Sergio and Yadav, Karmesh and Li, Qiyang and Newman, Ben and Sharma, Mohit and Berges, Vincent and Zhang, Shiqi and Agrawal, Pulkit and Bisk, Yonatan and Batra, Dhruv and Kalakrishnan, Mrinal and Meier, Franziska and Paxton, Chris and Sax, Alexander and Rajeswaran, Aravind},
    title     = {{OpenEQA}: Embodied Question Answering in the Era of Foundation Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {16488--16498},
}
DePT: Decoupled Prompt Tuning: Ji Zhang,

Shihan Wu,

Lianli Gao,

Heng Tao Shen,

Jingkuan Song; [pdf] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Ji and Wu, Shihan and Gao, Lianli and Shen, Heng Tao and Song, Jingkuan},
    title     = {{DePT}: Decoupled Prompt Tuning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {12924--12933},
}
A Generative Approach for Wikipedia-Scale Visual Entity Recognition: Mathilde Caron,

Ahmet Iscen,

Alireza Fathi,

Cordelia Schmid; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Caron_2024_CVPR,
    author    = {Caron, Mathilde and Iscen, Ahmet and Fathi, Alireza and Schmid, Cordelia},
    title     = {A Generative Approach for {Wikipedia}-Scale Visual Entity Recognition},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17313--17322},
}
Open-Vocabulary Object 6D Pose Estimation: Jaime Corsetti,

Davide Boscaini,

Changjae Oh,

Andrea Cavallaro,

Fabio Poiesi; [pdf] [supp]
[bibtex]
@InProceedings{Corsetti_2024_CVPR,
    author    = {Corsetti, Jaime and Boscaini, Davide and Oh, Changjae and Cavallaro, Andrea and Poiesi, Fabio},
    title     = {Open-Vocabulary Object {6D} Pose Estimation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {18071--18080},
}
Plug and Play Active Learning for Object Detection: Chenhongyi Yang,

Lichao Huang,

Elliot J. Crowley; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR,
    author    = {Yang, Chenhongyi and Huang, Lichao and Crowley, Elliot J.},
    title     = {Plug and Play Active Learning for Object Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17784--17793},
}
LiSA: LiDAR Localization with Semantic Awareness: Bochun Yang,

Zijun Li,

Wen Li,

Zhipeng Cai,

Chenglu Wen,

Yu Zang,

Matthias Muller,

Cheng Wang; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2024_CVPR,
    author    = {Yang, Bochun and Li, Zijun and Li, Wen and Cai, Zhipeng and Wen, Chenglu and Zang, Yu and Muller, Matthias and Wang, Cheng},
    title     = {{LiSA}: {LiDAR} Localization with Semantic Awareness},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {15271--15280},
}
LMDrive: Closed-Loop End-to-End Driving with Large Language Models: Hao Shao,

Yuxuan Hu,

Letian Wang,

Guanglu Song,

Steven L. Waslander,

Yu Liu,

Hongsheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shao_2024_CVPR,
    author    = {Shao, Hao and Hu, Yuxuan and Wang, Letian and Song, Guanglu and Waslander, Steven L. and Liu, Yu and Li, Hongsheng},
    title     = {{LMDrive}: Closed-Loop End-to-End Driving with Large Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {15120--15130},
}
AHIVE: Anatomy-aware Hierarchical Vision Encoding for Interactive Radiology Report Retrieval: Sixing Yan,

William K. Cheung,

Ivor W. Tsang,

Keith Chiu,

Terence M. Tong,

Ka Chun Cheung,

Simon See; [pdf] [supp]
[bibtex]
@InProceedings{Yan_2024_CVPR,
    author    = {Yan, Sixing and Cheung, William K. and Tsang, Ivor W. and Chiu, Keith and Tong, Terence M. and Cheung, Ka Chun and See, Simon},
    title     = {{AHIVE}: Anatomy-aware Hierarchical Vision Encoding for Interactive Radiology Report Retrieval},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {14324--14333},
}
CyberDemo: Augmenting Simulated Human Demonstration for Real-World Dexterous Manipulation: Jun Wang,

Yuzhe Qin,

Kaiming Kuang,

Yigit Korkmaz,

Akhilan Gurumoorthy,

Hao Su,

Xiaolong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
    author    = {Wang, Jun and Qin, Yuzhe and Kuang, Kaiming and Korkmaz, Yigit and Gurumoorthy, Akhilan and Su, Hao and Wang, Xiaolong},
    title     = {{CyberDemo}: Augmenting Simulated Human Demonstration for Real-World Dexterous Manipulation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17952--17963},
}
MaskCLR: Attention-Guided Contrastive Learning for Robust Action Representation Learning: Mohamed Abdelfattah,

Mariam Hassan,

Alexandre Alahi; [pdf] [supp]
[bibtex]
@InProceedings{Abdelfattah_2024_CVPR,
    author    = {Abdelfattah, Mohamed and Hassan, Mariam and Alahi, Alexandre},
    title     = {{MaskCLR}: Attention-Guided Contrastive Learning for Robust Action Representation Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {18678--18687},
}
Narrative Action Evaluation with Prompt-Guided Multimodal Interaction: Shiyi Zhang,

Sule Bai,

Guangyi Chen,

Lei Chen,

Jiwen Lu,

Junle Wang,

Yansong Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Shiyi and Bai, Sule and Chen, Guangyi and Chen, Lei and Lu, Jiwen and Wang, Junle and Tang, Yansong},
    title     = {Narrative Action Evaluation with Prompt-Guided Multimodal Interaction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {18430--18439},
}
R-Cyclic Diffuser: Reductive and Cyclic Latent Diffusion for 3D Clothed Human Digitalization: Kennard Yanting Chan,

Fayao Liu,

Guosheng Lin,

Chuan Sheng Foo,

Weisi Lin; [pdf] [supp]
[bibtex]
@InProceedings{Chan_2024_CVPR,
    author    = {Chan, Kennard Yanting and Liu, Fayao and Lin, Guosheng and Foo, Chuan Sheng and Lin, Weisi},
    title     = {{R-Cyclic Diffuser}: Reductive and Cyclic Latent Diffusion for {3D} Clothed Human Digitalization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {10304--10313},
}
Validating Privacy-Preserving Face Recognition under a Minimum Assumption: Hui Zhang,

Xingbo Dong,

YenLung Lai,

Ying Zhou,

Xiaoyan Zhang,

Xingguo Lv,

Zhe Jin,

Xuejun Li; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Hui and Dong, Xingbo and Lai, YenLung and Zhou, Ying and Zhang, Xiaoyan and Lv, Xingguo and Jin, Zhe and Li, Xuejun},
    title     = {Validating Privacy-Preserving Face Recognition under a Minimum Assumption},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {12205--12214},
}
Long-Tailed Anomaly Detection with Learnable Class Names: Chih-Hui Ho,

Kuan-Chuan Peng,

Nuno Vasconcelos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ho_2024_CVPR,
    author    = {Ho, Chih-Hui and Peng, Kuan-Chuan and Vasconcelos, Nuno},
    title     = {Long-Tailed Anomaly Detection with Learnable Class Names},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {12435--12446},
}
Rapid 3D Model Generation with Intuitive 3D Input: Tianrun Chen,

Chaotao Ding,

Shangzhan Zhang,

Chunan Yu,

Ying Zang,

Zejian Li,

Sida Peng,

Lingyun Sun; [pdf]
[bibtex]
@InProceedings{Chen_2024_CVPR,
    author    = {Chen, Tianrun and Ding, Chaotao and Zhang, Shangzhan and Yu, Chunan and Zang, Ying and Li, Zejian and Peng, Sida and Sun, Lingyun},
    title     = {Rapid {3D} Model Generation with Intuitive {3D} Input},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {12554--12564},
}
BoQ: A Place is Worth a Bag of Learnable Queries: Amar Ali-bey,

Brahim Chaib-draa,

Philippe Giguère; [pdf] [supp]
[bibtex]
@InProceedings{Ali-bey_2024_CVPR,
    author    = {Ali-bey, Amar and Chaib-draa, Brahim and Gigu{\`e}re, Philippe},
    title     = {{BoQ}: A Place is Worth a Bag of Learnable Queries},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17794--17803},
}
GigaPose: Fast and Robust Novel Object Pose Estimation via One Correspondence: Van Nguyen Nguyen,

Thibault Groueix,

Mathieu Salzmann,

Vincent Lepetit; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2024_CVPR, author = {Nguyen, Van Nguyen and Groueix, Thibault and Salzmann, Mathieu and Lepetit, Vincent}, title = {{GigaPose}: Fast and Robust Novel Object Pose Estimation via One Correspondence}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {9903--9913} }
Imagine Before Go: Self-Supervised Generative Map for Object Goal Navigation: Sixian Zhang,

Xinyao Yu,

Xinhang Song,

Xiaohan Wang,

Shuqiang Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2024_CVPR, author = {Zhang, Sixian and Yu, Xinyao and Song, Xinhang and Wang, Xiaohan and Jiang, Shuqiang}, title = {Imagine Before Go: Self-Supervised Generative Map for Object Goal Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {16414--16425} }
HIPTrack: Visual Tracking with Historical Prompts: Wenrui Cai,

Qingjie Liu,

Yunhong Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cai_2024_CVPR, author = {Cai, Wenrui and Liu, Qingjie and Wang, Yunhong}, title = {{HIPTrack}: Visual Tracking with Historical Prompts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {19258--19267} }
An N-Point Linear Solver for Line and Motion Estimation with Event Cameras: Ling Gao,

Daniel Gehrig,

Hang Su,

Davide Scaramuzza,

Laurent Kneip; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2024_CVPR, author = {Gao, Ling and Gehrig, Daniel and Su, Hang and Scaramuzza, Davide and Kneip, Laurent}, title = {An N-Point Linear Solver for Line and Motion Estimation with Event Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {14596--14605} }
GenNBV: Generalizable Next-Best-View Policy for Active 3D Reconstruction: Xiao Chen,

Quanyi Li,

Tai Wang,

Tianfan Xue,

Jiangmiao Pang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR, author = {Chen, Xiao and Li, Quanyi and Wang, Tai and Xue, Tianfan and Pang, Jiangmiao}, title = {{GenNBV}: Generalizable Next-Best-View Policy for Active {3D} Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {16436--16445} }
Taming Self-Training for Open-Vocabulary Object Detection: Shiyu Zhao,

Samuel Schulter,

Long Zhao,

Zhixing Zhang,

Vijay Kumar B G,

Yumin Suh,

Manmohan Chandraker,

Dimitris N. Metaxas; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2024_CVPR, author = {Zhao, Shiyu and Schulter, Samuel and Zhao, Long and Zhang, Zhixing and G, Vijay Kumar B and Suh, Yumin and Chandraker, Manmohan and Metaxas, Dimitris N.}, title = {Taming Self-Training for Open-Vocabulary Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {13938--13947} }
Bilateral Propagation Network for Depth Completion: Jie Tang,

Fei-Peng Tian,

Boshi An,

Jian Li,

Ping Tan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2024_CVPR, author = {Tang, Jie and Tian, Fei-Peng and An, Boshi and Li, Jian and Tan, Ping}, title = {Bilateral Propagation Network for Depth Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {9763--9772} }
Unleashing Channel Potential: Space-Frequency Selection Convolution for SAR Object Detection: Ke Li,

Di Wang,

Zhangyuan Hu,

Wenxuan Zhu,

Shaofeng Li,

Quan Wang; [pdf]
[bibtex]
@InProceedings{Li_2024_CVPR, author = {Li, Ke and Wang, Di and Hu, Zhangyuan and Zhu, Wenxuan and Li, Shaofeng and Wang, Quan}, title = {Unleashing Channel Potential: Space-Frequency Selection Convolution for {SAR} Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {17323--17332} }
READ: Retrieval-Enhanced Asymmetric Diffusion for Motion Planning: Takeru Oba,

Matthew Walter,

Norimichi Ukita; [pdf] [supp]
[bibtex]
@InProceedings{Oba_2024_CVPR, author = {Oba, Takeru and Walter, Matthew and Ukita, Norimichi}, title = {{READ}: Retrieval-Enhanced Asymmetric Diffusion for Motion Planning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {17974--17984} }
OVMR: Open-Vocabulary Recognition with Multi-Modal References: Zehong Ma,

Shiliang Zhang,

Longhui Wei,

Qi Tian; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2024_CVPR, author = {Ma, Zehong and Zhang, Shiliang and Wei, Longhui and Tian, Qi}, title = {{OVMR}: Open-Vocabulary Recognition with Multi-Modal References}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {16571--16581} }
Global and Local Prompts Cooperation via Optimal Transport for Federated Learning: Hongxia Li,

Wei Huang,

Jingya Wang,

Ye Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR, author = {Li, Hongxia and Huang, Wei and Wang, Jingya and Shi, Ye}, title = {Global and Local Prompts Cooperation via Optimal Transport for Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {12151--12161} }
Retrieval-Augmented Open-Vocabulary Object Detection: Jooyeon Kim,

Eulrang Cho,

Sehyung Kim,

Hyunwoo J. Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR, author = {Kim, Jooyeon and Cho, Eulrang and Kim, Sehyung and Kim, Hyunwoo J.}, title = {Retrieval-Augmented Open-Vocabulary Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {17427--17436} }
MULTIFLOW: Shifting Towards Task-Agnostic Vision-Language Pruning: Matteo Farina,

Massimiliano Mancini,

Elia Cunegatti,

Gaowen Liu,

Giovanni Iacca,

Elisa Ricci; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Farina_2024_CVPR, author = {Farina, Matteo and Mancini, Massimiliano and Cunegatti, Elia and Liu, Gaowen and Iacca, Giovanni and Ricci, Elisa}, title = {{MULTIFLOW}: Shifting Towards Task-Agnostic Vision-Language Pruning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {16185--16195} }
Spin-UP: Spin Light for Natural Light Uncalibrated Photometric Stereo: Zongrui Li,

Zhan Lu,

Haojie Yan,

Boxin Shi,

Gang Pan,

Qian Zheng,

Xudong Jiang; [pdf] [supp]
[bibtex]
@InProceedings{Li_2024_CVPR, author = {Li, Zongrui and Lu, Zhan and Yan, Haojie and Shi, Boxin and Pan, Gang and Zheng, Qian and Jiang, Xudong}, title = {{Spin-UP}: Spin Light for Natural Light Uncalibrated Photometric Stereo}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {11905--11914} }
MemoNav: Working Memory Model for Visual Navigation: Hongxin Li,

Zeyu Wang,

Xu Yang,

Yuran Yang,

Shuqi Mei,

Zhaoxiang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR, author = {Li, Hongxin and Wang, Zeyu and Yang, Xu and Yang, Yuran and Mei, Shuqi and Zhang, Zhaoxiang}, title = {{MemoNav}: Working Memory Model for Visual Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {17913--17922} }
AssistGUI: Task-Oriented PC Graphical User Interface Automation: Difei Gao,

Lei Ji,

Zechen Bai,

Mingyu Ouyang,

Peiran Li,

Dongxing Mao,

Qinchen Wu,

Weichen Zhang,

Peiyi Wang,

Xiangwu Guo,

Hengxu Wang,

Luowei Zhou,

Mike Zheng Shou; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2024_CVPR, author = {Gao, Difei and Ji, Lei and Bai, Zechen and Ouyang, Mingyu and Li, Peiran and Mao, Dongxing and Wu, Qinchen and Zhang, Weichen and Wang, Peiyi and Guo, Xiangwu and Wang, Hengxu and Zhou, Luowei and Shou, Mike Zheng}, title = {{AssistGUI}: Task-Oriented {PC} Graphical User Interface Automation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {13289--13298} }
PaSCo: Urban 3D Panoptic Scene Completion with Uncertainty Awareness: Anh-Quan Cao,

Angela Dai,

Raoul de Charette; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2024_CVPR, author = {Cao, Anh-Quan and Dai, Angela and de Charette, Raoul}, title = {{PaSCo}: Urban {3D} Panoptic Scene Completion with Uncertainty Awareness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {14554--14564} }
PhyScene: Physically Interactable 3D Scene Synthesis for Embodied AI: Yandan Yang,

Baoxiong Jia,

Peiyuan Zhi,

Siyuan Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR, author = {Yang, Yandan and Jia, Baoxiong and Zhi, Peiyuan and Huang, Siyuan}, title = {{PhyScene}: Physically Interactable {3D} Scene Synthesis for Embodied {AI}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {16262--16272} }
Harnessing Meta-Learning for Improving Full-Frame Video Stabilization: Muhammad Kashif Ali,

Eun Woo Im,

Dongjin Kim,

Tae Hyun Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ali_2024_CVPR, author = {Ali, Muhammad Kashif and Im, Eun Woo and Kim, Dongjin and Kim, Tae Hyun}, title = {Harnessing Meta-Learning for Improving Full-Frame Video Stabilization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {12605--12614} }
How to Handle Sketch-Abstraction in Sketch-Based Image Retrieval?: Subhadeep Koley,

Ayan Kumar Bhunia,

Aneeshan Sain,

Pinaki Nath Chowdhury,

Tao Xiang,

Yi-Zhe Song; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Koley_2024_CVPR, author = {Koley, Subhadeep and Bhunia, Ayan Kumar and Sain, Aneeshan and Chowdhury, Pinaki Nath and Xiang, Tao and Song, Yi-Zhe}, title = {How to Handle Sketch-Abstraction in Sketch-Based Image Retrieval?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {16859--16869} }
ProS: Prompting-to-simulate Generalized knowledge for Universal Cross-Domain Retrieval: Kaipeng Fang,

Jingkuan Song,

Lianli Gao,

Pengpeng Zeng,

Zhi-Qi Cheng,

Xiyao Li,

Heng Tao Shen; [pdf] [arXiv]
[bibtex]
@InProceedings{Fang_2024_CVPR, author = {Fang, Kaipeng and Song, Jingkuan and Gao, Lianli and Zeng, Pengpeng and Cheng, Zhi-Qi and Li, Xiyao and Shen, Heng Tao}, title = {{ProS}: Prompting-to-simulate Generalized knowledge for Universal Cross-Domain Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {17292--17301} }
Boosting Object Detection with Zero-Shot Day-Night Domain Adaptation: Zhipeng Du,

Miaojing Shi,

Jiankang Deng; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Du_2024_CVPR, author = {Du, Zhipeng and Shi, Miaojing and Deng, Jiankang}, title = {Boosting Object Detection with Zero-Shot Day-Night Domain Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {12666--12676} }
Versatile Medical Image Segmentation Learned from Multi-Source Datasets via Model Self-Disambiguation: Xiaoyang Chen,

Hao Zheng,

Yuemeng Li,

Yuncong Ma,

Liang Ma,

Hongming Li,

Yong Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR, author = {Chen, Xiaoyang and Zheng, Hao and Li, Yuemeng and Ma, Yuncong and Ma, Liang and Li, Hongming and Fan, Yong}, title = {Versatile Medical Image Segmentation Learned from Multi-Source Datasets via Model Self-Disambiguation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {11747--11756} }
Align and Aggregate: Compositional Reasoning with Video Alignment and Answer Aggregation for Video Question-Answering: Zhaohe Liao,

Jiangtong Li,

Li Niu,

Liqing Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liao_2024_CVPR, author = {Liao, Zhaohe and Li, Jiangtong and Niu, Li and Zhang, Liqing}, title = {Align and Aggregate: Compositional Reasoning with Video Alignment and Answer Aggregation for Video Question-Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {13395--13404} }
Action-slot: Visual Action-centric Representations for Multi-label Atomic Activity Recognition in Traffic Scenes: Chi-Hsi Kung,

Shu-Wei Lu,

Yi-Hsuan Tsai,

Yi-Ting Chen; [pdf] [supp]
[bibtex]
@InProceedings{Kung_2024_CVPR, author = {Kung, Chi-Hsi and Lu, Shu-Wei and Tsai, Yi-Hsuan and Chen, Yi-Ting}, title = {Action-slot: Visual Action-centric Representations for Multi-label Atomic Activity Recognition in Traffic Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {18451--18461} }
Retraining-Free Model Quantization via One-Shot Weight-Coupling Learning: Chen Tang,

Yuan Meng,

Jiacheng Jiang,

Shuzhao Xie,

Rongwei Lu,

Xinzhu Ma,

Zhi Wang,

Wenwu Zhu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tang_2024_CVPR, author = {Tang, Chen and Meng, Yuan and Jiang, Jiacheng and Xie, Shuzhao and Lu, Rongwei and Ma, Xinzhu and Wang, Zhi and Zhu, Wenwu}, title = {Retraining-Free Model Quantization via One-Shot Weight-Coupling Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {15855--15865} }
EVCap: Retrieval-Augmented Image Captioning with External Visual-Name Memory for Open-World Comprehension: Jiaxuan Li,

Duc Minh Vo,

Akihiro Sugimoto,

Hideki Nakayama; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR, author = {Li, Jiaxuan and Vo, Duc Minh and Sugimoto, Akihiro and Nakayama, Hideki}, title = {{EVCap}: Retrieval-Augmented Image Captioning with External Visual-Name Memory for Open-World Comprehension}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {13733--13742} }
SIFU: Side-view Conditioned Implicit Function for Real-world Usable Clothed Human Reconstruction: Zechuan Zhang,

Zongxin Yang,

Yi Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR, author = {Zhang, Zechuan and Yang, Zongxin and Yang, Yi}, title = {{SIFU}: Side-view Conditioned Implicit Function for Real-world Usable Clothed Human Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {9936--9947} }
Autoregressive Queries for Adaptive Tracking with Spatio-Temporal Transformers: Jinxia Xie,

Bineng Zhong,

Zhiyi Mo,

Shengping Zhang,

Liangtao Shi,

Shuxiang Song,

Rongrong Ji; [pdf]
[bibtex]
@InProceedings{Xie_2024_CVPR, author = {Xie, Jinxia and Zhong, Bineng and Mo, Zhiyi and Zhang, Shengping and Shi, Liangtao and Song, Shuxiang and Ji, Rongrong}, title = {Autoregressive Queries for Adaptive Tracking with Spatio-Temporal Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {19300--19309} }
Lane2Seq: Towards Unified Lane Detection via Sequence Generation: Kunyang Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhou_2024_CVPR, author = {Zhou, Kunyang}, title = {{Lane2Seq}: Towards Unified Lane Detection via Sequence Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {16944--16953} }
LEMON: Learning 3D Human-Object Interaction Relation from 2D Images: Yuhang Yang,

Wei Zhai,

Hongchen Luo,

Yang Cao,

Zheng-Jun Zha; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR, author = {Yang, Yuhang and Zhai, Wei and Luo, Hongchen and Cao, Yang and Zha, Zheng-Jun}, title = {{LEMON}: Learning {3D} Human-Object Interaction Relation from {2D} Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {16284--16295} }
Understanding Video Transformers via Universal Concept Discovery: Matthew Kowal,

Achal Dave,

Rares Ambrus,

Adrien Gaidon,

Konstantinos G. Derpanis,

Pavel Tokmakov; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kowal_2024_CVPR, author = {Kowal, Matthew and Dave, Achal and Ambrus, Rares and Gaidon, Adrien and Derpanis, Konstantinos G. and Tokmakov, Pavel}, title = {Understanding Video Transformers via Universal Concept Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {10946--10956} }
PointOBB: Learning Oriented Object Detection via Single Point Supervision: Junwei Luo,

Xue Yang,

Yi Yu,

Qingyun Li,

Junchi Yan,

Yansheng Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Luo_2024_CVPR, author = {Luo, Junwei and Yang, Xue and Yu, Yi and Li, Qingyun and Yan, Junchi and Li, Yansheng}, title = {{PointOBB}: Learning Oriented Object Detection via Single Point Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {16730--16740} }
OmniParser: A Unified Framework for Text Spotting Key Information Extraction and Table Recognition: Jianqiang Wan,

Sibo Song,

Wenwen Yu,

Yuliang Liu,

Wenqing Cheng,

Fei Huang,

Xiang Bai,

Cong Yao,

Zhibo Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wan_2024_CVPR, author = {Wan, Jianqiang and Song, Sibo and Yu, Wenwen and Liu, Yuliang and Cheng, Wenqing and Huang, Fei and Bai, Xiang and Yao, Cong and Yang, Zhibo}, title = {{OmniParser}: A Unified Framework for Text Spotting Key Information Extraction and Table Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {15641--15653} }
Training Like a Medical Resident: Context-Prior Learning Toward Universal Medical Image Segmentation: Yunhe Gao; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gao_2024_CVPR, author = {Gao, Yunhe}, title = {Training Like a Medical Resident: Context-Prior Learning Toward Universal Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {11194--11204} }
MicroDiffusion: Implicit Representation-Guided Diffusion for 3D Reconstruction from Limited 2D Microscopy Projections: Mude Hui,

Zihao Wei,

Hongru Zhu,

Fei Xia,

Yuyin Zhou; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hui_2024_CVPR, author = {Hui, Mude and Wei, Zihao and Zhu, Hongru and Xia, Fei and Zhou, Yuyin}, title = {{MicroDiffusion}: Implicit Representation-Guided Diffusion for {3D} Reconstruction from Limited {2D} Microscopy Projections}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {11460--11469} }
Task-Conditioned Adaptation of Visual Features in Multi-Task Policy Learning: Pierre Marza,

Laetitia Matignon,

Olivier Simonin,

Christian Wolf; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Marza_2024_CVPR, author = {Marza, Pierre and Matignon, Laetitia and Simonin, Olivier and Wolf, Christian}, title = {Task-Conditioned Adaptation of Visual Features in Multi-Task Policy Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {17847--17856} }
Hybrid Proposal Refiner: Revisiting DETR Series from the Faster R-CNN Perspective: Jinjing Zhao,

Fangyun Wei,

Chang Xu; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2024_CVPR, author = {Zhao, Jinjing and Wei, Fangyun and Xu, Chang}, title = {Hybrid Proposal Refiner: Revisiting {DETR} Series from the {Faster R-CNN} Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {17416--17426} }
Video Harmonization with Triplet Spatio-Temporal Variation Patterns: Zonghui Guo,

Xinyu Han,

Jie Zhang,

Shiguang Shan,

Haiyong Zheng; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2024_CVPR, author = {Guo, Zonghui and Han, Xinyu and Zhang, Jie and Shan, Shiguang and Zheng, Haiyong}, title = {Video Harmonization with Triplet Spatio-Temporal Variation Patterns}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {19177--19186} }
Improved Zero-Shot Classification by Adapting VLMs with Text Descriptions: Oindrila Saha,

Grant Van Horn,

Subhransu Maji; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Saha_2024_CVPR, author = {Saha, Oindrila and Van Horn, Grant and Maji, Subhransu}, title = {Improved Zero-Shot Classification by Adapting {VLMs} with Text Descriptions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {17542--17552} }
CricaVPR: Cross-image Correlation-aware Representation Learning for Visual Place Recognition: Feng Lu,

Xiangyuan Lan,

Lijun Zhang,

Dongmei Jiang,

Yaowei Wang,

Chun Yuan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lu_2024_CVPR, author = {Lu, Feng and Lan, Xiangyuan and Zhang, Lijun and Jiang, Dongmei and Wang, Yaowei and Yuan, Chun}, title = {{CricaVPR}: Cross-image Correlation-aware Representation Learning for Visual Place Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {16772--16782} }
Instance-level Expert Knowledge and Aggregate Discriminative Attention for Radiology Report Generation: Shenshen Bu,

Taiji Li,

Yuedong Yang,

Zhiming Dai; [pdf]
[bibtex]
@InProceedings{Bu_2024_CVPR, author = {Bu, Shenshen and Li, Taiji and Yang, Yuedong and Dai, Zhiming}, title = {Instance-level Expert Knowledge and Aggregate Discriminative Attention for Radiology Report Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {14194--14204} }
Each Test Image Deserves A Specific Prompt: Continual Test-Time Adaptation for 2D Medical Image Segmentation: Ziyang Chen,

Yongsheng Pan,

Yiwen Ye,

Mengkang Lu,

Yong Xia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR, author = {Chen, Ziyang and Pan, Yongsheng and Ye, Yiwen and Lu, Mengkang and Xia, Yong}, title = {Each Test Image Deserves A Specific Prompt: Continual Test-Time Adaptation for {2D} Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {11184--11193} }
Versatile Navigation Under Partial Observability via Value-guided Diffusion Policy: Gengyu Zhang,

Hao Tang,

Yan Yan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR, author = {Zhang, Gengyu and Tang, Hao and Yan, Yan}, title = {Versatile Navigation Under Partial Observability via Value-guided Diffusion Policy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {17943--17951} }
All in One Framework for Multimodal Re-identification in the Wild: He Li,

Mang Ye,

Ming Zhang,

Bo Du; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR, author = {Li, He and Ye, Mang and Zhang, Ming and Du, Bo}, title = {All in One Framework for Multimodal Re-identification in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {17459--17469} }
Looking 3D: Anomaly Detection with 2D-3D Alignment: Ankan Bhunia,

Changjian Li,

Hakan Bilen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bhunia_2024_CVPR, author = {Bhunia, Ankan and Li, Changjian and Bilen, Hakan}, title = {Looking {3D}: Anomaly Detection with {2D-3D} Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {17263--17272} }
VS: Reconstructing Clothed 3D Human from Single Image via Vertex Shift: Leyuan Liu,

Yuhan Li,

Yunqi Gao,

Changxin Gao,

Yuanyuan Liu,

Jingying Chen; [pdf] [supp]
[bibtex]
@InProceedings{Liu_2024_CVPR, author = {Liu, Leyuan and Li, Yuhan and Gao, Yunqi and Gao, Changxin and Liu, Yuanyuan and Chen, Jingying}, title = {{VS}: Reconstructing Clothed {3D} Human from Single Image via Vertex Shift}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {10498--10507} }
PARA-Drive: Parallelized Architecture for Real-time Autonomous Driving: Xinshuo Weng,

Boris Ivanovic,

Yan Wang,

Yue Wang,

Marco Pavone; [pdf]
[bibtex]
@InProceedings{Weng_2024_CVPR, author = {Weng, Xinshuo and Ivanovic, Boris and Wang, Yan and Wang, Yue and Pavone, Marco}, title = {{PARA-Drive}: Parallelized Architecture for Real-time Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {15449--15458} }
Pink: Unveiling the Power of Referential Comprehension for Multi-modal LLMs: Shiyu Xuan,

Qingpei Guo,

Ming Yang,

Shiliang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xuan_2024_CVPR, author = {Xuan, Shiyu and Guo, Qingpei and Yang, Ming and Zhang, Shiliang}, title = {Pink: Unveiling the Power of Referential Comprehension for Multi-modal {LLMs}}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {13838--13848} }
HalluciDoctor: Mitigating Hallucinatory Toxicity in Visual Instruction Data: Qifan Yu,

Juncheng Li,

Longhui Wei,

Liang Pang,

Wentao Ye,

Bosheng Qin,

Siliang Tang,

Qi Tian,

Yueting Zhuang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yu_2024_CVPR, author = {Yu, Qifan and Li, Juncheng and Wei, Longhui and Pang, Liang and Ye, Wentao and Qin, Bosheng and Tang, Siliang and Tian, Qi and Zhuang, Yueting}, title = {{HalluciDoctor}: Mitigating Hallucinatory Toxicity in Visual Instruction Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {12944--12953} }
C^2RV: Cross-Regional and Cross-View Learning for Sparse-View CBCT Reconstruction: Yiqun Lin,

Jiewen Yang,

Hualiang Wang,

Xinpeng Ding,

Wei Zhao,

Xiaomeng Li; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2024_CVPR, author = {Lin, Yiqun and Yang, Jiewen and Wang, Hualiang and Ding, Xinpeng and Zhao, Wei and Li, Xiaomeng}, title = {{C$^2$RV}: Cross-Regional and Cross-View Learning for Sparse-View {CBCT} Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {11205--11214} }
GLiDR: Topologically Regularized Graph Generative Network for Sparse LiDAR Point Clouds: Prashant Kumar,

Kshitij Madhav Bhat,

Vedang Bhupesh Shenvi Nadkarni,

Prem Kalra; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kumar_2024_CVPR, author = {Kumar, Prashant and Bhat, Kshitij Madhav and Nadkarni, Vedang Bhupesh Shenvi and Kalra, Prem}, title = {{GLiDR}: Topologically Regularized Graph Generative Network for Sparse {LiDAR} Point Clouds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {15152--15161} }
Commonsense Prototype for Outdoor Unsupervised 3D Object Detection: Hai Wu,

Shijia Zhao,

Xun Huang,

Chenglu Wen,

Xin Li,

Cheng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR, author = {Wu, Hai and Zhao, Shijia and Huang, Xun and Wen, Chenglu and Li, Xin and Wang, Cheng}, title = {Commonsense Prototype for Outdoor Unsupervised {3D} Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {14968--14977} }
Lookahead Exploration with Neural Radiance Representation for Continuous Vision-Language Navigation: Zihan Wang,

Xiangyang Li,

Jiahao Yang,

Yeqi Liu,

Junjie Hu,

Ming Jiang,

Shuqiang Jiang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR, author = {Wang, Zihan and Li, Xiangyang and Yang, Jiahao and Liu, Yeqi and Hu, Junjie and Jiang, Ming and Jiang, Shuqiang}, title = {Lookahead Exploration with Neural Radiance Representation for Continuous Vision-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {13753--13762} }
Learning Vision from Models Rivals Learning Vision from Data: Yonglong Tian,

Lijie Fan,

Kaifeng Chen,

Dina Katabi,

Dilip Krishnan,

Phillip Isola; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Tian_2024_CVPR, author = {Tian, Yonglong and Fan, Lijie and Chen, Kaifeng and Katabi, Dina and Krishnan, Dilip and Isola, Phillip}, title = {Learning Vision from Models Rivals Learning Vision from Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {15887--15898} }
Adapting Short-Term Transformers for Action Detection in Untrimmed Videos: Min Yang,

Huan Gao,

Ping Guo,

Limin Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR, author = {Yang, Min and Gao, Huan and Guo, Ping and Wang, Limin}, title = {Adapting Short-Term Transformers for Action Detection in Untrimmed Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {18570--18579} }
SOAC: Spatio-Temporal Overlap-Aware Multi-Sensor Calibration using Neural Radiance Fields: Quentin Herau,

Nathan Piasco,

Moussab Bennehar,

Luis Roldao,

Dzmitry Tsishkou,

Cyrille Migniot,

Pascal Vasseur,

Cédric Demonceaux; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Herau_2024_CVPR, author = {Herau, Quentin and Piasco, Nathan and Bennehar, Moussab and Roldao, Luis and Tsishkou, Dzmitry and Migniot, Cyrille and Vasseur, Pascal and Demonceaux, C{\'e}dric}, title = {{SOAC}: Spatio-Temporal Overlap-Aware Multi-Sensor Calibration using Neural Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {15131--15140} }
G^3-LQ: Marrying Hyperbolic Alignment with Explicit Semantic-Geometric Modeling for 3D Visual Grounding: Yuan Wang,

Yali Li,

Shengjin Wang; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2024_CVPR, author = {Wang, Yuan and Li, Yali and Wang, Shengjin}, title = {{G$^3$-LQ}: Marrying Hyperbolic Alignment with Explicit Semantic-Geometric Modeling for {3D} Visual Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = jun, year = {2024}, pages = {13917--13926} }
ToonerGAN: Reinforcing GANs for Obfuscating Automated Facial Indexing: Kartik Thakral,

Shashikant Prasad,

Stuti Aswani,

Mayank Vatsa,

Richa Singh; [pdf] [supp]
[bibtex]
@InProceedings{Thakral_2024_CVPR,
  author    = {Thakral, Kartik and Prasad, Shashikant and Aswani, Stuti and Vatsa, Mayank and Singh, Richa},
  title     = {{ToonerGAN}: Reinforcing {GANs} for Obfuscating Automated Facial Indexing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10875--10884},
}
Editable Scene Simulation for Autonomous Driving via Collaborative LLM-Agents: Yuxi Wei,

Zi Wang,

Yifan Lu,

Chenxin Xu,

Changxing Liu,

Hao Zhao,

Siheng Chen,

Yanfeng Wang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2024_CVPR,
  author    = {Wei, Yuxi and Wang, Zi and Lu, Yifan and Xu, Chenxin and Liu, Changxing and Zhao, Hao and Chen, Siheng and Wang, Yanfeng},
  title     = {Editable Scene Simulation for Autonomous Driving via Collaborative {LLM}-Agents},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15077--15087},
}
SnAG: Scalable and Accurate Video Grounding: Fangzhou Mu,

Sicheng Mo,

Yin Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mu_2024_CVPR,
  author    = {Mu, Fangzhou and Mo, Sicheng and Li, Yin},
  title     = {{SnAG}: Scalable and Accurate Video Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18930--18940},
}
Building Vision-Language Models on Solid Foundations with Masked Distillation: Sepehr Sameni,

Kushal Kafle,

Hao Tan,

Simon Jenni; [pdf]
[bibtex]
@InProceedings{Sameni_2024_CVPR,
  author    = {Sameni, Sepehr and Kafle, Kushal and Tan, Hao and Jenni, Simon},
  title     = {Building Vision-Language Models on Solid Foundations with Masked Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14216--14226},
}
TransLoc4D: Transformer-based 4D Radar Place Recognition: Guohao Peng,

Heshan Li,

Yangyang Zhao,

Jun Zhang,

Zhenyu Wu,

Pengyu Zheng,

Danwei Wang; [pdf] [supp]
[bibtex]
@InProceedings{Peng_2024_CVPR,
  author    = {Peng, Guohao and Li, Heshan and Zhao, Yangyang and Zhang, Jun and Wu, Zhenyu and Zheng, Pengyu and Wang, Danwei},
  title     = {{TransLoc4D}: Transformer-based {4D} Radar Place Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17595--17605},
}
Multiscale Vision Transformers Meet Bipartite Matching for Efficient Single-stage Action Localization: Ioanna Ntinou,

Enrique Sanchez,

Georgios Tzimiropoulos; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ntinou_2024_CVPR,
  author    = {Ntinou, Ioanna and Sanchez, Enrique and Tzimiropoulos, Georgios},
  title     = {Multiscale Vision Transformers Meet Bipartite Matching for Efficient Single-stage Action Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18827--18836},
}
Deep Single Image Camera Calibration by Heatmap Regression to Recover Fisheye Images Under Manhattan World Assumption: Nobuhiko Wakai,

Satoshi Sato,

Yasunori Ishii,

Takayoshi Yamashita; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wakai_2024_CVPR,
  author    = {Wakai, Nobuhiko and Sato, Satoshi and Ishii, Yasunori and Yamashita, Takayoshi},
  title     = {Deep Single Image Camera Calibration by Heatmap Regression to Recover Fisheye Images Under {Manhattan} World Assumption},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11884--11894},
}
CSTA: CNN-based Spatiotemporal Attention for Video Summarization: Jaewon Son,

Jaehun Park,

Kwangsu Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Son_2024_CVPR,
  author    = {Son, Jaewon and Park, Jaehun and Kim, Kwangsu},
  title     = {{CSTA}: {CNN}-based Spatiotemporal Attention for Video Summarization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18847--18856},
}
PEM: Prototype-based Efficient MaskFormer for Image Segmentation: Niccolò Cavagnero,

Gabriele Rosi,

Claudia Cuttano,

Francesca Pistilli,

Marco Ciccone,

Giuseppe Averta,

Fabio Cermelli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cavagnero_2024_CVPR,
  author    = {Cavagnero, Niccol{\`o} and Rosi, Gabriele and Cuttano, Claudia and Pistilli, Francesca and Ciccone, Marco and Averta, Giuseppe and Cermelli, Fabio},
  title     = {{PEM}: Prototype-based Efficient {MaskFormer} for Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15804--15813},
}
Referring Expression Counting: Siyang Dai,

Jun Liu,

Ngai-Man Cheung; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2024_CVPR,
  author    = {Dai, Siyang and Liu, Jun and Cheung, Ngai-Man},
  title     = {Referring Expression Counting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16985--16995},
}
Learning to Predict Activity Progress by Self-Supervised Video Alignment: Gerard Donahue,

Ehsan Elhamifar; [pdf] [supp]
[bibtex]
@InProceedings{Donahue_2024_CVPR,
  author    = {Donahue, Gerard and Elhamifar, Ehsan},
  title     = {Learning to Predict Activity Progress by Self-Supervised Video Alignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18667--18677},
}
VicTR: Video-conditioned Text Representations for Activity Recognition: Kumara Kahatapitiya,

Anurag Arnab,

Arsha Nagrani,

Michael S. Ryoo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Kahatapitiya_2024_CVPR,
  author    = {Kahatapitiya, Kumara and Arnab, Anurag and Nagrani, Arsha and Ryoo, Michael S.},
  title     = {{VicTR}: Video-conditioned Text Representations for Activity Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18547--18558},
}
Label-Efficient Group Robustness via Out-of-Distribution Concept Curation: Yiwei Yang,

Anthony Z. Liu,

Robert Wolfe,

Aylin Caliskan,

Bill Howe; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Yiwei and Liu, Anthony Z. and Wolfe, Robert and Caliskan, Aylin and Howe, Bill},
  title     = {Label-Efficient Group Robustness via Out-of-Distribution Concept Curation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12426--12434},
}
3DToonify: Creating Your High-Fidelity 3D Stylized Avatar Easily from 2D Portrait Images: Yifang Men,

Hanxi Liu,

Yuan Yao,

Miaomiao Cui,

Xuansong Xie,

Zhouhui Lian; [pdf] [supp]
[bibtex]
@InProceedings{Men_2024_CVPR,
  author    = {Men, Yifang and Liu, Hanxi and Yao, Yuan and Cui, Miaomiao and Xie, Xuansong and Lian, Zhouhui},
  title     = {{3DToonify}: Creating Your High-Fidelity {3D} Stylized Avatar Easily from {2D} Portrait Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10127--10137},
}
Investigating Compositional Challenges in Vision-Language Models for Visual Grounding: Yunan Zeng,

Yan Huang,

Jinjin Zhang,

Zequn Jie,

Zhenhua Chai,

Liang Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zeng_2024_CVPR,
  author    = {Zeng, Yunan and Huang, Yan and Zhang, Jinjin and Jie, Zequn and Chai, Zhenhua and Wang, Liang},
  title     = {Investigating Compositional Challenges in Vision-Language Models for Visual Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14141--14151},
}
6D-Diff: A Keypoint Diffusion Framework for 6D Object Pose Estimation: Li Xu,

Haoxuan Qu,

Yujun Cai,

Jun Liu; [pdf] [supp]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Li and Qu, Haoxuan and Cai, Yujun and Liu, Jun},
  title     = {{6D-Diff}: A Keypoint Diffusion Framework for {6D} Object Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9676--9686},
}
Generative Region-Language Pretraining for Open-Ended Object Detection: Chuang Lin,

Yi Jiang,

Lizhen Qu,

Zehuan Yuan,

Jianfei Cai; [pdf] [arXiv]
[bibtex]
@InProceedings{Lin_2024_CVPR,
  author    = {Lin, Chuang and Jiang, Yi and Qu, Lizhen and Yuan, Zehuan and Cai, Jianfei},
  title     = {Generative Region-Language Pretraining for Open-Ended Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13958--13968},
}
Enhancing Post-training Quantization Calibration through Contrastive Learning: Yuzhang Shang,

Gaowen Liu,

Ramana Rao Kompella,

Yan Yan; [pdf]
[bibtex]
@InProceedings{Shang_2024_CVPR,
  author    = {Shang, Yuzhang and Liu, Gaowen and Kompella, Ramana Rao and Yan, Yan},
  title     = {Enhancing Post-training Quantization Calibration through Contrastive Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15921--15930},
}
Enhancing Visual Document Understanding with Contrastive Learning in Large Visual-Language Models: Xin Li,

Yunfei Wu,

Xinghua Jiang,

Zhihao Guo,

Mingming Gong,

Haoyu Cao,

Yinsong Liu,

Deqiang Jiang,

Xing Sun; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Xin and Wu, Yunfei and Jiang, Xinghua and Guo, Zhihao and Gong, Mingming and Cao, Haoyu and Liu, Yinsong and Jiang, Deqiang and Sun, Xing},
  title     = {Enhancing Visual Document Understanding with Contrastive Learning in Large Visual-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15546--15555},
}
Data Valuation and Detections in Federated Learning: Wenqian Li,

Shuran Fu,

Fengrui Zhang,

Yan Pang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Wenqian and Fu, Shuran and Zhang, Fengrui and Pang, Yan},
  title     = {Data Valuation and Detections in Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12027--12036},
}
Joint Reconstruction of 3D Human and Object via Contact-Based Refinement Transformer: Hyeongjin Nam,

Daniel Sungho Jung,

Gyeongsik Moon,

Kyoung Mu Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nam_2024_CVPR,
  author    = {Nam, Hyeongjin and Jung, Daniel Sungho and Moon, Gyeongsik and Lee, Kyoung Mu},
  title     = {Joint Reconstruction of {3D} Human and Object via Contact-Based Refinement Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10218--10227},
}
TIM: A Time Interval Machine for Audio-Visual Action Recognition: Jacob Chalk,

Jaesung Huh,

Evangelos Kazakos,

Andrew Zisserman,

Dima Damen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chalk_2024_CVPR,
  author    = {Chalk, Jacob and Huh, Jaesung and Kazakos, Evangelos and Zisserman, Andrew and Damen, Dima},
  title     = {{TIM}: A Time Interval Machine for Audio-Visual Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18153--18163},
}
Would Deep Generative Models Amplify Bias in Future Models?: Tianwei Chen,

Yusuke Hirota,

Mayu Otani,

Noa Garcia,

Yuta Nakashima; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Tianwei and Hirota, Yusuke and Otani, Mayu and Garcia, Noa and Nakashima, Yuta},
  title     = {Would Deep Generative Models Amplify Bias in Future Models?},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10833--10843},
}
CogAgent: A Visual Language Model for GUI Agents: Wenyi Hong,

Weihan Wang,

Qingsong Lv,

Jiazheng Xu,

Wenmeng Yu,

Junhui Ji,

Yan Wang,

Zihan Wang,

Yuxiao Dong,

Ming Ding,

Jie Tang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Hong_2024_CVPR,
  author    = {Hong, Wenyi and Wang, Weihan and Lv, Qingsong and Xu, Jiazheng and Yu, Wenmeng and Ji, Junhui and Wang, Yan and Wang, Zihan and Dong, Yuxiao and Ding, Ming and Tang, Jie},
  title     = {{CogAgent}: A Visual Language Model for {GUI} Agents},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14281--14290},
}
AIDE: An Automatic Data Engine for Object Detection in Autonomous Driving: Mingfu Liang,

Jong-Chyi Su,

Samuel Schulter,

Sparsh Garg,

Shiyu Zhao,

Ying Wu,

Manmohan Chandraker; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2024_CVPR,
  author    = {Liang, Mingfu and Su, Jong-Chyi and Schulter, Samuel and Garg, Sparsh and Zhao, Shiyu and Wu, Ying and Chandraker, Manmohan},
  title     = {{AIDE}: An Automatic Data Engine for Object Detection in Autonomous Driving},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14695--14706},
}
Smart Help: Strategic Opponent Modeling for Proactive and Adaptive Robot Assistance in Households: Zhihao Cao,

Zidong Wang,

Siwen Xie,

Anji Liu,

Lifeng Fan; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2024_CVPR,
  author    = {Cao, Zhihao and Wang, Zidong and Xie, Siwen and Liu, Anji and Fan, Lifeng},
  title     = {Smart Help: Strategic Opponent Modeling for Proactive and Adaptive Robot Assistance in Households},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18091--18101},
}
Rapid Motor Adaptation for Robotic Manipulator Arms: Yichao Liang,

Kevin Ellis,

João Henriques; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liang_2024_CVPR,
  author    = {Liang, Yichao and Ellis, Kevin and Henriques, Jo{\~a}o},
  title     = {Rapid Motor Adaptation for Robotic Manipulator Arms},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16404--16413},
}
WWW: A Unified Framework for Explaining What Where and Why of Neural Networks by Interpretation of Neuron Concepts: Yong Hyun Ahn,

Hyeon Bae Kim,

Seong Tae Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ahn_2024_CVPR,
  author    = {Ahn, Yong Hyun and Kim, Hyeon Bae and Kim, Seong Tae},
  title     = {{WWW}: A Unified Framework for Explaining What Where and Why of Neural Networks by Interpretation of Neuron Concepts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10968--10977},
}
CaKDP: Category-aware Knowledge Distillation and Pruning Framework for Lightweight 3D Object Detection: Haonan Zhang,

Longjun Liu,

Yuqi Huang,

Zhao Yang,

Xinyu Lei,

Bihan Wen; [pdf] [supp]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Haonan and Liu, Longjun and Huang, Yuqi and Yang, Zhao and Lei, Xinyu and Wen, Bihan},
  title     = {{CaKDP}: Category-aware Knowledge Distillation and Pruning Framework for Lightweight {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15331--15341},
}
ICP-Flow: LiDAR Scene Flow Estimation with ICP: Yancong Lin,

Holger Caesar; [pdf] [supp]
[bibtex]
@InProceedings{Lin_2024_CVPR,
  author    = {Lin, Yancong and Caesar, Holger},
  title     = {{ICP-Flow}: {LiDAR} Scene Flow Estimation with {ICP}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15501--15511},
}
MADTP: Multimodal Alignment-Guided Dynamic Token Pruning for Accelerating Vision-Language Transformer: Jianjian Cao,

Peng Ye,

Shengze Li,

Chong Yu,

Yansong Tang,

Jiwen Lu,

Tao Chen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cao_2024_CVPR,
  author    = {Cao, Jianjian and Ye, Peng and Li, Shengze and Yu, Chong and Tang, Yansong and Lu, Jiwen and Chen, Tao},
  title     = {{MADTP}: Multimodal Alignment-Guided Dynamic Token Pruning for Accelerating Vision-Language Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15710--15719},
}
G-NeRF: Geometry-enhanced Novel View Synthesis from Single-View Images: Zixiong Huang,

Qi Chen,

Libo Sun,

Yifan Yang,

Naizhou Wang,

Qi Wu,

Mingkui Tan; [pdf] [supp]
[bibtex]
@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Zixiong and Chen, Qi and Sun, Libo and Yang, Yifan and Wang, Naizhou and Wu, Qi and Tan, Mingkui},
  title     = {{G-NeRF}: Geometry-enhanced Novel View Synthesis from Single-View Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10117--10126},
}
SpiderMatch: 3D Shape Matching with Global Optimality and Geometric Consistency: Paul Roetzer,

Florian Bernard; [pdf] [supp]
[bibtex]
@InProceedings{Roetzer_2024_CVPR,
  author    = {Roetzer, Paul and Bernard, Florian},
  title     = {{SpiderMatch}: {3D} Shape Matching with Global Optimality and Geometric Consistency},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14543--14553},
}
Evidential Active Recognition: Intelligent and Prudent Open-World Embodied Perception: Lei Fan,

Mingfu Liang,

Yunxuan Li,

Gang Hua,

Ying Wu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Fan_2024_CVPR,
  author    = {Fan, Lei and Liang, Mingfu and Li, Yunxuan and Hua, Gang and Wu, Ying},
  title     = {Evidential Active Recognition: Intelligent and Prudent Open-World Embodied Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16351--16361},
}
The Unreasonable Effectiveness of Pre-Trained Features for Camera Pose Refinement: Gabriele Trivigno,

Carlo Masone,

Barbara Caputo,

Torsten Sattler; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Trivigno_2024_CVPR,
  author    = {Trivigno, Gabriele and Masone, Carlo and Caputo, Barbara and Sattler, Torsten},
  title     = {The Unreasonable Effectiveness of Pre-Trained Features for Camera Pose Refinement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12786--12798},
}
CLIP as RNN: Segment Countless Visual Concepts without Training Endeavor: Shuyang Sun,

Runjia Li,

Philip Torr,

Xiuye Gu,

Siyang Li; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Shuyang and Li, Runjia and Torr, Philip and Gu, Xiuye and Li, Siyang},
  title     = {{CLIP} as {RNN}: Segment Countless Visual Concepts without Training Endeavor},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13171--13182},
}
Active Generalized Category Discovery: Shijie Ma,

Fei Zhu,

Zhun Zhong,

Xu-Yao Zhang,

Cheng-Lin Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Shijie and Zhu, Fei and Zhong, Zhun and Zhang, Xu-Yao and Liu, Cheng-Lin},
  title     = {Active Generalized Category Discovery},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16890--16900},
}
OpenBias: Open-set Bias Detection in Text-to-Image Generative Models: Moreno D'Incà,

Elia Peruzzo,

Massimiliano Mancini,

Dejia Xu,

Vidit Goel,

Xingqian Xu,

Zhangyang Wang,

Humphrey Shi,

Nicu Sebe; [pdf] [supp]
[bibtex]
@InProceedings{D'Inca_2024_CVPR,
  author    = {D'Inc{\`a}, Moreno and Peruzzo, Elia and Mancini, Massimiliano and Xu, Dejia and Goel, Vidit and Xu, Xingqian and Wang, Zhangyang and Shi, Humphrey and Sebe, Nicu},
  title     = {{OpenBias}: Open-set Bias Detection in Text-to-Image Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12225--12235},
}
3DiffTection: 3D Object Detection with Geometry-Aware Diffusion Features: Chenfeng Xu,

Huan Ling,

Sanja Fidler,

Or Litany; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Chenfeng and Ling, Huan and Fidler, Sanja and Litany, Or},
  title     = {{3DiffTection}: {3D} Object Detection with Geometry-Aware Diffusion Features},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10617--10627},
}
LowRankOcc: Tensor Decomposition and Low-Rank Recovery for Vision-based 3D Semantic Occupancy Prediction: Linqing Zhao,

Xiuwei Xu,

Ziwei Wang,

Yunpeng Zhang,

Borui Zhang,

Wenzhao Zheng,

Dalong Du,

Jie Zhou,

Jiwen Lu; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Linqing and Xu, Xiuwei and Wang, Ziwei and Zhang, Yunpeng and Zhang, Borui and Zheng, Wenzhao and Du, Dalong and Zhou, Jie and Lu, Jiwen},
  title     = {{LowRankOcc}: Tensor Decomposition and Low-Rank Recovery for Vision-based {3D} Semantic Occupancy Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9806--9815},
}
Novel View Synthesis with View-Dependent Effects from a Single Image: Juan Luis Gonzalez Bello,

Munchurl Kim; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Bello_2024_CVPR,
  author    = {Bello, Juan Luis Gonzalez and Kim, Munchurl},
  title     = {Novel View Synthesis with View-Dependent Effects from a Single Image},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10413--10423},
}
Point2RBox: Combine Knowledge from Synthetic Visual Patterns for End-to-end Oriented Object Detection with Single Point Supervision: Yi Yu,

Xue Yang,

Qingyun Li,

Feipeng Da,

Jifeng Dai,

Yu Qiao,

Junchi Yan; [pdf] [arXiv]
[bibtex]
@InProceedings{Yu_2024_CVPR,
  author    = {Yu, Yi and Yang, Xue and Li, Qingyun and Da, Feipeng and Dai, Jifeng and Qiao, Yu and Yan, Junchi},
  title     = {{Point2RBox}: Combine Knowledge from Synthetic Visual Patterns for End-to-end Oriented Object Detection with Single Point Supervision},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16783--16793},
}
HRVDA: High-Resolution Visual Document Assistant: Chaohu Liu,

Kun Yin,

Haoyu Cao,

Xinghua Jiang,

Xin Li,

Yinsong Liu,

Deqiang Jiang,

Xing Sun,

Linli Xu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Chaohu and Yin, Kun and Cao, Haoyu and Jiang, Xinghua and Li, Xin and Liu, Yinsong and Jiang, Deqiang and Sun, Xing and Xu, Linli},
  title     = {{HRVDA}: High-Resolution Visual Document Assistant},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15534--15545},
}
Learning for Transductive Threshold Calibration in Open-World Recognition: Qin Zhang,

Dongsheng An,

Tianjun Xiao,

Tong He,

Qingming Tang,

Ying Nian Wu,

Joseph Tighe,

Yifan Xing; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Qin and An, Dongsheng and Xiao, Tianjun and He, Tong and Tang, Qingming and Wu, Ying Nian and Tighe, Joseph and Xing, Yifan},
  title     = {Learning for Transductive Threshold Calibration in Open-World Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17097--17106},
}
Weakly-Supervised Emotion Transition Learning for Diverse 3D Co-speech Gesture Generation: Xingqun Qi,

Jiahao Pan,

Peng Li,

Ruibin Yuan,

Xiaowei Chi,

Mengfei Li,

Wenhan Luo,

Wei Xue,

Shanghang Zhang,

Qifeng Liu,

Yike Guo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Qi_2024_CVPR,
  author    = {Qi, Xingqun and Pan, Jiahao and Li, Peng and Yuan, Ruibin and Chi, Xiaowei and Li, Mengfei and Luo, Wenhan and Xue, Wei and Zhang, Shanghang and Liu, Qifeng and Guo, Yike},
  title     = {Weakly-Supervised Emotion Transition Learning for Diverse {3D} Co-speech Gesture Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10424--10434},
}
Causal-CoG: A Causal-Effect Look at Context Generation for Boosting Multi-modal Language Models: Shitian Zhao,

Zhuowan Li,

Yadong Lu,

Alan Yuille,

Yan Wang; [pdf] [supp]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Shitian and Li, Zhuowan and Lu, Yadong and Yuille, Alan and Wang, Yan},
  title     = {{Causal-CoG}: A Causal-Effect Look at Context Generation for Boosting Multi-modal Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13342--13351},
}
Brush2Prompt: Contextual Prompt Generator for Object Inpainting: Mang Tik Chiu,

Yuqian Zhou,

Lingzhi Zhang,

Zhe Lin,

Connelly Barnes,

Sohrab Amirghodsi,

Eli Shechtman,

Humphrey Shi; [pdf] [supp]
[bibtex]
@InProceedings{Chiu_2024_CVPR,
  author    = {Chiu, Mang Tik and Zhou, Yuqian and Zhang, Lingzhi and Lin, Zhe and Barnes, Connelly and Amirghodsi, Sohrab and Shechtman, Eli and Shi, Humphrey},
  title     = {{Brush2Prompt}: Contextual Prompt Generator for Object Inpainting},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12636--12645},
}
Joint-Task Regularization for Partially Labeled Multi-Task Learning: Kento Nishi,

Junsik Kim,

Wanhua Li,

Hanspeter Pfister; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nishi_2024_CVPR,
  author    = {Nishi, Kento and Kim, Junsik and Li, Wanhua and Pfister, Hanspeter},
  title     = {Joint-Task Regularization for Partially Labeled Multi-Task Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16152--16162},
}
Shallow-Deep Collaborative Learning for Unsupervised Visible-Infrared Person Re-Identification: Bin Yang,

Jun Chen,

Mang Ye; [pdf] [supp]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Bin and Chen, Jun and Ye, Mang},
  title     = {Shallow-Deep Collaborative Learning for Unsupervised Visible-Infrared Person Re-Identification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16870--16879},
}
Context-Aware Integration of Language and Visual References for Natural Language Tracking: Yanyan Shao,

Shuting He,

Qi Ye,

Yuchao Feng,

Wenhan Luo,

Jiming Chen; [pdf] [arXiv]
[bibtex]
@InProceedings{Shao_2024_CVPR,
  author    = {Shao, Yanyan and He, Shuting and Ye, Qi and Feng, Yuchao and Luo, Wenhan and Chen, Jiming},
  title     = {Context-Aware Integration of Language and Visual References for Natural Language Tracking},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {19208--19217},
}
An Edit Friendly DDPM Noise Space: Inversion and Manipulations: Inbar Huberman-Spiegelglas,

Vladimir Kulikov,

Tomer Michaeli; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Huberman-Spiegelglas_2024_CVPR,
  author    = {Huberman-Spiegelglas, Inbar and Kulikov, Vladimir and Michaeli, Tomer},
  title     = {An Edit Friendly {DDPM} Noise Space: Inversion and Manipulations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12469--12478},
}
RoDLA: Benchmarking the Robustness of Document Layout Analysis Models: Yufan Chen,

Jiaming Zhang,

Kunyu Peng,

Junwei Zheng,

Ruiping Liu,

Philip Torr,

Rainer Stiefelhagen; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Yufan and Zhang, Jiaming and Peng, Kunyu and Zheng, Junwei and Liu, Ruiping and Torr, Philip and Stiefelhagen, Rainer},
  title     = {{RoDLA}: Benchmarking the Robustness of Document Layout Analysis Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15556--15566},
}
BilevelPruning: Unified Dynamic and Static Channel Pruning for Convolutional Neural Networks: Shangqian Gao,

Yanfu Zhang,

Feihu Huang,

Heng Huang; [pdf] [supp]
[bibtex]
@InProceedings{Gao_2024_CVPR,
  author    = {Gao, Shangqian and Zhang, Yanfu and Huang, Feihu and Huang, Heng},
  title     = {{BilevelPruning}: Unified Dynamic and Static Channel Pruning for Convolutional Neural Networks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16090--16100},
}
IDGuard: Robust General Identity-centric POI Proactive Defense Against Face Editing Abuse: Yunshu Dai,

Jianwei Fei,

Fangjun Huang; [pdf] [supp]
[bibtex]
@InProceedings{Dai_2024_CVPR,
  author    = {Dai, Yunshu and Fei, Jianwei and Huang, Fangjun},
  title     = {{IDGuard}: Robust General Identity-centric {POI} Proactive Defense Against Face Editing Abuse},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11934--11943},
}
Viewpoint-Aware Visual Grounding in 3D Scenes: Xiangxi Shi,

Zhonghua Wu,

Stefan Lee; [pdf] [supp]
[bibtex]
@InProceedings{Shi_2024_CVPR,
  author    = {Shi, Xiangxi and Wu, Zhonghua and Lee, Stefan},
  title     = {Viewpoint-Aware Visual Grounding in {3D} Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14056--14065},
}
CRKD: Enhanced Camera-Radar Object Detection with Cross-modality Knowledge Distillation: Lingjun Zhao,

Jingyu Song,

Katherine A. Skinner; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Lingjun and Song, Jingyu and Skinner, Katherine A.},
  title     = {{CRKD}: Enhanced Camera-Radar Object Detection with Cross-modality Knowledge Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15470--15480},
}
CoG-DQA: Chain-of-Guiding Learning with Large Language Models for Diagram Question Answering: Shaowei Wang,

Lingling Zhang,

Longji Zhu,

Tao Qin,

Kim-Hui Yap,

Xinyu Zhang,

Jun Liu; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Shaowei and Zhang, Lingling and Zhu, Longji and Qin, Tao and Yap, Kim-Hui and Zhang, Xinyu and Liu, Jun},
  title     = {{CoG-DQA}: Chain-of-Guiding Learning with Large Language Models for Diagram Question Answering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13969--13979},
}
Transferable and Principled Efficiency for Open-Vocabulary Segmentation: Jingxuan Xu,

Wuyang Chen,

Yao Zhao,

Yunchao Wei; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Jingxuan and Chen, Wuyang and Zhao, Yao and Wei, Yunchao},
  title     = {Transferable and Principled Efficiency for Open-Vocabulary Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15814--15824},
}
EvDiG: Event-guided Direct and Global Components Separation: Xinyu Zhou,

Peiqi Duan,

Boyu Li,

Chu Zhou,

Chao Xu,

Boxin Shi; [pdf] [supp]
[bibtex]
@InProceedings{Zhou_2024_CVPR,
    author    = {Zhou, Xinyu and Duan, Peiqi and Li, Boyu and Zhou, Chu and Xu, Chao and Shi, Boxin},
    title     = {{EvDiG}: Event-guided Direct and Global Components Separation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {9612--9621}
}
Feedback-Guided Autonomous Driving: Jimuyang Zhang,

Zanming Huang,

Arijit Ray,

Eshed Ohn-Bar; [pdf]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
    author    = {Zhang, Jimuyang and Huang, Zanming and Ray, Arijit and Ohn-Bar, Eshed},
    title     = {Feedback-Guided Autonomous Driving},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {15000--15011}
}
DiLiGenRT: A Photometric Stereo Dataset with Quantified Roughness and Translucency: Heng Guo,

Jieji Ren,

Feishi Wang,

Boxin Shi,

Mingjun Ren,

Yasuyuki Matsushita; [pdf] [supp]
[bibtex]
@InProceedings{Guo_2024_CVPR,
    author    = {Guo, Heng and Ren, Jieji and Wang, Feishi and Shi, Boxin and Ren, Mingjun and Matsushita, Yasuyuki},
    title     = {{DiLiGenRT}: A Photometric Stereo Dataset with Quantified Roughness and Translucency},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {11810--11820}
}
De-Diffusion Makes Text a Strong Cross-Modal Interface: Chen Wei,

Chenxi Liu,

Siyuan Qiao,

Zhishuai Zhang,

Alan Yuille,

Jiahui Yu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wei_2024_CVPR,
    author    = {Wei, Chen and Liu, Chenxi and Qiao, Siyuan and Zhang, Zhishuai and Yuille, Alan and Yu, Jiahui},
    title     = {{De-Diffusion} Makes Text a Strong Cross-Modal Interface},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {13492--13503}
}
End-to-End Spatio-Temporal Action Localisation with Video Transformers: Alexey A. Gritsenko,

Xuehan Xiong,

Josip Djolonga,

Mostafa Dehghani,

Chen Sun,

Mario Lucic,

Cordelia Schmid,

Anurag Arnab; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Gritsenko_2024_CVPR,
    author    = {Gritsenko, Alexey A. and Xiong, Xuehan and Djolonga, Josip and Dehghani, Mostafa and Sun, Chen and Lucic, Mario and Schmid, Cordelia and Arnab, Anurag},
    title     = {End-to-End Spatio-Temporal Action Localisation with Video Transformers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {18373--18383}
}
End-to-End Temporal Action Detection with 1B Parameters Across 1000 Frames: Shuming Liu,

Chen-Lin Zhang,

Chen Zhao,

Bernard Ghanem; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
    author    = {Liu, Shuming and Zhang, Chen-Lin and Zhao, Chen and Ghanem, Bernard},
    title     = {End-to-End Temporal Action Detection with {1B} Parameters Across 1000 Frames},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {18591--18601}
}
TransNeXt: Robust Foveal Visual Perception for Vision Transformers: Dai Shi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Shi_2024_CVPR,
    author    = {Shi, Dai},
    title     = {{TransNeXt}: Robust Foveal Visual Perception for Vision Transformers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17773--17783}
}
Modeling Dense Multimodal Interactions Between Biological Pathways and Histology for Survival Prediction: Guillaume Jaume,

Anurag Vaidya,

Richard J. Chen,

Drew F.K. Williamson,

Paul Pu Liang,

Faisal Mahmood; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Jaume_2024_CVPR,
    author    = {Jaume, Guillaume and Vaidya, Anurag and Chen, Richard J. and Williamson, Drew F. K. and Liang, Paul Pu and Mahmood, Faisal},
    title     = {Modeling Dense Multimodal Interactions Between Biological Pathways and Histology for Survival Prediction},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {11579--11590}
}
Mining Supervision for Dynamic Regions in Self-Supervised Monocular Depth Estimation: Hoang Chuong Nguyen,

Tianyu Wang,

Jose M. Alvarez,

Miaomiao Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Nguyen_2024_CVPR,
    author    = {Nguyen, Hoang Chuong and Wang, Tianyu and Alvarez, Jose M. and Liu, Miaomiao},
    title     = {Mining Supervision for Dynamic Regions in Self-Supervised Monocular Depth Estimation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {10446--10455}
}
Physics-guided Shape-from-Template: Monocular Video Perception through Neural Surrogate Models: David Stotko,

Nils Wandel,

Reinhard Klein; [pdf] [supp]
[bibtex]
@InProceedings{Stotko_2024_CVPR,
    author    = {Stotko, David and Wandel, Nils and Klein, Reinhard},
    title     = {Physics-guided Shape-from-Template: Monocular Video Perception through Neural Surrogate Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {11895--11904}
}
You'll Never Walk Alone: A Sketch and Text Duet for Fine-Grained Image Retrieval: Subhadeep Koley,

Ayan Kumar Bhunia,

Aneeshan Sain,

Pinaki Nath Chowdhury,

Tao Xiang,

Yi-Zhe Song; [pdf] [supp]
[bibtex]
@InProceedings{Koley_2024_CVPR,
    author    = {Koley, Subhadeep and Bhunia, Ayan Kumar and Sain, Aneeshan and Chowdhury, Pinaki Nath and Xiang, Tao and Song, Yi-Zhe},
    title     = {You'll Never Walk Alone: A Sketch and Text Duet for Fine-Grained Image Retrieval},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {16509--16519}
}
Unsupervised 3D Structure Inference from Category-Specific Image Collections: Weikang Wang,

Dongliang Cao,

Florian Bernard; [pdf] [supp]
[bibtex]
@InProceedings{Wang_2024_CVPR,
    author    = {Wang, Weikang and Cao, Dongliang and Bernard, Florian},
    title     = {Unsupervised {3D} Structure Inference from Category-Specific Image Collections},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {10704--10714}
}
DiG-IN: Diffusion Guidance for Investigating Networks - Uncovering Classifier Differences Neuron Visualisations and Visual Counterfactual Explanations: Maximilian Augustin,

Yannic Neuhaus,

Matthias Hein; [pdf] [supp]
[bibtex]
@InProceedings{Augustin_2024_CVPR,
    author    = {Augustin, Maximilian and Neuhaus, Yannic and Hein, Matthias},
    title     = {{DiG-IN}: Diffusion Guidance for Investigating Networks - Uncovering Classifier Differences Neuron Visualisations and Visual Counterfactual Explanations},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {11093--11103}
}
RepViT: Revisiting Mobile CNN From ViT Perspective: Ao Wang,

Hui Chen,

Zijia Lin,

Jungong Han,

Guiguang Ding; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
    author    = {Wang, Ao and Chen, Hui and Lin, Zijia and Han, Jungong and Ding, Guiguang},
    title     = {{RepViT}: Revisiting Mobile {CNN} From {ViT} Perspective},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {15909--15920}
}
MonoNPHM: Dynamic Head Reconstruction from Monocular Videos: Simon Giebenhain,

Tobias Kirschstein,

Markos Georgopoulos,

Martin Rünz,

Lourdes Agapito,

Matthias Nießner; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Giebenhain_2024_CVPR,
    author    = {Giebenhain, Simon and Kirschstein, Tobias and Georgopoulos, Markos and R{\"u}nz, Martin and Agapito, Lourdes and Nie{\ss}ner, Matthias},
    title     = {{MonoNPHM}: Dynamic Head Reconstruction from Monocular Videos},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {10747--10758}
}
Realigning Confidence with Temporal Saliency Information for Point-Level Weakly-Supervised Temporal Action Localization: Ziying Xia,

Jian Cheng,

Siyu Liu,

Yongxiang Hu,

Shiguang Wang,

Yijie Zhang,

Liwan Dang; [pdf] [supp]
[bibtex]
@InProceedings{Xia_2024_CVPR,
    author    = {Xia, Ziying and Cheng, Jian and Liu, Siyu and Hu, Yongxiang and Wang, Shiguang and Zhang, Yijie and Dang, Liwan},
    title     = {Realigning Confidence with Temporal Saliency Information for Point-Level Weakly-Supervised Temporal Action Localization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {18440--18450}
}
Theoretically Achieving Continuous Representation of Oriented Bounding Boxes: Zikai Xiao,

Guoye Yang,

Xue Yang,

Taijiang Mu,

Junchi Yan,

Shimin Hu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Xiao_2024_CVPR,
    author    = {Xiao, Zikai and Yang, Guoye and Yang, Xue and Mu, Taijiang and Yan, Junchi and Hu, Shimin},
    title     = {Theoretically Achieving Continuous Representation of Oriented Bounding Boxes},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {16912--16922}
}
Learning Large-Factor EM Image Super-Resolution with Generative Priors: Jiateng Shou,

Zeyu Xiao,

Shiyu Deng,

Wei Huang,

Peiyao Shi,

Ruobing Zhang,

Zhiwei Xiong,

Feng Wu; [pdf] [supp]
[bibtex]
@InProceedings{Shou_2024_CVPR,
    author    = {Shou, Jiateng and Xiao, Zeyu and Deng, Shiyu and Huang, Wei and Shi, Peiyao and Zhang, Ruobing and Xiong, Zhiwei and Wu, Feng},
    title     = {Learning Large-Factor {EM} Image Super-Resolution with Generative Priors},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {11313--11322}
}
Adaptive Fusion of Single-View and Multi-View Depth for Autonomous Driving: Junda Cheng,

Wei Yin,

Kaixuan Wang,

Xiaozhi Chen,

Shijie Wang,

Xin Yang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Cheng_2024_CVPR,
    author    = {Cheng, Junda and Yin, Wei and Wang, Kaixuan and Chen, Xiaozhi and Wang, Shijie and Yang, Xin},
    title     = {Adaptive Fusion of Single-View and Multi-View Depth for Autonomous Driving},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {10138--10147}
}
Continual Self-supervised Learning: Towards Universal Multi-modal Medical Data Representation Learning: Yiwen Ye,

Yutong Xie,

Jianpeng Zhang,

Ziyang Chen,

Qi Wu,

Yong Xia; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Ye_2024_CVPR,
    author    = {Ye, Yiwen and Xie, Yutong and Zhang, Jianpeng and Chen, Ziyang and Wu, Qi and Xia, Yong},
    title     = {Continual Self-supervised Learning: Towards Universal Multi-modal Medical Data Representation Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {11114--11124}
}
Towards Efficient Replay in Federated Incremental Learning: Yichen Li,

Qunwei Li,

Haozhao Wang,

Ruixuan Li,

Wenliang Zhong,

Guannan Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
    author    = {Li, Yichen and Li, Qunwei and Wang, Haozhao and Li, Ruixuan and Zhong, Wenliang and Zhang, Guannan},
    title     = {Towards Efficient Replay in Federated Incremental Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {12820--12829}
}
SimAC: A Simple Anti-Customization Method for Protecting Face Privacy against Text-to-Image Synthesis of Diffusion Models: Feifei Wang,

Zhentao Tan,

Tianyi Wei,

Yue Wu,

Qidong Huang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
    author    = {Wang, Feifei and Tan, Zhentao and Wei, Tianyi and Wu, Yue and Huang, Qidong},
    title     = {{SimAC}: A Simple Anti-Customization Method for Protecting Face Privacy against Text-to-Image Synthesis of Diffusion Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {12047--12056}
}
Fair-VPT: Fair Visual Prompt Tuning for Image Classification: Sungho Park,

Hyeran Byun; [pdf] [supp]
[bibtex]
@InProceedings{Park_2024_CVPR,
    author    = {Park, Sungho and Byun, Hyeran},
    title     = {{Fair-VPT}: Fair Visual Prompt Tuning for Image Classification},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {12268--12278}
}
CaDeT: a Causal Disentanglement Approach for Robust Trajectory Prediction in Autonomous Driving: Mozhgan Pourkeshavarz,

Junrui Zhang,

Amir Rasouli; [pdf] [supp]
[bibtex]
@InProceedings{Pourkeshavarz_2024_CVPR,
    author    = {Pourkeshavarz, Mozhgan and Zhang, Junrui and Rasouli, Amir},
    title     = {{CaDeT}: a Causal Disentanglement Approach for Robust Trajectory Prediction in Autonomous Driving},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {14874--14884}
}
Prompting Vision Foundation Models for Pathology Image Analysis: Chong Yin,

Siqi Liu,

Kaiyang Zhou,

Vincent Wai-Sun Wong,

Pong C. Yuen; [pdf] [supp]
[bibtex]
@InProceedings{Yin_2024_CVPR,
    author    = {Yin, Chong and Liu, Siqi and Zhou, Kaiyang and Wong, Vincent Wai-Sun and Yuen, Pong C.},
    title     = {Prompting Vision Foundation Models for Pathology Image Analysis},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {11292--11301}
}
SEED-Bench: Benchmarking Multimodal Large Language Models: Bohao Li,

Yuying Ge,

Yixiao Ge,

Guangzhi Wang,

Rui Wang,

Ruimao Zhang,

Ying Shan; [pdf] [supp]
[bibtex]
@InProceedings{Li_2024_CVPR,
    author    = {Li, Bohao and Ge, Yuying and Ge, Yixiao and Wang, Guangzhi and Wang, Rui and Zhang, Ruimao and Shan, Ying},
    title     = {{SEED-Bench}: Benchmarking Multimodal Large Language Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {13299--13308}
}
Object Pose Estimation via the Aggregation of Diffusion Features: Tianfu Wang,

Guosheng Hu,

Hongguang Wang; [pdf] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
    author    = {Wang, Tianfu and Hu, Guosheng and Wang, Hongguang},
    title     = {Object Pose Estimation via the Aggregation of Diffusion Features},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {10238--10247}
}
Panda-70M: Captioning 70M Videos with Multiple Cross-Modality Teachers: Tsai-Shien Chen,

Aliaksandr Siarohin,

Willi Menapace,

Ekaterina Deyneka,

Hsiang-wei Chao,

Byung Eun Jeon,

Yuwei Fang,

Hsin-Ying Lee,

Jian Ren,

Ming-Hsuan Yang,

Sergey Tulyakov; [pdf] [supp]
[bibtex]
@InProceedings{Chen_2024_CVPR,
    author    = {Chen, Tsai-Shien and Siarohin, Aliaksandr and Menapace, Willi and Deyneka, Ekaterina and Chao, Hsiang-wei and Jeon, Byung Eun and Fang, Yuwei and Lee, Hsin-Ying and Ren, Jian and Yang, Ming-Hsuan and Tulyakov, Sergey},
    title     = {{Panda-70M}: Captioning {70M} Videos with Multiple Cross-Modality Teachers},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {13320--13331}
}
Infrared Small Target Detection with Scale and Location Sensitivity: Qiankun Liu,

Rui Liu,

Bolun Zheng,

Hongkui Wang,

Ying Fu; [pdf] [arXiv]
[bibtex]
@InProceedings{Liu_2024_CVPR,
    author    = {Liu, Qiankun and Liu, Rui and Zheng, Bolun and Wang, Hongkui and Fu, Ying},
    title     = {Infrared Small Target Detection with Scale and Location Sensitivity},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {17490--17499}
}
Self-supervised Debiasing Using Low Rank Regularization: Geon Yeong Park,

Chanyong Jung,

Sangmin Lee,

Jong Chul Ye,

Sang Wan Lee; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2024_CVPR,
    author    = {Park, Geon Yeong and Jung, Chanyong and Lee, Sangmin and Ye, Jong Chul and Lee, Sang Wan},
    title     = {Self-supervised Debiasing Using Low Rank Regularization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {12395--12405}
}
Finding Lottery Tickets in Vision Models via Data-driven Spectral Foresight Pruning: Leonardo Iurada,

Marco Ciccone,

Tatiana Tommasi; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Iurada_2024_CVPR,
    author    = {Iurada, Leonardo and Ciccone, Marco and Tommasi, Tatiana},
    title     = {Finding Lottery Tickets in Vision Models via Data-driven Spectral Foresight Pruning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {16142--16151}
}
InNeRF360: Text-Guided 3D-Consistent Object Inpainting on 360-degree Neural Radiance Fields: Dongqing Wang,

Tong Zhang,

Alaa Abboud,

Sabine Süsstrunk; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Wang_2024_CVPR,
    author    = {Wang, Dongqing and Zhang, Tong and Abboud, Alaa and S{\"u}sstrunk, Sabine},
    title     = {{InNeRF360}: Text-Guided {3D}-Consistent Object Inpainting on 360-degree Neural Radiance Fields},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {12677--12686}
}
IS-Fusion: Instance-Scene Collaborative Fusion for Multimodal 3D Object Detection: Junbo Yin,

Jianbing Shen,

Runnan Chen,

Wei Li,

Ruigang Yang,

Pascal Frossard,

Wenguan Wang; [pdf]
[bibtex]
@InProceedings{Yin_2024_CVPR,
    author    = {Yin, Junbo and Shen, Jianbing and Chen, Runnan and Li, Wei and Yang, Ruigang and Frossard, Pascal and Wang, Wenguan},
    title     = {{IS-Fusion}: Instance-Scene Collaborative Fusion for Multimodal {3D} Object Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {14905--14915}
}
Enhancing Intrinsic Features for Debiasing via Investigating Class-Discerning Common Attributes in Bias-Contrastive Pair: Jeonghoon Park,

Chaeyeon Chung,

Jaegul Choo; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Park_2024_CVPR,
    author    = {Park, Jeonghoon and Chung, Chaeyeon and Choo, Jaegul},
    title     = {Enhancing Intrinsic Features for Debiasing via Investigating Class-Discerning Common Attributes in Bias-Contrastive Pair},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {12332--12341}
}
Compositional Chain-of-Thought Prompting for Large Multimodal Models: Chancharik Mitra,

Brandon Huang,

Trevor Darrell,

Roei Herzig; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Mitra_2024_CVPR,
    author    = {Mitra, Chancharik and Huang, Brandon and Darrell, Trevor and Herzig, Roei},
    title     = {Compositional Chain-of-Thought Prompting for Large Multimodal Models},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {14420--14431}
}
Diffusion Time-step Curriculum for One Image to 3D Generation: Xuanyu Yi,

Zike Wu,

Qingshan Xu,

Pan Zhou,

Joo-Hwee Lim,

Hanwang Zhang; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Yi_2024_CVPR,
    author    = {Yi, Xuanyu and Wu, Zike and Xu, Qingshan and Zhou, Pan and Lim, Joo-Hwee and Zhang, Hanwang},
    title     = {Diffusion Time-step Curriculum for One Image to {3D} Generation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {9948--9958}
}
Adaptive Hyper-graph Aggregation for Modality-Agnostic Federated Learning: Fan Qi,

Shuai Li; [pdf] [supp]
[bibtex]
@InProceedings{Qi_2024_CVPR,
    author    = {Qi, Fan and Li, Shuai},
    title     = {Adaptive Hyper-graph Aggregation for Modality-Agnostic Federated Learning},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {12312--12321}
}
SPIN: Simultaneous Perception Interaction and Navigation: Shagun Uppal,

Ananye Agarwal,

Haoyu Xiong,

Kenneth Shaw,

Deepak Pathak; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Uppal_2024_CVPR,
    author    = {Uppal, Shagun and Agarwal, Ananye and Xiong, Haoyu and Shaw, Kenneth and Pathak, Deepak},
    title     = {{SPIN}: Simultaneous Perception Interaction and Navigation},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {18133--18142}
}
Exploring the Potential of Large Foundation Models for Open-Vocabulary HOI Detection: Ting Lei,

Shaofeng Yin,

Yang Liu; [pdf] [supp] [arXiv]
[bibtex]
@InProceedings{Lei_2024_CVPR,
    author    = {Lei, Ting and Yin, Shaofeng and Liu, Yang},
    title     = {Exploring the Potential of Large Foundation Models for Open-Vocabulary {HOI} Detection},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = jun,
    year      = {2024},
    pages     = {16657--16667}
}; Back