Papers
- Back
Seeing the World through Your Eyes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Alzayer_2024_CVPR,
  author    = {Alzayer, Hadi and Zhang, Kevin and Feng, Brandon and Metzler, Christopher A. and Huang, Jia-Bin},
  title     = {Seeing the World through Your Eyes},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4864--4873},
}
Ungeneralizable Examples-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2024_CVPR,
  author    = {Ye, Jingwen and Wang, Xinchao},
  title     = {Ungeneralizable Examples},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11944--11953},
}
LaneCPP: Continuous 3D Lane Detection using Physical Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pittner_2024_CVPR,
  author    = {Pittner, Maximilian and Janai, Joel and Condurache, Alexandru P.},
  title     = {{LaneCPP}: Continuous {3D} Lane Detection using Physical Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10639--10648},
}
CityDreamer: Compositional Generative Model of Unbounded 3D Cities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2024_CVPR,
  author    = {Xie, Haozhe and Chen, Zhaoxi and Hong, Fangzhou and Liu, Ziwei},
  title     = {{CityDreamer}: Compositional Generative Model of Unbounded {3D} Cities},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9666--9675},
}
Action Detection via an Image Diffusion Process-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Foo_2024_CVPR,
  author    = {Foo, Lin Geng and Li, Tianjiao and Rahmani, Hossein and Liu, Jun},
  title     = {Action Detection via an Image Diffusion Process},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18351--18361},
}
ConTex-Human: Free-View Rendering of Human from a Single Image with Texture-Consistent Synthesis-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2024_CVPR,
  author    = {Gao, Xiangjun and Li, Xiaoyu and Zhang, Chaopeng and Zhang, Qi and Cao, Yanpei and Shan, Ying and Quan, Long},
  title     = {{ConTex-Human}: Free-View Rendering of Human from a Single Image with Texture-Consistent Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10084--10094},
}
Streaming Dense Video Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Xingyi and Arnab, Anurag and Buch, Shyamal and Yan, Shen and Myers, Austin and Xiong, Xuehan and Nagrani, Arsha and Schmid, Cordelia},
  title     = {Streaming Dense Video Captioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18243--18252},
}
Rethinking Inductive Biases for Surface Normal Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bae_2024_CVPR,
  author    = {Bae, Gwangbin and Davison, Andrew J.},
  title     = {Rethinking Inductive Biases for Surface Normal Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9535--9545},
}
Fair Federated Learning under Domain Skew with Local Consistency and Domain Diversity-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Yuhang and Huang, Wenke and Ye, Mang},
  title     = {Fair Federated Learning under Domain Skew with Local Consistency and Domain Diversity},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12077--12086},
}
HIG: Hierarchical Interlacement Graph Approach to Scene Graph Generation in Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2024_CVPR,
  author    = {Nguyen, Trong-Thuan and Nguyen, Pha and Luu, Khoa},
  title     = {{HIG}: Hierarchical Interlacement Graph Approach to Scene Graph Generation in Video Understanding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18384--18394},
}
OOSTraj: Out-of-Sight Trajectory Prediction With Vision-Positioning Denoising-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Haichao and Xu, Yi and Lu, Hongsheng and Shimizu, Takayuki and Fu, Yun},
  title     = {{OOSTraj}: Out-of-Sight Trajectory Prediction With Vision-Positioning Denoising},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14802--14811},
}
FADES: Fair Disentanglement with Sensitive Relevance-
[pdf]
[supp]
[bibtex]@InProceedings{Jang_2024_CVPR,
  author    = {Jang, Taeuk and Wang, Xiaoqian},
  title     = {{FADES}: Fair Disentanglement with Sensitive Relevance},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12067--12076},
}
Self-Supervised Class-Agnostic Motion Prediction with Spatial and Temporal Consistency Regularizations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Kewei and Wu, Yizheng and Cen, Jun and Pan, Zhiyu and Li, Xingyi and Wang, Zhe and Cao, Zhiguo and Lin, Guosheng},
  title     = {Self-Supervised Class-Agnostic Motion Prediction with Spatial and Temporal Consistency Regularizations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14638--14647},
}
CAT: Exploiting Inter-Class Dynamics for Domain Adaptive Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kennerley_2024_CVPR,
  author    = {Kennerley, Mikhail and Wang, Jian-Gang and Veeravalli, Bharadwaj and Tan, Robby T.},
  title     = {{CAT}: Exploiting Inter-Class Dynamics for Domain Adaptive Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16541--16550},
}
An Empirical Study of Scaling Law for Scene Text Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Rang_2024_CVPR,
  author    = {Rang, Miao and Bi, Zhenni and Liu, Chuanjian and Wang, Yunhe and Han, Kai},
  title     = {An Empirical Study of Scaling Law for Scene Text Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15619--15629},
}
Text2Loc: 3D Point Cloud Localization from Natural Language-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xia_2024_CVPR,
  author    = {Xia, Yan and Shi, Letian and Ding, Zifeng and Henriques, Joao F. and Cremers, Daniel},
  title     = {{Text2Loc}: {3D} Point Cloud Localization from Natural Language},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14958--14967},
}
Decomposing Disease Descriptions for Enhanced Pathology Detection: A Multi-Aspect Vision-Language Pre-training Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Phan_2024_CVPR,
  author    = {Phan, Vu Minh Hieu and Xie, Yutong and Qi, Yuankai and Liu, Lingqiao and Liu, Liyang and Zhang, Bowen and Liao, Zhibin and Wu, Qi and To, Minh-Son and Verjans, Johan W.},
  title     = {Decomposing Disease Descriptions for Enhanced Pathology Detection: A Multi-Aspect Vision-Language Pre-training Framework},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11492--11501},
}
Fusing Personal and Environmental Cues for Identification and Segmentation of First-Person Camera Wearers in Third-Person Views-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Ziwei and Wang, Yuchen and Wang, Chuhua},
  title     = {Fusing Personal and Environmental Cues for Identification and Segmentation of First-Person Camera Wearers in Third-Person Views},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16477--16487},
}
Desigen: A Pipeline for Controllable Design Template Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Weng_2024_CVPR,
  author    = {Weng, Haohan and Huang, Danqing and Qiao, Yu and Hu, Zheng and Lin, Chin-Yew and Zhang, Tong and Chen, C. L. Philip},
  title     = {Desigen: A Pipeline for Controllable Design Template Generation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12721--12732},
}
Multi-criteria Token Fusion with One-step-ahead Attention for Efficient Vision Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Sanghyeok and Choi, Joonmyung and Kim, Hyunwoo J.},
  title     = {Multi-criteria Token Fusion with One-step-ahead Attention for Efficient Vision Transformers},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15741--15750},
}
ViewFusion: Towards Multi-View Consistency via Interpolated Denoising-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Xianghui and Zuo, Yan and Ramasinghe, Sameera and Bazzani, Loris and Avraham, Gil and van den Hengel, Anton},
  title     = {{ViewFusion}: Towards Multi-View Consistency via Interpolated Denoising},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9870--9880},
}
SketchINR: A First Look into Sketches as Implicit Neural Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bandyopadhyay_2024_CVPR,
  author    = {Bandyopadhyay, Hmrishav and Bhunia, Ayan Kumar and Chowdhury, Pinaki Nath and Sain, Aneeshan and Xiang, Tao and Hospedales, Timothy and Song, Yi-Zhe},
  title     = {{SketchINR}: A First Look into Sketches as Implicit Neural Representations},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12565--12574},
}
MatchU: Matching Unseen Objects for 6D Pose Estimation from RGB-D Images-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Junwen and Yu, Hao and Yu, Kuan-Ting and Navab, Nassir and Ilic, Slobodan and Busam, Benjamin},
  title     = {{MatchU}: Matching Unseen Objects for {6D} Pose Estimation from {RGB-D} Images},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10095--10105},
}
Towards High-fidelity Artistic Image Vectorization via Texture-Encapsulated Shape Parameterization-
[pdf]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Ye and Ni, Bingbing and Liu, Jinfan and Huang, Xiaoyang and Chen, Xuanhong},
  title     = {Towards High-fidelity Artistic Image Vectorization via Texture-Encapsulated Shape Parameterization},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15877--15886},
}
EfficientSAM: Leveraged Masked Image Pretraining for Efficient Segment Anything-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiong_2024_CVPR,
  author    = {Xiong, Yunyang and Varadarajan, Bala and Wu, Lemeng and Xiang, Xiaoyu and Xiao, Fanyi and Zhu, Chenchen and Dai, Xiaoliang and Wang, Dilin and Sun, Fei and Iandola, Forrest and Krishnamoorthi, Raghuraman and Chandra, Vikas},
  title     = {{EfficientSAM}: Leveraged Masked Image Pretraining for Efficient {Segment Anything}},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16111--16121},
}
ChatScene: Knowledge-Enabled Safety-Critical Scenario Generation for Autonomous Vehicles-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Jiawei and Xu, Chejian and Li, Bo},
  title     = {{ChatScene}: Knowledge-Enabled Safety-Critical Scenario Generation for Autonomous Vehicles},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15459--15469},
}
Teeth-SEG: An Efficient Instance Segmentation Framework for Orthodontic Treatment based on Multi-Scale Aggregation and Anthropic Prior Knowledge-
[pdf]
[supp]
[bibtex]@InProceedings{Zou_2024_CVPR,
  author    = {Zou, Bo and Wang, Shaofeng and Liu, Hao and Sun, Gaoyue and Wang, Yajie and Zuo, FeiFei and Quan, Chengbin and Zhao, Youjian},
  title     = {{Teeth-SEG}: An Efficient Instance Segmentation Framework for Orthodontic Treatment based on Multi-Scale Aggregation and Anthropic Prior Knowledge},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11601--11610},
}
Bayesian Diffusion Models for 3D Shape Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Haiyang and Lei, Yu and Chen, Zeyuan and Zhang, Xiang and Zhao, Yue and Wang, Yilin and Tu, Zhuowen},
  title     = {Bayesian Diffusion Models for {3D} Shape Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10628--10638},
}
CrossKD: Cross-Head Knowledge Distillation for Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Jiabao and Chen, Yuming and Zheng, Zhaohui and Li, Xiang and Cheng, Ming-Ming and Hou, Qibin},
  title     = {{CrossKD}: Cross-Head Knowledge Distillation for Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16520--16530},
}
Bi-level Learning of Task-Specific Decoders for Joint Registration and One-Shot Medical Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Fan_2024_CVPR,
  author    = {Fan, Xin and Wang, Xiaolin and Gao, Jiaxin and Wang, Jia and Luo, Zhongxuan and Liu, Risheng},
  title     = {Bi-level Learning of Task-Specific Decoders for Joint Registration and One-Shot Medical Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11726--11735},
}
EscherNet: A Generative Model for Scalable View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kong_2024_CVPR,
  author    = {Kong, Xin and Liu, Shikun and Lyu, Xiaoyang and Taher, Marwan and Qi, Xiaojuan and Davison, Andrew J.},
  title     = {{EscherNet}: A Generative Model for Scalable View Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9503--9513},
}
MeaCap: Memory-Augmented Zero-shot Image Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zeng_2024_CVPR,
  author    = {Zeng, Zequn and Xie, Yan and Zhang, Hao and Chen, Chiyu and Chen, Bo and Wang, Zhengjue},
  title     = {{MeaCap}: Memory-Augmented Zero-shot Image Captioning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14100--14110},
}
Elite360D: Towards Efficient 360 Depth Estimation via Semantic- and Distance-Aware Bi-Projection Fusion-
[pdf]
[supp]
[bibtex]@InProceedings{Ai_2024_CVPR,
  author    = {Ai, Hao and Wang, Lin},
  title     = {{Elite360D}: Towards Efficient 360 Depth Estimation via Semantic- and Distance-Aware Bi-Projection Fusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9926--9935},
}
Curriculum Point Prompting for Weakly-Supervised Referring Image Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Dai_2024_CVPR,
  author    = {Dai, Qiyuan and Yang, Sibei},
  title     = {Curriculum Point Prompting for Weakly-Supervised Referring Image Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13711--13722},
}
EventDance: Unsupervised Source-free Cross-modal Adaptation for Event-based Object Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR,
  author    = {Zheng, Xu and Wang, Lin},
  title     = {{EventDance}: Unsupervised Source-free Cross-modal Adaptation for Event-based Object Recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17448--17458},
}
CycleINR: Cycle Implicit Neural Representation for Arbitrary-Scale Volumetric Super-Resolution of Medical Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fang_2024_CVPR,
  author    = {Fang, Wei and Tang, Yuxing and Guo, Heng and Yuan, Mingze and Mok, Tony C. W. and Yan, Ke and Yao, Jiawen and Chen, Xin and Liu, Zaiyi and Lu, Le and Zhang, Ling and Xu, Minfeng},
  title     = {{CycleINR}: Cycle Implicit Neural Representation for Arbitrary-Scale Volumetric Super-Resolution of Medical Data},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11631--11641},
}
Holistic Autonomous Driving Understanding by Bird's-Eye-View Injected Multi-Modal Large Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ding_2024_CVPR,
  author    = {Ding, Xinpeng and Han, Jianhua and Xu, Hang and Liang, Xiaodan and Zhang, Wei and Li, Xiaomeng},
  title     = {Holistic Autonomous Driving Understanding by Bird's-Eye-View Injected Multi-Modal Large Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13668--13677},
}
Extreme Point Supervised Instance Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR,
  author    = {Lee, Hyeonjun and Hwang, Sehyun and Kwak, Suha},
  title     = {Extreme Point Supervised Instance Segmentation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17212--17222},
}
MedM2G: Unifying Medical Multi-Modal Generation via Cross-Guided Diffusion with Visual Invariant-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhan_2024_CVPR,
  author    = {Zhan, Chenlu and Lin, Yu and Wang, Gaoang and Wang, Hongwei and Wu, Jian},
  title     = {{MedM2G}: Unifying Medical Multi-Modal Generation via Cross-Guided Diffusion with Visual Invariant},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11502--11512},
}
Neural Parametric Gaussians for Monocular Non-Rigid Object Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Das_2024_CVPR,
  author    = {Das, Devikalyan and Wewer, Christopher and Yunus, Raza and Ilg, Eddy and Lenssen, Jan Eric},
  title     = {Neural Parametric {Gaussians} for Monocular Non-Rigid Object Reconstruction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10715--10725},
}
PH-Net: Semi-Supervised Breast Lesion Segmentation via Patch-wise Hardness-
[pdf]
[supp]
[bibtex]@InProceedings{Jiang_2024_CVPR,
  author    = {Jiang, Siyao and Wu, Huisi and Chen, Junyang and Zhang, Qin and Qin, Jing},
  title     = {{PH-Net}: Semi-Supervised Breast Lesion Segmentation via Patch-wise Hardness},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11418--11427},
}
ExACT: Language-guided Conceptual Reasoning and Uncertainty Estimation for Event-based Action Recognition and More-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Jiazhou and Zheng, Xu and Lyu, Yuanhuiyi and Wang, Lin},
  title     = {{ExACT}: Language-guided Conceptual Reasoning and Uncertainty Estimation for Event-based Action Recognition and More},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18633--18643},
}
Improving Visual Recognition with Hyperbolical Visual Hierarchy Mapping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kwon_2024_CVPR,
  author    = {Kwon, Hyeongjun and Jang, Jinhyun and Kim, Jin and Kim, Kwonyoung and Sohn, Kwanghoon},
  title     = {Improving Visual Recognition with Hyperbolical Visual Hierarchy Mapping},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17364--17374},
}
ParameterNet: Parameters Are All You Need for Large-scale Visual Pretraining of Mobile Networks-
[pdf]
[bibtex]@InProceedings{Han_2024_CVPR,
  author    = {Han, Kai and Wang, Yunhe and Guo, Jianyuan and Wu, Enhua},
  title     = {{ParameterNet}: Parameters Are All You Need for Large-scale Visual Pretraining of Mobile Networks},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15751--15761},
}
Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ke_2024_CVPR,
  author    = {Ke, Bingxin and Obukhov, Anton and Huang, Shengyu and Metzger, Nando and Daudt, Rodrigo Caye and Schindler, Konrad},
  title     = {Repurposing Diffusion-Based Image Generators for Monocular Depth Estimation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {9492--9502},
}
LLMs are Good Sign Language Translators-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gong_2024_CVPR,
  author    = {Gong, Jia and Foo, Lin Geng and He, Yixuan and Rahmani, Hossein and Liu, Jun},
  title     = {{LLMs} are Good Sign Language Translators},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18362--18372},
}
Revisiting the Domain Shift and Sample Uncertainty in Multi-source Active Domain Transfer-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Wenqiao and Lv, Zheqi and Zhou, Hao and Liu, Jia-Wei and Li, Juncheng and Li, Mengze and Li, Yunfei and Zhang, Dongping and Zhuang, Yueting and Tang, Siliang},
  title     = {Revisiting the Domain Shift and Sample Uncertainty in Multi-source Active Domain Transfer},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16751--16761},
}
Learning Continual Compatible Representation for Re-indexing Free Lifelong Person Re-identification-
[pdf]
[supp]
[bibtex]@InProceedings{Cui_2024_CVPR,
  author    = {Cui, Zhenyu and Zhou, Jiahuan and Wang, Xun and Zhu, Manyu and Peng, Yuxin},
  title     = {Learning Continual Compatible Representation for Re-indexing Free Lifelong Person Re-identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16614--16623},
}
CORES: Convolutional Response-based Score for Out-of-distribution Detection-
[pdf]
[bibtex]@InProceedings{Tang_2024_CVPR,
  author    = {Tang, Keke and Hou, Chao and Peng, Weilong and Chen, Runnan and Zhu, Peican and Wang, Wenping and Tian, Zhihong},
  title     = {{CORES}: Convolutional Response-based Score for Out-of-distribution Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10916--10925},
}
Accurate Spatial Gene Expression Prediction by Integrating Multi-Resolution Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chung_2024_CVPR,
  author    = {Chung, Youngmin and Ha, Ji Hun and Im, Kyeong Chan and Lee, Joo Sang},
  title     = {Accurate Spatial Gene Expression Prediction by Integrating Multi-Resolution Features},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11591--11600},
}
Behind the Veil: Enhanced Indoor 3D Scene Reconstruction with Occluded Surfaces Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Su and Zhao, Cheng and Guo, Yuliang and Wang, Ruoyu and Huang, Xinyu and Chen, Yingjie Victor and Ren, Liu},
  title     = {Behind the Veil: Enhanced Indoor {3D} Scene Reconstruction with Occluded Surfaces Completion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12744--12753},
}
VideoGrounding-DINO: Towards Open-Vocabulary Spatio-Temporal Video Grounding-
[pdf]
[bibtex]@InProceedings{Wasim_2024_CVPR,
  author    = {Wasim, Syed Talal and Naseer, Muzammal and Khan, Salman and Yang, Ming-Hsuan and Khan, Fahad Shahbaz},
  title     = {{VideoGrounding-DINO}: Towards Open-Vocabulary Spatio-Temporal Video Grounding},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18909--18918},
}
Think Twice Before Selection: Federated Evidential Active Learning for Medical Image Analysis with Domain Shifts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Jiayi and Ma, Benteng and Cui, Hengfei and Xia, Yong},
  title     = {Think Twice Before Selection: Federated Evidential Active Learning for Medical Image Analysis with Domain Shifts},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11439--11449},
}
ViTamin: Designing Scalable Vision Models in the Vision-Language Era-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Jieneng and Yu, Qihang and Shen, Xiaohui and Yuille, Alan and Chen, Liang-Chieh},
  title     = {{ViTamin}: Designing Scalable Vision Models in the Vision-Language Era},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {12954--12966},
}
Seeing the Unseen: Visual Common Sense for Semantic Placement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ramrakhya_2024_CVPR,
  author    = {Ramrakhya, Ram and Kembhavi, Aniruddha and Batra, Dhruv and Kira, Zsolt and Zeng, Kuo-Hao and Weihs, Luca},
  title     = {Seeing the Unseen: Visual Common Sense for Semantic Placement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16273--16283},
}
LLaMA-Excitor: General Instruction Tuning via Indirect Feature Interaction-
[pdf]
[supp]
[bibtex]@InProceedings{Zou_2024_CVPR,
  author    = {Zou, Bo and Yang, Chao and Qiao, Yu and Quan, Chengbin and Zhao, Youjian},
  title     = {{LLaMA-Excitor}: General Instruction Tuning via Indirect Feature Interaction},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {14089--14099},
}
Steerers: A Framework for Rotation Equivariant Keypoint Descriptors-
[pdf]
[supp]
[bibtex]@InProceedings{Bokman_2024_CVPR,
  author    = {B{\"o}kman, Georg and Edstedt, Johan and Felsberg, Michael and Kahl, Fredrik},
  title     = {Steerers: A Framework for Rotation Equivariant Keypoint Descriptors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {4885--4895},
}
Efficient Dataset Distillation via Minimax Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2024_CVPR,
  author    = {Gu, Jianyang and Vahidian, Saeed and Kungurtsev, Vyacheslav and Wang, Haonan and Jiang, Wei and You, Yang and Chen, Yiran},
  title     = {Efficient Dataset Distillation via Minimax Diffusion},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {15793--15803},
}
Posterior Distillation Sampling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Koo_2024_CVPR,
  author    = {Koo, Juil and Park, Chanho and Sung, Minhyuk},
  title     = {Posterior Distillation Sampling},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13352--13361},
}
HOISDF: Constraining 3D Hand-Object Pose Estimation with Global Signed Distance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qi_2024_CVPR,
  author    = {Qi, Haozhe and Zhao, Chen and Salzmann, Mathieu and Mathis, Alexander},
  title     = {{HOISDF}: Constraining {3D} Hand-Object Pose Estimation with Global Signed Distance Fields},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10392--10402},
}
DiffPortrait3D: Controllable Diffusion for Zero-Shot Portrait View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2024_CVPR,
  author    = {Gu, Yuming and Xu, Hongyi and Xie, You and Song, Guoxian and Shi, Yichun and Chang, Di and Yang, Jing and Luo, Linjie},
  title     = {{DiffPortrait3D}: Controllable Diffusion for Zero-Shot Portrait View Synthesis},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {10456--10465},
}
H-ViT: A Hierarchical Vision Transformer for Deformable Image Registration-
[pdf]
[supp]
[bibtex]@InProceedings{Ghahremani_2024_CVPR,
  author    = {Ghahremani, Morteza and Khateri, Mohammad and Jian, Bailiang and Wiestler, Benedikt and Adeli, Ehsan and Wachinger, Christian},
  title     = {{H-ViT}: A Hierarchical Vision Transformer for Deformable Image Registration},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {11513--11523},
}
VideoLLM-online: Online Video Large Language Model for Streaming Video-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Joya and Lv, Zhaoyang and Wu, Shiwei and Lin, Kevin Qinghong and Song, Chenan and Gao, Difei and Liu, Jia-Wei and Gao, Ziteng and Mao, Dongxing and Shou, Mike Zheng},
  title     = {{VideoLLM-online}: Online Video Large Language Model for Streaming Video},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {18407--18418},
}
Towards Better Vision-Inspired Vision-Language Models-
[pdf]
[bibtex]@InProceedings{Cao_2024_CVPR,
  author    = {Cao, Yun-Hao and Ji, Kaixiang and Huang, Ziyuan and Zheng, Chuanyang and Liu, Jiajia and Wang, Jian and Chen, Jingdong and Yang, Ming},
  title     = {Towards Better Vision-Inspired Vision-Language Models},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {13537--13547},
}
VSRD: Instance-Aware Volumetric Silhouette Rendering for Weakly Supervised 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Zihua and Sakuma, Hiroki and Okutomi, Masatoshi},
  title     = {{VSRD}: Instance-Aware Volumetric Silhouette Rendering for Weakly Supervised {3D} Object Detection},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {17354--17363},
}
RILA: Reflective and Imaginative Language Agent for Zero-Shot Semantic Audio-Visual Navigation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Zeyuan and Liu, Jiageng and Chen, Peihao and Cherian, Anoop and Marks, Tim K. and Le Roux, Jonathan and Gan, Chuang},
  title     = {{RILA}: Reflective and Imaginative Language Agent for Zero-Shot Semantic Audio-Visual Navigation},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2024},
  pages     = {16251--16261},
}
Endow SAM with Keen Eyes: Temporal-spatial Prompt Learning for Video Camouflaged Object Detection-
[pdf]
[bibtex]@InProceedings{Hui_2024_CVPR, author = {Hui, Wenjun and Zhu, Zhenfeng and Zheng, Shuai and Zhao, Yao}, title = {Endow SAM with Keen Eyes: Temporal-spatial Prompt Learning for Video Camouflaged Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19058-19067} }
Forgery-aware Adaptive Transformer for Generalizable Synthetic Image Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Huan and Tan, Zichang and Tan, Chuangchuang and Wei, Yunchao and Wang, Jingdong and Zhao, Yao}, title = {Forgery-aware Adaptive Transformer for Generalizable Synthetic Image Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10770-10780} }
PostureHMR: Posture Transformation for 3D Human Mesh Recovery-
[pdf]
[supp]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Yu-Pei and Wu, Xiao and Yuan, Zhaoquan and Qiao, Jian-Jun and Peng, Qiang}, title = {PostureHMR: Posture Transformation for 3D Human Mesh Recovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9732-9741} }
Dynamic Adapter Meets Prompt Tuning: Parameter-Efficient Transfer Learning for Point Cloud Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Xin and Liang, Dingkang and Xu, Wei and Zhu, Xingkui and Xu, Yihan and Zou, Zhikang and Bai, Xiang}, title = {Dynamic Adapter Meets Prompt Tuning: Parameter-Efficient Transfer Learning for Point Cloud Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14707-14717} }
Wonder3D: Single Image to 3D using Cross-Domain Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Long_2024_CVPR, author = {Long, Xiaoxiao and Guo, Yuan-Chen and Lin, Cheng and Liu, Yuan and Dou, Zhiyang and Liu, Lingjie and Ma, Yuexin and Zhang, Song-Hai and Habermann, Marc and Theobalt, Christian and Wang, Wenping}, title = {Wonder3D: Single Image to 3D using Cross-Domain Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9970-9980} }
RichDreamer: A Generalizable Normal-Depth Diffusion Model for Detail Richness in Text-to-3D-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2024_CVPR, author = {Qiu, Lingteng and Chen, Guanying and Gu, Xiaodong and Zuo, Qi and Xu, Mutian and Wu, Yushuang and Yuan, Weihao and Dong, Zilong and Bo, Liefeng and Han, Xiaoguang}, title = {RichDreamer: A Generalizable Normal-Depth Diffusion Model for Detail Richness in Text-to-3D}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9914-9925} }
Zero-shot Referring Expression Comprehension via Structural Similarity Between Images and Captions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2024_CVPR, author = {Han, Zeyu and Zhu, Fangrui and Lao, Qianru and Jiang, Huaizu}, title = {Zero-shot Referring Expression Comprehension via Structural Similarity Between Images and Captions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14364-14374} }
Triplane Meets Gaussian Splatting: Fast and Generalizable Single-View 3D Reconstruction with Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zou_2024_CVPR, author = {Zou, Zi-Xin and Yu, Zhipeng and Guo, Yuan-Chen and Li, Yangguang and Liang, Ding and Cao, Yan-Pei and Zhang, Song-Hai}, title = {Triplane Meets Gaussian Splatting: Fast and Generalizable Single-View 3D Reconstruction with Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10324-10335} }
WateRF: Robust Watermarks in Radiance Fields for Protection of Copyrights-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2024_CVPR, author = {Jang, Youngdong and Lee, Dong In and Jang, MinHyuk and Kim, Jong Wook and Yang, Feng and Kim, Sangpil}, title = {WateRF: Robust Watermarks in Radiance Fields for Protection of Copyrights}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12087-12097} }
Instance-aware Contrastive Learning for Occluded Human Mesh Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Gwon_2024_CVPR, author = {Gwon, Mi-Gyeong and Um, Gi-Mun and Cheong, Won-Sik and Kim, Wonjun}, title = {Instance-aware Contrastive Learning for Occluded Human Mesh Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10553-10562} }
Robust Noisy Correspondence Learning with Equivariant Similarity Consistency-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Yuchen and Wang, Likai and Yang, Erkun and Deng, Cheng}, title = {Robust Noisy Correspondence Learning with Equivariant Similarity Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17700-17709} }
Compositional Video Understanding with Spatiotemporal Structure-based Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Yun_2024_CVPR, author = {Yun, Hoyeoung and Ahn, Jinwoo and Kim, Minseo and Kim, Eun-Sol}, title = {Compositional Video Understanding with Spatiotemporal Structure-based Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18751-18760} }
3D LiDAR Mapping in Dynamic Environments using a 4D Implicit Neural Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhong_2024_CVPR, author = {Zhong, Xingguang and Pan, Yue and Stachniss, Cyrill and Behley, Jens}, title = {3D LiDAR Mapping in Dynamic Environments using a 4D Implicit Neural Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15417-15427} }
What When and Where? Self-Supervised Spatio-Temporal Grounding in Untrimmed Multi-Action Videos from Narrated Instructions-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Brian and Shvetsova, Nina and Rouditchenko, Andrew and Kondermann, Daniel and Thomas, Samuel and Chang, Shih-Fu and Feris, Rogerio and Glass, James and Kuehne, Hilde}, title = {What When and Where? Self-Supervised Spatio-Temporal Grounding in Untrimmed Multi-Action Videos from Narrated Instructions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18419-18429} }
FoundationPose: Unified 6D Pose Estimation and Tracking of Novel Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wen_2024_CVPR, author = {Wen, Bowen and Yang, Wei and Kautz, Jan and Birchfield, Stan}, title = {FoundationPose: Unified 6D Pose Estimation and Tracking of Novel Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17868-17879} }
Hyperbolic Anomaly Detection-
[pdf]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Huimin and Chen, Zhentao and Xu, Yunhao and Hu, Junlin}, title = {Hyperbolic Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17511-17520} }
VLP: Vision Language Planning for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2024_CVPR, author = {Pan, Chenbin and Yaman, Burhaneddin and Nesti, Tommaso and Mallik, Abhirup and Allievi, Alessandro G. and Velipasalar, Senem and Ren, Liu}, title = {VLP: Vision Language Planning for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14760-14769} }
ProMark: Proactive Diffusion Watermarking for Causal Attribution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Asnani_2024_CVPR, author = {Asnani, Vishal and Collomosse, John and Bui, Tu and Liu, Xiaoming and Agarwal, Shruti}, title = {ProMark: Proactive Diffusion Watermarking for Causal Attribution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10802-10811} }
Consistency and Uncertainty: Identifying Unreliable Responses From Black-Box Vision-Language Models for Selective Visual Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Khan_2024_CVPR, author = {Khan, Zaid and Fu, Yun}, title = {Consistency and Uncertainty: Identifying Unreliable Responses From Black-Box Vision-Language Models for Selective Visual Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10854-10863} }
Implicit Motion Function-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Yue and Li, Jiahao and Chu, Lei and Lu, Yan}, title = {Implicit Motion Function}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19278-19289} }
MultiDiff: Consistent Novel View Synthesis from a Single Image-
[pdf]
[supp]
[bibtex]@InProceedings{Muller_2024_CVPR, author = {M{\"u}ller, Norman and Schwarz, Katja and R{\"o}ssle, Barbara and Porzi, Lorenzo and Bul{\`o}, Samuel Rota and Nie{\ss}ner, Matthias and Kontschieder, Peter}, title = {MultiDiff: Consistent Novel View Synthesis from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10258-10268} }
Atom-Level Optical Chemical Structure Recognition with Limited Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Oldenhof_2024_CVPR, author = {Oldenhof, Martijn and De Brouwer, Edward and Arany, Adam and Moreau, Yves}, title = {Atom-Level Optical Chemical Structure Recognition with Limited Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17669-17678} }
LiDAR-based Person Re-identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, Wenxuan and Pan, Zhiyu and Liang, Yingping and Xi, Ziheng and Zhong, Zhicheng and Feng, Jianjiang and Zhou, Jie}, title = {LiDAR-based Person Re-identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17437-17447} }
Model Adaptation for Time Constrained Embodied Control-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Jaehyun and Yoo, Minjong and Woo, Honguk}, title = {Model Adaptation for Time Constrained Embodied Control}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16499-16508} }
ActiveDC: Distribution Calibration for Active Finetuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Wenshuai and Hu, Zhenghui and Lu, Yu and Meng, Jinzhou and Liu, Qingjie and Wang, Yunhong}, title = {ActiveDC: Distribution Calibration for Active Finetuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16996-17005} }
Seeing Unseen: Discover Novel Biomedical Concepts via Geometry-Constrained Probabilistic Modeling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2024_CVPR, author = {Fan, Jianan and Liu, Dongnan and Chang, Hang and Huang, Heng and Chen, Mei and Cai, Weidong}, title = {Seeing Unseen: Discover Novel Biomedical Concepts via Geometry-Constrained Probabilistic Modeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11524-11534} }
Communication-Efficient Federated Learning with Accelerated Client Gradient-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Geeho and Kim, Jinkyu and Han, Bohyung}, title = {Communication-Efficient Federated Learning with Accelerated Client Gradient}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12385-12394} }
LLMs are Good Action Recognizers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qu_2024_CVPR, author = {Qu, Haoxuan and Cai, Yujun and Liu, Jun}, title = {LLMs are Good Action Recognizers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18395-18406} }
Interactive Continual Learning: Fast and Slow Thinking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qi_2024_CVPR, author = {Qi, Biqing and Chen, Xinquan and Gao, Junqi and Li, Dong and Liu, Jianxing and Wu, Ligang and Zhou, Bowen}, title = {Interactive Continual Learning: Fast and Slow Thinking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12882-12892} }
Towards Learning a Generalist Model for Embodied Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Duo and Huang, Shijia and Zhao, Lin and Zhong, Yiwu and Wang, Liwei}, title = {Towards Learning a Generalist Model for Embodied Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13624-13634} }
Splatter Image: Ultra-Fast Single-View 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Szymanowicz_2024_CVPR, author = {Szymanowicz, Stanislaw and Rupprecht, Christian and Vedaldi, Andrea}, title = {Splatter Image: Ultra-Fast Single-View 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10208-10217} }
Modeling Collaborator: Enabling Subjective Vision Classification With Minimal Human Effort via LLM Tool-Use-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Toubal_2024_CVPR, author = {Toubal, Imad Eddine and Avinash, Aditya and Alldrin, Neil Gordon and Dlabal, Jan and Zhou, Wenlei and Luo, Enming and Stretcu, Otilia and Xiong, Hao and Lu, Chun-Ta and Zhou, Howard and Krishna, Ranjay and Fuxman, Ariel and Duerig, Tom}, title = {Modeling Collaborator: Enabling Subjective Vision Classification With Minimal Human Effort via LLM Tool-Use}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17553-17563} }
GeoReF: Geometric Alignment Across Shape Variation for Category-level Object Pose Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Linfang and Tse, Tze Ho Elden and Wang, Chen and Sun, Yinghan and Chen, Hua and Leonardis, Ales and Zhang, Wei and Chang, Hyung Jin}, title = {GeoReF: Geometric Alignment Across Shape Variation for Category-level Object Pose Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10693-10703} }
Learning Group Activity Features Through Person Attribute Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nakatani_2024_CVPR, author = {Nakatani, Chihiro and Kawashima, Hiroaki and Ukita, Norimichi}, title = {Learning Group Activity Features Through Person Attribute Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18233-18242} }
Plug-and-Play Diffusion Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hsiao_2024_CVPR, author = {Hsiao, Yi-Ting and Khodadadeh, Siavash and Duarte, Kevin and Lin, Wei-An and Qu, Hui and Kwon, Mingi and Kalarot, Ratheesh}, title = {Plug-and-Play Diffusion Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13743-13752} }
MindBridge: A Cross-Subject Brain Decoding Framework-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Shizun and Liu, Songhua and Tan, Zhenxiong and Wang, Xinchao}, title = {MindBridge: A Cross-Subject Brain Decoding Framework}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11333-11342} }
MM-Narrator: Narrating Long-form Videos with Multimodal In-Context Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Chaoyi and Lin, Kevin and Yang, Zhengyuan and Wang, Jianfeng and Li, Linjie and Lin, Chung-Ching and Liu, Zicheng and Wang, Lijuan}, title = {MM-Narrator: Narrating Long-form Videos with Multimodal In-Context Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13647-13657} }
Morphable Diffusion: 3D-Consistent Diffusion for Single-image Avatar Creation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Xiyi and Mihajlovic, Marko and Wang, Shaofei and Prokudin, Sergey and Tang, Siyu}, title = {Morphable Diffusion: 3D-Consistent Diffusion for Single-image Avatar Creation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10359-10370} }
Fully Convolutional Slice-to-Volume Reconstruction for Single-Stack MRI-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Young_2024_CVPR, author = {Young, Sean I. and Balbastre, Yael and Fischl, Bruce and Golland, Polina and Iglesias, Juan Eugenio}, title = {Fully Convolutional Slice-to-Volume Reconstruction for Single-Stack MRI}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11535-11545} }
Enhance Image Classification via Inter-Class Image Mixup with Diffusion Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Zhicai and Wei, Longhui and Wang, Tan and Chen, Heyu and Hao, Yanbin and Wang, Xiang and He, Xiangnan and Tian, Qi}, title = {Enhance Image Classification via Inter-Class Image Mixup with Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17223-17233} }
Alpha-CLIP: A CLIP Model Focusing on Wherever You Want-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Zeyi and Fang, Ye and Wu, Tong and Zhang, Pan and Zang, Yuhang and Kong, Shu and Xiong, Yuanjun and Lin, Dahua and Wang, Jiaqi}, title = {Alpha-CLIP: A CLIP Model Focusing on Wherever You Want}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13019-13029} }
ADA-Track: End-to-End Multi-Camera 3D Multi-Object Tracking with Alternating Detection and Association-
[pdf]
[supp]
[bibtex]@InProceedings{Ding_2024_CVPR, author = {Ding, Shuxiao and Schneider, Lukas and Cordts, Marius and Gall, Juergen}, title = {ADA-Track: End-to-End Multi-Camera 3D Multi-Object Tracking with Alternating Detection and Association}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15184-15194} }
Mind The Edge: Refining Depth Edges in Sparsely-Supervised Monocular Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Talker_2024_CVPR, author = {Talker, Lior and Cohen, Aviad and Yosef, Erez and Dana, Alexandra and Dinerstein, Michael}, title = {Mind The Edge: Refining Depth Edges in Sparsely-Supervised Monocular Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10606-10616} }
Attention-Driven Training-Free Efficiency Enhancement of Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Hongjie and Liu, Difan and Kang, Yan and Li, Yijun and Lin, Zhe and Jha, Niraj K. and Liu, Yuchen}, title = {Attention-Driven Training-Free Efficiency Enhancement of Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16080-16089} }
CPR: Retrieval Augmented Generation for Copyright Protection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Golatkar_2024_CVPR, author = {Golatkar, Aditya and Achille, Alessandro and Zancato, Luca and Wang, Yu-Xiang and Swaminathan, Ashwin and Soatto, Stefano}, title = {CPR: Retrieval Augmented Generation for Copyright Protection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12374-12384} }
Vision-and-Language Navigation via Causal Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Liuyi and He, Zongtao and Dang, Ronghao and Shen, Mengjiao and Liu, Chengju and Chen, Qijun}, title = {Vision-and-Language Navigation via Causal Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13139-13150} }
Unveiling Parts Beyond Objects: Towards Finer-Granularity Referring Expression Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Wenxuan and Yue, Tongtian and Zhang, Yisi and Guo, Longteng and He, Xingjian and Wang, Xinlong and Liu, Jing}, title = {Unveiling Parts Beyond Objects: Towards Finer-Granularity Referring Expression Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12998-13008} }
Differentiable Display Photometric Stereo-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Choi_2024_CVPR, author = {Choi, Seokjun and Yoon, Seungwoo and Nam, Giljoo and Lee, Seungyong and Baek, Seung-Hwan}, title = {Differentiable Display Photometric Stereo}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11831-11840} }
In-distribution Public Data Synthesis with Diffusion Models for Differentially Private Image Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2024_CVPR, author = {Park, Jinseong and Choi, Yujin and Lee, Jaewook}, title = {In-distribution Public Data Synthesis with Diffusion Models for Differentially Private Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12236-12246} }
LSK3DNet: Towards Effective and Efficient 3D Perception with Large Sparse Kernels-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2024_CVPR, author = {Feng, Tuo and Wang, Wenguan and Ma, Fan and Yang, Yi}, title = {LSK3DNet: Towards Effective and Efficient 3D Perception with Large Sparse Kernels}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14916-14927} }
Diversified and Personalized Multi-rater Medical Image Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Yicheng and Luo, Xiangde and Xu, Zhe and Guo, Xiaoqing and Ju, Lie and Ge, Zongyuan and Liao, Wenjun and Cai, Jianfei}, title = {Diversified and Personalized Multi-rater Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11470-11479} }
Discover and Mitigate Multiple Biased Subgroups in Image Classifiers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Zeliang and Feng, Mingqian and Li, Zhiheng and Xu, Chenliang}, title = {Discover and Mitigate Multiple Biased Subgroups in Image Classifiers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10906-10915} }
ExMap: Leveraging Explainability Heatmaps for Unsupervised Group Robustness to Spurious Correlations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chakraborty_2024_CVPR, author = {Chakraborty, Rwiddhi and Sletten, Adrian and Kampffmeyer, Michael C.}, title = {ExMap: Leveraging Explainability Heatmaps for Unsupervised Group Robustness to Spurious Correlations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12017-12026} }
Learning to Segment Referred Objects from Narrated Egocentric Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2024_CVPR, author = {Shen, Yuhan and Wang, Huiyu and Yang, Xitong and Feiszli, Matt and Elhamifar, Ehsan and Torresani, Lorenzo and Mavroudi, Effrosyni}, title = {Learning to Segment Referred Objects from Narrated Egocentric Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14510-14520} }
Adapting Visual-Language Models for Generalizable Anomaly Detection in Medical Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Chaoqin and Jiang, Aofan and Feng, Jinghao and Zhang, Ya and Wang, Xinchao and Wang, Yanfeng}, title = {Adapting Visual-Language Models for Generalizable Anomaly Detection in Medical Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11375-11385} }
Depth-aware Test-Time Training for Zero-shot Video Object Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Weihuang and Shen, Xi and Li, Haolun and Bi, Xiuli and Liu, Bo and Pun, Chi-Man and Cun, Xiaodong}, title = {Depth-aware Test-Time Training for Zero-shot Video Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19218-19227} }
RMem: Restricted Memory Banks Improve Video Object Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Junbao and Pang, Ziqi and Wang, Yu-Xiong}, title = {RMem: Restricted Memory Banks Improve Video Object Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18602-18611} }
Zero-TPrune: Zero-Shot Token Pruning through Leveraging of the Attention Graph in Pre-Trained Transformers-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Hongjie and Dedhia, Bhishma and Jha, Niraj K.}, title = {Zero-TPrune: Zero-Shot Token Pruning through Leveraging of the Attention Graph in Pre-Trained Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16070-16079} }
DIBS: Enhancing Dense Video Captioning with Unlabeled Videos via Pseudo Boundary Enrichment and Online Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Hao and Liu, Huabin and Qiao, Yu and Sun, Xiao}, title = {DIBS: Enhancing Dense Video Captioning with Unlabeled Videos via Pseudo Boundary Enrichment and Online Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18699-18708} }
SOK-Bench: A Situated Video Reasoning Benchmark with Aligned Open-World Knowledge-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Andong and Wu, Bo and Chen, Sunli and Chen, Zhenfang and Guan, Haotian and Lee, Wei-Ning and Li, Li Erran and Gan, Chuang}, title = {SOK-Bench: A Situated Video Reasoning Benchmark with Aligned Open-World Knowledge}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13384-13394} }
LORS: Low-rank Residual Structure for Parameter-Efficient Network Stacking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Jialin and Nie, Qiang and Fu, Weifu and Lin, Yuhuan and Tao, Guangpin and Liu, Yong and Wang, Chengjie}, title = {LORS: Low-rank Residual Structure for Parameter-Efficient Network Stacking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15866-15876} }
Multi-modal In-Context Learning Makes an Ego-evolving Scene Text Recognizer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Zhen and Tang, Jingqun and Lin, Chunhui and Wu, Binghong and Huang, Can and Liu, Hao and Tan, Xin and Zhang, Zhizhong and Xie, Yuan}, title = {Multi-modal In-Context Learning Makes an Ego-evolving Scene Text Recognizer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15567-15576} }
Training Diffusion Models Towards Diverse Image Generation with Reinforcement Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Miao_2024_CVPR, author = {Miao, Zichen and Wang, Jiang and Wang, Ze and Yang, Zhengyuan and Wang, Lijuan and Qiu, Qiang and Liu, Zicheng}, title = {Training Diffusion Models Towards Diverse Image Generation with Reinforcement Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10844-10853} }
LASIL: Learner-Aware Supervised Imitation Learning For Long-term Microscopic Traffic Simulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, Ke and Miao, Zhenwei and Jing, Wei and Liu, Weiwei and Li, Weizi and Hao, Dayang and Pan, Jia}, title = {LASIL: Learner-Aware Supervised Imitation Learning For Long-term Microscopic Traffic Simulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15386-15395} }
SeaBird: Segmentation in Bird's View with Dice Loss Improves Monocular 3D Detection of Large Objects-
[pdf]
[supp]
[bibtex]@InProceedings{Kumar_2024_CVPR, author = {Kumar, Abhinav and Guo, Yuliang and Huang, Xinyu and Ren, Liu and Liu, Xiaoming}, title = {SeaBird: Segmentation in Bird's View with Dice Loss Improves Monocular 3D Detection of Large Objects}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10269-10280} }
NOPE: Novel Object Pose Estimation from a Single Image-
[pdf]
[supp]
[bibtex]@InProceedings{Nguyen_2024_CVPR, author = {Nguyen, Van Nguyen and Groueix, Thibault and Ponimatkin, Georgy and Hu, Yinlin and Marlet, Renaud and Salzmann, Mathieu and Lepetit, Vincent}, title = {NOPE: Novel Object Pose Estimation from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17923-17932} }
Dual-View Visual Contextualization for Web Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kil_2024_CVPR,
  author    = {Kil, Jihyung and Song, Chan Hee and Zheng, Boyuan and Deng, Xiang and Su, Yu and Chao, Wei-Lun},
  title     = {Dual-View Visual Contextualization for Web Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14445--14454},
}
Language-driven Grasp Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Vuong_2024_CVPR,
  author    = {Vuong, An Dinh and Vu, Minh Nhat and Huang, Baoru and Nguyen, Nghia and Le, Hieu and Vo, Thieu and Nguyen, Anh},
  title     = {Language-driven Grasp Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17902--17912},
}
Towards Modern Image Manipulation Localization: A Large-Scale Dataset and Novel Methods-
[pdf]
[supp]
[bibtex]@InProceedings{Qu_2024_CVPR,
  author    = {Qu, Chenfan and Zhong, Yiwu and Liu, Chongyu and Xu, Guitao and Peng, Dezhi and Guo, Fengjun and Jin, Lianwen},
  title     = {Towards Modern Image Manipulation Localization: A Large-Scale Dataset and Novel Methods},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10781--10790},
}
Object Recognition as Next Token Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yue_2024_CVPR,
  author    = {Yue, Kaiyu and Chen, Bor-Chun and Geiping, Jonas and Li, Hengduo and Goldstein, Tom and Lim, Ser-Nam},
  title     = {Object Recognition as Next Token Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16645--16656},
}
Transcriptomics-guided Slide Representation Learning in Computational Pathology-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jaume_2024_CVPR,
  author    = {Jaume, Guillaume and Oldenburg, Lukas and Vaidya, Anurag and Chen, Richard J. and Williamson, Drew F. K. and Peeters, Thomas and Song, Andrew H. and Mahmood, Faisal},
  title     = {Transcriptomics-guided Slide Representation Learning in Computational Pathology},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9632--9644},
}
CLIP-BEVFormer: Enhancing Multi-View Image-Based BEV Detector with Ground Truth Flow-
[pdf]
[supp]
[bibtex]@InProceedings{Pan_2024_CVPR,
  author    = {Pan, Chenbin and Yaman, Burhaneddin and Velipasalar, Senem and Ren, Liu},
  title     = {{CLIP-BEVFormer}: Enhancing Multi-View Image-Based {BEV} Detector with Ground Truth Flow},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15216--15225},
}
CLOVA: A Closed-LOop Visual Assistant with Tool Usage and Update-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2024_CVPR,
  author    = {Gao, Zhi and Du, Yuntao and Zhang, Xintong and Ma, Xiaojian and Han, Wenjuan and Zhu, Song-Chun and Li, Qing},
  title     = {{CLOVA}: A {Closed-LOop} Visual Assistant with Tool Usage and Update},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13258--13268},
}
Depth Prompting for Sensor-Agnostic Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2024_CVPR,
  author    = {Park, Jin-Hwi and Jeong, Chanhwi and Lee, Junoh and Jeon, Hae-Gon},
  title     = {Depth Prompting for Sensor-Agnostic Depth Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9859--9869},
}
G3DR: Generative 3D Reconstruction in ImageNet-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Reddy_2024_CVPR,
  author    = {Reddy, Pradyumna and Elezi, Ismail and Deng, Jiankang},
  title     = {{G3DR}: Generative {3D} Reconstruction in {ImageNet}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9655--9665},
}
Hyperspherical Classification with Dynamic Label-to-Prototype Assignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saadabadi_2024_CVPR,
  author    = {Saadabadi, Mohammad Saeed Ebrahimi and Dabouei, Ali and Malakshan, Sahar Rahimi and Nasrabadi, Nasser M.},
  title     = {Hyperspherical Classification with Dynamic Label-to-Prototype Assignment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17333--17342},
}
VTimeLLM: Empower LLM to Grasp Video Moments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Bin and Wang, Xin and Chen, Hong and Song, Zihan and Zhu, Wenwu},
  title     = {{VTimeLLM}: Empower {LLM} to Grasp Video Moments},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14271--14280},
}
FLHetBench: Benchmarking Device and State Heterogeneity in Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Junyuan and Zeng, Shuang and Zhang, Miao and Wang, Runxi and Wang, Feifei and Zhou, Yuyin and Liang, Paul Pu and Qu, Liangqiong},
  title     = {{FLHetBench}: Benchmarking Device and State Heterogeneity in Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12098--12108},
}
Privacy-Preserving Optics for Enhancing Protection in Face De-Identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lopez_2024_CVPR,
  author    = {Lopez, Jhon and Hinojosa, Carlos and Arguello, Henry and Ghanem, Bernard},
  title     = {Privacy-Preserving Optics for Enhancing Protection in Face De-Identification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12120--12129},
}
SmartRefine: A Scenario-Adaptive Refinement Framework for Efficient Motion Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR,
  author    = {Zhou, Yang and Shao, Hao and Wang, Letian and Waslander, Steven L. and Li, Hongsheng and Liu, Yu},
  title     = {{SmartRefine}: A Scenario-Adaptive Refinement Framework for Efficient Motion Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15281--15290},
}
Multi-Scale Video Anomaly Detection by Multi-Grained Spatio-Temporal Representation Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Menghao and Wang, Jingyu and Qi, Qi and Sun, Haifeng and Zhuang, Zirui and Ren, Pengfei and Ma, Ruilong and Liao, Jianxin},
  title     = {Multi-Scale Video Anomaly Detection by Multi-Grained Spatio-Temporal Representation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17385--17394},
}
Generative Multimodal Models are In-Context Learners-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR,
  author    = {Sun, Quan and Cui, Yufeng and Zhang, Xiaosong and Zhang, Fan and Yu, Qiying and Wang, Yueze and Rao, Yongming and Liu, Jingjing and Huang, Tiejun and Wang, Xinlong},
  title     = {Generative Multimodal Models are In-Context Learners},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14398--14409},
}
Feature Re-Embedding: Towards Foundation Model-Level Performance in Computational Pathology-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2024_CVPR,
  author    = {Tang, Wenhao and Zhou, Fengtao and Huang, Sheng and Zhu, Xiang and Zhang, Yi and Liu, Bo},
  title     = {Feature Re-Embedding: Towards Foundation Model-Level Performance in Computational Pathology},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11343--11352},
}
Text Prompt with Normality Guidance for Weakly Supervised Video Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Zhiwei and Liu, Jing and Wu, Peng},
  title     = {Text Prompt with Normality Guidance for Weakly Supervised Video Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18899--18908},
}
SparseOcc: Rethinking Sparse Latent Representation for Vision-Based Semantic Occupancy Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2024_CVPR,
  author    = {Tang, Pin and Wang, Zhongdao and Wang, Guoqing and Zheng, Jilai and Ren, Xiangxuan and Feng, Bailan and Ma, Chao},
  title     = {{SparseOcc}: Rethinking Sparse Latent Representation for Vision-Based Semantic Occupancy Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15035--15044},
}
Frequency Decoupling for Motion Magnification via Multi-Level Isomorphic Architecture-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Fei and Guo, Dan and Li, Kun and Zhong, Zhun and Wang, Meng},
  title     = {Frequency Decoupling for Motion Magnification via Multi-Level Isomorphic Architecture},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18984--18994},
}
Hyperbolic Learning with Synthetic Captions for Open-World Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kong_2024_CVPR,
  author    = {Kong, Fanjie and Chen, Yanbei and Cai, Jiarui and Modolo, Davide},
  title     = {Hyperbolic Learning with Synthetic Captions for Open-World Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16762--16771},
}
Interpretable Measures of Conceptual Similarity by Complexity-Constrained Descriptive Auto-Encoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Achille_2024_CVPR,
  author    = {Achille, Alessandro and Ver Steeg, Greg and Liu, Tian Yu and Trager, Matthew and Klingenberg, Carson and Soatto, Stefano},
  title     = {Interpretable Measures of Conceptual Similarity by Complexity-Constrained Descriptive Auto-Encoding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11062--11071},
}
3D Feature Tracking via Event Camera-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Siqi and Zhou, Zhikuan and Xue, Zhou and Li, Yipeng and Du, Shaoyi and Gao, Yue},
  title     = {{3D} Feature Tracking via Event Camera},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18974--18983},
}
MaxQ: Multi-Axis Query for N:M Sparsity Network-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiang_2024_CVPR,
  author    = {Xiang, Jingyang and Li, Siqi and Chen, Junhao and Chen, Zhuangzhi and Huang, Tianxin and Peng, Linpeng and Liu, Yong},
  title     = {{MaxQ}: Multi-Axis Query for {N:M} Sparsity Network},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15845--15854},
}
Part-aware Unified Representation of Language and Skeleton for Zero-shot Action Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR,
  author    = {Zhu, Anqi and Ke, Qiuhong and Gong, Mingming and Bailey, James},
  title     = {Part-aware Unified Representation of Language and Skeleton for Zero-shot Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18761--18770},
}
Composing Object Relations and Attributes for Image-Text Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pham_2024_CVPR,
  author    = {Pham, Khoi and Huynh, Chuong and Lim, Ser-Nam and Shrivastava, Abhinav},
  title     = {Composing Object Relations and Attributes for Image-Text Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14354--14363},
}
Previously on ... From Recaps to Story Summarization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Singh_2024_CVPR,
  author    = {Singh, Aditya Kumar and Srivastava, Dhruv and Tapaswi, Makarand},
  title     = {Previously on ... From Recaps to Story Summarization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13635--13646},
}
mPLUG-Owl2: Revolutionizing Multi-modal Large Language Model with Modality Collaboration-
[pdf]
[supp]
[bibtex]@InProceedings{Ye_2024_CVPR,
  author    = {Ye, Qinghao and Xu, Haiyang and Ye, Jiabo and Yan, Ming and Hu, Anwen and Liu, Haowei and Qian, Qi and Zhang, Ji and Huang, Fei},
  title     = {{mPLUG-Owl2}: Revolutionizing Multi-modal Large Language Model with Modality Collaboration},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13040--13051},
}
Learning by Correction: Efficient Tuning Task for Zero-Shot Generative Vision-Language Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Rongjie and Wu, Yu and He, Xuming},
  title     = {Learning by Correction: Efficient Tuning Task for Zero-Shot Generative Vision-Language Reasoning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13428--13437},
}
Supervised Anomaly Detection for Complex Industrial Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Baitieva_2024_CVPR,
  author    = {Baitieva, Aimira and Hurych, David and Besnier, Victor and Bernard, Olivier},
  title     = {Supervised Anomaly Detection for Complex Industrial Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17754--17762},
}
Open3DSG: Open-Vocabulary 3D Scene Graphs from Point Clouds with Queryable Objects and Open-Set Relationships-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Koch_2024_CVPR,
  author    = {Koch, Sebastian and Vaskevicius, Narunas and Colosi, Mirco and Hermosilla, Pedro and Ropinski, Timo},
  title     = {{Open3DSG}: Open-Vocabulary {3D} Scene Graphs from Point Clouds with Queryable Objects and Open-Set Relationships},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14183--14193},
}
SURE: SUrvey REcipes for building reliable and robust deep networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Yuting and Chen, Yingyi and Yu, Xuanlong and Chen, Dexiong and Shen, Xi},
  title     = {{SURE}: {SUrvey} {REcipes} for building reliable and robust deep networks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17500--17510},
}
PolarRec: Improving Radio Interferometric Data Reconstruction Using Polar Coordinates-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Ruoqi and Chen, Zhuoyang and Zhu, Jiayi and Luo, Qiong and Wang, Feng},
  title     = {{PolarRec}: Improving Radio Interferometric Data Reconstruction Using Polar Coordinates},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12841--12850},
}
Summarize the Past to Predict the Future: Natural Language Descriptions of Context Boost Multimodal Object Interaction Anticipation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pasca_2024_CVPR,
  author    = {Pasca, Razvan-George and Gavryushin, Alexey and Hamza, Muhammad and Kuo, Yen-Ling and Mo, Kaichun and Van Gool, Luc and Hilliges, Otmar and Wang, Xi},
  title     = {Summarize the Past to Predict the Future: Natural Language Descriptions of Context Boost Multimodal Object Interaction Anticipation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18286--18296},
}
Towards CLIP-driven Language-free 3D Visual Grounding via 2D-3D Relational Enhancement and Consistency-
[pdf]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Yuqi and Luo, Han and Lei, Yinjie},
  title     = {Towards {CLIP}-driven Language-free {3D} Visual Grounding via {2D-3D} Relational Enhancement and Consistency},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13063--13072},
}
Optimal Transport Aggregation for Visual Place Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Izquierdo_2024_CVPR,
  author    = {Izquierdo, Sergio and Civera, Javier},
  title     = {Optimal Transport Aggregation for Visual Place Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17658--17668},
}
Aligning and Prompting Everything All at Once for Universal Visual Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2024_CVPR,
  author    = {Shen, Yunhang and Fu, Chaoyou and Chen, Peixian and Zhang, Mengdan and Li, Ke and Sun, Xing and Wu, Yunsheng and Lin, Shaohui and Ji, Rongrong},
  title     = {Aligning and Prompting Everything All at Once for Universal Visual Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13193--13203},
}
Correlation-Decoupled Knowledge Distillation for Multimodal Sentiment Analysis with Incomplete Modalities-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Mingcheng and Yang, Dingkang and Zhao, Xiao and Wang, Shuaibing and Wang, Yan and Yang, Kun and Sun, Mingyang and Kou, Dongliang and Qian, Ziyun and Zhang, Lihua},
  title     = {Correlation-Decoupled Knowledge Distillation for Multimodal Sentiment Analysis with Incomplete Modalities},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12458--12468},
}
LoSh: Long-Short Text Joint Prediction Network for Referring Video Object Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2024_CVPR,
  author    = {Yuan, Linfeng and Shi, Miaojing and Yue, Zijie and Chen, Qijun},
  title     = {{LoSh}: Long-Short Text Joint Prediction Network for Referring Video Object Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14001--14010},
}
Dual Prototype Attention for Unsupervised Video Object Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Cho_2024_CVPR,
  author    = {Cho, Suhwan and Lee, Minhyeok and Lee, Seunghoon and Lee, Dogyoon and Choi, Heeseung and Kim, Ig-Jae and Lee, Sangyoun},
  title     = {Dual Prototype Attention for Unsupervised Video Object Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {19238--19247},
}
Navigate Beyond Shortcuts: Debiased Learning Through the Lens of Neural Collapse-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Yining and Sun, Junjie and Wang, Chenyue and Zhang, Mi and Yang, Min},
  title     = {Navigate Beyond Shortcuts: Debiased Learning Through the Lens of Neural Collapse},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12322--12331},
}
A Subspace-Constrained Tyler's Estimator and its Applications to Structure from Motion-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2024_CVPR,
  author    = {Yu, Feng and Zhang, Teng and Lerman, Gilad},
  title     = {A Subspace-Constrained {Tyler's} Estimator and its Applications to Structure from Motion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14575--14584},
}
CAD: Photorealistic 3D Generation via Adversarial Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wan_2024_CVPR,
  author    = {Wan, Ziyu and Paschalidou, Despoina and Huang, Ian and Liu, Hongyu and Shen, Bokui and Xiang, Xiaoyu and Liao, Jing and Guibas, Leonidas},
  title     = {{CAD}: Photorealistic {3D} Generation via Adversarial Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10194--10207},
}
Enhancing Vision-Language Pre-training with Rich Supervisions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2024_CVPR,
  author    = {Gao, Yuan and Shi, Kunyu and Zhu, Pengkai and Belval, Edouard and Nuriel, Oren and Appalaraju, Srikar and Ghadar, Shabnam and Tu, Zhuowen and Mahadevan, Vijay and Soatto, Stefano},
  title     = {Enhancing Vision-Language Pre-training with Rich Supervisions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13480--13491},
}
Adaptive VIO: Deep Visual-Inertial Odometry with Online Continual Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pan_2024_CVPR,
  author    = {Pan, Youqi and Zhou, Wugen and Cao, Yingdian and Zha, Hongbin},
  title     = {Adaptive {VIO}: Deep Visual-Inertial Odometry with Online Continual Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18019--18028},
}
Generalized Large-Scale Data Condensation via Various Backbone and Statistical Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2024_CVPR,
  author    = {Shao, Shitong and Yin, Zeyuan and Zhou, Muxin and Zhang, Xindong and Shen, Zhiqiang},
  title     = {Generalized Large-Scale Data Condensation via Various Backbone and Statistical Matching},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16709--16718},
}
On Train-Test Class Overlap and Detection for Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2024_CVPR,
  author    = {Song, Chull Hwan and Yoon, Jooyoung and Hwang, Taebaek and Choi, Shunghyun and Gu, Yeong Hyeon and Avrithis, Yannis},
  title     = {On Train-Test Class Overlap and Detection for Image Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17375--17384},
}
AttriHuman-3D: Editable 3D Human Avatar Generation with Attribute Decomposition and Indexing-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Fan and Chen, Tianyi and He, Xiaosheng and Cai, Zhongang and Yang, Lei and Wu, Si and Lin, Guosheng},
  title     = {{AttriHuman-3D}: Editable {3D} Human Avatar Generation with Attribute Decomposition and Indexing},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10596--10605},
}
Learning Object State Changes in Videos: An Open-World Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2024_CVPR,
  author    = {Xue, Zihui and Ashutosh, Kumar and Grauman, Kristen},
  title     = {Learning Object State Changes in Videos: An Open-World Perspective},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18493--18503},
}
SCoFT: Self-Contrastive Fine-Tuning for Equitable Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Zhixuan and Schaldenbrand, Peter and Okogwu, Beverley-Claire and Peng, Wenxuan and Yun, Youngsik and Hundt, Andrew and Kim, Jihie and Oh, Jean},
  title     = {{SCoFT}: Self-Contrastive Fine-Tuning for Equitable Image Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10822--10832},
}
Iterated Learning Improves Compositionality in Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR,
  author    = {Zheng, Chenhao and Zhang, Jieyu and Kembhavi, Aniruddha and Krishna, Ranjay},
  title     = {Iterated Learning Improves Compositionality in Large Vision-Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13785--13795},
}
Event Stream-based Visual Object Tracking: A High-Resolution Benchmark Dataset and A Novel Baseline-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Xiao and Wang, Shiao and Tang, Chuanming and Zhu, Lin and Jiang, Bo and Tian, Yonghong and Tang, Jin},
  title     = {Event Stream-based Visual Object Tracking: A High-Resolution Benchmark Dataset and A Novel Baseline},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {19248--19257},
}
Dual DETRs for Multi-Label Temporal Action Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR,
  author    = {Zhu, Yuhan and Zhang, Guozhen and Tan, Jing and Wu, Gangshan and Wang, Limin},
  title     = {Dual {DETRs} for Multi-Label Temporal Action Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18559--18569},
}
Virtual Immunohistochemistry Staining for Histological Images Assisted by Weakly-supervised Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Jiahan and Dong, Jiuyang and Huang, Shenjin and Li, Xi and Jiang, Junjun and Fan, Xiaopeng and Zhang, Yongbing},
  title     = {Virtual Immunohistochemistry Staining for Histological Images Assisted by Weakly-supervised Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11259--11268},
}
DeCoTR: Enhancing Depth Completion with 2D and 3D Attentions-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shi_2024_CVPR,
  author    = {Shi, Yunxiao and Singh, Manish Kumar and Cai, Hong and Porikli, Fatih},
  title     = {{DeCoTR}: Enhancing Depth Completion with {2D} and {3D} Attentions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10736--10746},
}
Utility-Fairness Trade-Offs and How to Find Them-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dehdashtian_2024_CVPR,
  author    = {Dehdashtian, Sepehr and Sadeghi, Bashir and Boddeti, Vishnu Naresh},
  title     = {Utility-Fairness Trade-Offs and How to Find Them},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12037--12046},
}
SAOR: Single-View Articulated Object Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Aygun_2024_CVPR,
  author    = {Aygun, Mehmet and Mac Aodha, Oisin},
  title     = {{SAOR}: Single-View Articulated Object Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10382--10391},
}
A Theory of Joint Light and Heat Transport for Lambertian Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Ramanagopal_2024_CVPR,
  author    = {Ramanagopal, Mani and Narayanan, Sriram and Sankaranarayanan, Aswin C. and Narasimhan, Srinivasa G.},
  title     = {A Theory of Joint Light and Heat Transport for {Lambertian} Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11924--11933},
}
iKUN: Speak to Trackers without Retraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2024_CVPR,
  author    = {Du, Yunhao and Lei, Cheng and Zhao, Zhicheng and Su, Fei},
  title     = {{iKUN}: Speak to Trackers without Retraining},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {19135--19144},
}
Facial Identity Anonymization via Intrinsic and Extrinsic Attention Distraction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kuang_2024_CVPR,
  author    = {Kuang, Zhenzhong and Yang, Xiaochen and Shen, Yingjie and Hu, Chao and Yu, Jun},
  title     = {Facial Identity Anonymization via Intrinsic and Extrinsic Attention Distraction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12406--12415},
}
3D-SceneDreamer: Text-Driven 3D-Consistent Scene Generation-
[pdf]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Songchun and Zhang, Yibo and Zheng, Quan and Ma, Rui and Hua, Wei and Bao, Hujun and Xu, Weiwei and Zou, Changqing},
  title     = {{3D-SceneDreamer}: Text-Driven {3D}-Consistent Scene Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10170--10180},
}
VMINer: Versatile Multi-view Inverse Rendering with Near- and Far-field Light Sources-
[pdf]
[supp]
[bibtex]@InProceedings{Fei_2024_CVPR,
  author    = {Fei, Fan and Tang, Jiajun and Tan, Ping and Shi, Boxin},
  title     = {{VMINer}: Versatile Multi-view Inverse Rendering with Near- and Far-field Light Sources},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11800--11809},
}
RoHM: Robust Human Motion Reconstruction via Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Siwei and Bhatnagar, Bharat Lal and Xu, Yuanlu and Winkler, Alexander and Kadlecek, Petr and Tang, Siyu and Bogo, Federica},
  title     = {{RoHM}: Robust Human Motion Reconstruction via Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14606--14617},
}
Do You Remember? Dense Video Captioning with Cross-Modal Memory Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Minkuk and Kim, Hyeon Bae and Moon, Jinyoung and Choi, Jinwoo and Kim, Seong Tae},
  title     = {Do You Remember? Dense Video Captioning with Cross-Modal Memory Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13894--13904},
}
SPAD: Spatially Aware Multi-View Diffusers-
[pdf]
[supp]
[bibtex]@InProceedings{Kant_2024_CVPR,
  author    = {Kant, Yash and Siarohin, Aliaksandr and Wu, Ziyi and Vasilkovsky, Michael and Qian, Guocheng and Ren, Jian and Guler, Riza Alp and Ghanem, Bernard and Tulyakov, Sergey and Gilitschenski, Igor},
  title     = {{SPAD}: Spatially Aware Multi-View Diffusers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10026--10038},
}
Gradient Reweighting: Towards Imbalanced Class-Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2024_CVPR,
  author    = {He, Jiangpeng},
  title     = {Gradient Reweighting: Towards Imbalanced Class-Incremental Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16668--16677},
}
Gaussian Splatting SLAM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Matsuki_2024_CVPR,
  author    = {Matsuki, Hidenobu and Murai, Riku and Kelly, Paul H. J. and Davison, Andrew J.},
  title     = {{Gaussian} Splatting {SLAM}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18039--18048},
}
Not All Classes Stand on Same Embeddings: Calibrating a Semantic Distance with Metric Tensor-
[pdf]
[supp]
[bibtex]
@InProceedings{Park_2024_CVPR,
  author    = {Park, Jae Hyeon and Lee, Gyoomin and Park, Seunggi and Cho, Sung In},
  title     = {Not All Classes Stand on Same Embeddings: Calibrating a Semantic Distance with Metric Tensor},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17722--17731},
}
A Simple Recipe for Contrastively Pre-training Video-First Encoders Beyond 16 Frames-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Papalampidi_2024_CVPR,
  author    = {Papalampidi, Pinelopi and Koppula, Skanda and Pathak, Shreya and Chiu, Justin and Heyward, Joe and Patraucean, Viorica and Shen, Jiajun and Miech, Antoine and Zisserman, Andrew and Nematzadeh, Aida},
  title     = {A Simple Recipe for Contrastively Pre-training Video-First Encoders Beyond 16 Frames},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14386--14397},
}
Hierarchical Diffusion Policy for Kinematics-Aware Multi-Task Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Xiao and Patidar, Sumit and Haughton, Iain and James, Stephen},
  title     = {Hierarchical Diffusion Policy for Kinematics-Aware Multi-Task Robotic Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18081--18090},
}
Benchmarking the Robustness of Temporal Action Detection Models Against Temporal Corruptions-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Zeng_2024_CVPR,
  author    = {Zeng, Runhao and Chen, Xiaoyong and Liang, Jiaming and Wu, Huisi and Cao, Guangzhong and Guo, Yong},
  title     = {Benchmarking the Robustness of Temporal Action Detection Models Against Temporal Corruptions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18263--18274},
}
Open-World Human-Object Interaction Detection via Multi-modal Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Jie and Li, Bingliang and Zeng, Ailing and Zhang, Lei and Zhang, Ruimao},
  title     = {Open-World Human-Object Interaction Detection via Multi-modal Prompts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16954--16964},
}
UniMODE: Unified Monocular 3D Object Detection-
[pdf]
[arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Zhuoling and Xu, Xiaogang and Lim, SerNam and Zhao, Hengshuang},
  title     = {{UniMODE}: Unified Monocular {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16561--16570},
}
Multi-agent Collaborative Perception via Motion-aware Robust Communication Network-
[pdf]
[bibtex]
@InProceedings{Hong_2024_CVPR,
  author    = {Hong, Shixin and Liu, Yu and Li, Zhi and Li, Shaohui and He, You},
  title     = {Multi-agent Collaborative Perception via Motion-aware Robust Communication Network},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15301--15310},
}
The Manga Whisperer: Automatically Generating Transcriptions for Comics-
[pdf]
[arXiv]
[bibtex]
@InProceedings{Sachdeva_2024_CVPR,
  author    = {Sachdeva, Ragav and Zisserman, Andrew},
  title     = {The Manga Whisperer: Automatically Generating Transcriptions for Comics},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12967--12976},
}
Exploring Region-Word Alignment in Built-in Detector for Open-Vocabulary Object Detection-
[pdf]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Heng and Zhao, Qiuyu and Zheng, Linyu and Zeng, Hao and Ge, Zhiwei and Li, Tianhao and Xu, Sulong},
  title     = {Exploring Region-Word Alignment in Built-in Detector for Open-Vocabulary Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16975--16984},
}
MovieChat: From Dense Token to Sparse Memory for Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Song_2024_CVPR,
  author    = {Song, Enxin and Chai, Wenhao and Wang, Guanhong and Zhang, Yucheng and Zhou, Haoyang and Wu, Feiyang and Chi, Haozhe and Guo, Xun and Ye, Tian and Zhang, Yanting and Lu, Yan and Hwang, Jenq-Neng and Wang, Gaoang},
  title     = {{MovieChat}: From Dense Token to Sparse Memory for Long Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18221--18232},
}
Comparing the Decision-Making Mechanisms by Transformers and CNNs via Explanation Methods-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Jiang_2024_CVPR,
  author    = {Jiang, Mingqi and Khorram, Saeed and Fuxin, Li},
  title     = {Comparing the Decision-Making Mechanisms by Transformers and {CNNs} via Explanation Methods},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9546--9555},
}
Atlantis: Enabling Underwater Depth Estimation with Stable Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Fan and You, Shaodi and Li, Yu and Fu, Ying},
  title     = {Atlantis: Enabling Underwater Depth Estimation with {Stable Diffusion}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11852--11861},
}
Matching Anything by Segmenting Anything-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Siyuan and Ke, Lei and Danelljan, Martin and Piccinelli, Luigi and Segu, Mattia and Van Gool, Luc and Yu, Fisher},
  title     = {Matching Anything by Segmenting Anything},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18963--18973},
}
Decoupled Pseudo-labeling for Semi-Supervised Monocular 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Jiacheng and Li, Jiaming and Lin, Xiangru and Zhang, Wei and Tan, Xiao and Han, Junyu and Ding, Errui and Wang, Jingdong and Li, Guanbin},
  title     = {Decoupled Pseudo-labeling for Semi-Supervised Monocular {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16923--16932},
}
Temporally Consistent Unbalanced Optimal Transport for Unsupervised Action Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Ming and Gould, Stephen},
  title     = {Temporally Consistent Unbalanced Optimal Transport for Unsupervised Action Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14618--14627},
}
Learning Transferable Negative Prompts for Out-of-Distribution Detection-
[pdf]
[arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Tianqi and Pang, Guansong and Bai, Xiao and Miao, Wenjun and Zheng, Jin},
  title     = {Learning Transferable Negative Prompts for Out-of-Distribution Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17584--17594},
}
Holistic Features are almost Sufficient for Text-to-Video Retrieval-
[pdf]
[bibtex]
@InProceedings{Tian_2024_CVPR,
  author    = {Tian, Kaibin and Zhao, Ruixiang and Xin, Zijie and Lan, Bangxiang and Li, Xirong},
  title     = {Holistic Features are almost Sufficient for Text-to-Video Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17138--17147},
}
Uncertainty-aware Action Decoupling Transformer for Action Anticipation-
[pdf]
[supp]
[bibtex]
@InProceedings{Guo_2024_CVPR,
  author    = {Guo, Hongji and Agarwal, Nakul and Lo, Shao-Yuan and Lee, Kwonjoon and Ji, Qiang},
  title     = {Uncertainty-aware Action Decoupling Transformer for Action Anticipation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18644--18654},
}
One-Prompt to Segment All Medical Images-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Junde and Xu, Min},
  title     = {One-Prompt to Segment All Medical Images},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11302--11312},
}
GROUNDHOG: Grounding Large Language Models to Holistic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Yichi and Ma, Ziqiao and Gao, Xiaofeng and Shakiah, Suhaila and Gao, Qiaozi and Chai, Joyce},
  title     = {{GROUNDHOG}: Grounding Large Language Models to Holistic Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14227--14238},
}
Omni-SMoLA: Boosting Generalist Multimodal Models with Soft Mixture of Low-rank Experts-
[pdf]
[supp]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Jialin and Hu, Xia and Wang, Yaqing and Pang, Bo and Soricut, Radu},
  title     = {{Omni-SMoLA}: Boosting Generalist Multimodal Models with Soft Mixture of Low-rank Experts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14205--14215},
}
SeMoLi: What Moves Together Belongs Together-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Seidenschwarz_2024_CVPR,
  author    = {Seidenschwarz, Jenny and Osep, Aljosa and Ferroni, Francesco and Lucey, Simon and Leal-Taixe, Laura},
  title     = {{SeMoLi}: What Moves Together Belongs Together},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14685--14694},
}
Context-Guided Spatio-Temporal Video Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Gu_2024_CVPR,
  author    = {Gu, Xin and Fan, Heng and Huang, Yan and Luo, Tiejian and Zhang, Libo},
  title     = {Context-Guided Spatio-Temporal Video Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18330--18339},
}
Explaining the Implicit Neural Canvas: Connecting Pixels to Neurons by Tracing their Contributions-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Padmanabhan_2024_CVPR,
  author    = {Padmanabhan, Namitha and Gwilliam, Matthew and Kumar, Pulkit and Maiya, Shishira R and Ehrlich, Max and Shrivastava, Abhinav},
  title     = {Explaining the Implicit Neural Canvas: Connecting Pixels to Neurons by Tracing their Contributions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10957--10967},
}
Adapting to Length Shift: FlexiLength Network for Trajectory Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Yi and Fu, Yun},
  title     = {Adapting to Length Shift: {FlexiLength} Network for Trajectory Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15226--15237},
}
WorDepth: Variational Language Prior for Monocular Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Zeng_2024_CVPR,
  author    = {Zeng, Ziyao and Wang, Daniel and Yang, Fengyu and Park, Hyoungseob and Soatto, Stefano and Lao, Dong and Wong, Alex},
  title     = {{WorDepth}: Variational Language Prior for Monocular Depth Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9708--9719},
}
A Unified Framework for Microscopy Defocus Deblur with Multi-Pyramid Transformer and Contrastive Learning-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Yuelin and Zheng, Pengyu and Yan, Wanquan and Fang, Chengyu and Cheng, Shing Shin},
  title     = {A Unified Framework for Microscopy Defocus Deblur with Multi-Pyramid Transformer and Contrastive Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11125--11136},
}
Frozen Feature Augmentation for Few-Shot Image Classification-
[pdf]
[supp]
[bibtex]
@InProceedings{Bar_2024_CVPR,
  author    = {B{\"a}r, Andreas and Houlsby, Neil and Dehghani, Mostafa and Kumar, Manoj},
  title     = {Frozen Feature Augmentation for Few-Shot Image Classification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16046--16057},
}
Incorporating Geo-Diverse Knowledge into Prompting for Increased Geographical Robustness in Object Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Buettner_2024_CVPR,
  author    = {Buettner, Kyle and Malakouti, Sina and Li, Xiang Lorraine and Kovashka, Adriana},
  title     = {Incorporating Geo-Diverse Knowledge into Prompting for Increased Geographical Robustness in Object Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13515--13524},
}
PIN: Positional Insert Unlocks Object Localisation Abilities in VLMs-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Dorkenwald_2024_CVPR,
  author    = {Dorkenwald, Michael and Barazani, Nimrod and Snoek, Cees G. M. and Asano, Yuki M.},
  title     = {{PIN}: Positional Insert Unlocks Object Localisation Abilities in {VLMs}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13548--13558},
}
UniGarmentManip: A Unified Framework for Category-Level Garment Manipulation via Dense Visual Correspondence-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Ruihai and Lu, Haoran and Wang, Yiyan and Wang, Yubo and Dong, Hao},
  title     = {{UniGarmentManip}: A Unified Framework for Category-Level Garment Manipulation via Dense Visual Correspondence},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16340--16350},
}
Multi-Attribute Interactions Matter for 3D Visual Grounding-
[pdf]
[bibtex]
@InProceedings{Xu_2024_CVPR,
  author    = {Xu, Can and Han, Yuehui and Xu, Rui and Hui, Le and Xie, Jin and Yang, Jian},
  title     = {Multi-Attribute Interactions Matter for {3D} Visual Grounding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17253--17262},
}
SCINeRF: Neural Radiance Fields from a Snapshot Compressive Image-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Yunhao and Wang, Xiaodong and Wang, Ping and Yuan, Xin and Liu, Peidong},
  title     = {{SCINeRF}: Neural Radiance Fields from a Snapshot Compressive Image},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10542--10552},
}
Improved Visual Grounding through Self-Consistent Explanations-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{He_2024_CVPR,
  author    = {He, Ruozhen and Cascante-Bonilla, Paola and Yang, Ziyan and Berg, Alexander C. and Ordonez, Vicente},
  title     = {Improved Visual Grounding through Self-Consistent Explanations},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13095--13105},
}
DifFlow3D: Toward Robust Uncertainty-Aware Scene Flow Estimation with Iterative Diffusion-Based Refinement-
[pdf]
[supp]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Jiuming and Wang, Guangming and Ye, Weicai and Jiang, Chaokang and Han, Jinru and Liu, Zhe and Zhang, Guofeng and Du, Dalong and Wang, Hesheng},
  title     = {{DifFlow3D}: Toward Robust Uncertainty-Aware Scene Flow Estimation with Iterative Diffusion-Based Refinement},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15109--15119},
}
FlashEval: Towards Fast and Accurate Evaluation of Text-to-image Diffusion Generative Models-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Lin and Zhao, Tianchen and Lin, Zinan and Ning, Xuefei and Dai, Guohao and Yang, Huazhong and Wang, Yu},
  title     = {{FlashEval}: Towards Fast and Accurate Evaluation of Text-to-image Diffusion Generative Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16122--16131},
}
View From Above: Orthogonal-View aware Cross-view Localization-
[pdf]
[supp]
[bibtex]
@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Shan and Nguyen, Chuong and Liu, Jiawei and Zhang, Yanhao and Muthu, Sundaram and Maken, Fahira Afzal and Zhang, Kaihao and Li, Hongdong},
  title     = {View From Above: Orthogonal-View aware Cross-view Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14843--14852},
}
PeVL: Pose-Enhanced Vision-Language Model for Fine-Grained Human Action Recognition-
[pdf]
[supp]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Haosong and Leong, Mei Chee and Li, Liyuan and Lin, Weisi},
  title     = {{PeVL}: Pose-Enhanced Vision-Language Model for Fine-Grained Human Action Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18857--18867},
}
DeepCache: Accelerating Diffusion Models for Free-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Xinyin and Fang, Gongfan and Wang, Xinchao},
  title     = {{DeepCache}: Accelerating Diffusion Models for Free},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15762--15772},
}
Learning Correlation Structures for Vision Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Manjin and Seo, Paul Hongsuck and Schmid, Cordelia and Cho, Minsu},
  title     = {Learning Correlation Structures for Vision Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18941--18951},
}
PrPSeg: Universal Proposition Learning for Panoramic Renal Pathology Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Deng_2024_CVPR,
  author    = {Deng, Ruining and Liu, Quan and Cui, Can and Yao, Tianyuan and Yue, Jialin and Xiong, Juming and Yu, Lining and Wu, Yifei and Yin, Mengmeng and Wang, Yu and Zhao, Shilin and Tang, Yucheng and Yang, Haichun and Huo, Yuankai},
  title     = {{PrPSeg}: Universal Proposition Learning for Panoramic Renal Pathology Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11736--11746},
}
Weakly-Supervised Audio-Visual Video Parsing with Prototype-based Pseudo-Labeling-
[pdf]
[supp]
[bibtex]
@InProceedings{Rachavarapu_2024_CVPR,
  author    = {Rachavarapu, Kranthi Kumar and Ramakrishnan, Kalyan and Rajagopalan, A. N.},
  title     = {Weakly-Supervised Audio-Visual Video Parsing with Prototype-based Pseudo-Labeling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18952--18962},
}
Intraoperative 2D/3D Image Registration via Differentiable X-ray Rendering-
[pdf]
[supp]
[bibtex]
@InProceedings{Gopalakrishnan_2024_CVPR,
  author    = {Gopalakrishnan, Vivek and Dey, Neel and Golland, Polina},
  title     = {Intraoperative {2D/3D} Image Registration via Differentiable {X-ray} Rendering},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11662--11672},
}
MICap: A Unified Model for Identity-Aware Movie Descriptions-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Raajesh_2024_CVPR,
  author    = {Raajesh, Haran and Desanur, Naveen Reddy and Khan, Zeeshan and Tapaswi, Makarand},
  title     = {{MICap}: A Unified Model for Identity-Aware Movie Descriptions},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14011--14021},
}
MonoDiff: Monocular 3D Object Detection and Pose Estimation with Diffusion Models-
[pdf]
[supp]
[bibtex]
@InProceedings{Ranasinghe_2024_CVPR,
  author    = {Ranasinghe, Yasiru and Hegde, Deepti and Patel, Vishal M.},
  title     = {{MonoDiff}: Monocular {3D} Object Detection and Pose Estimation with Diffusion Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10659--10670},
}
An Upload-Efficient Scheme for Transferring Knowledge From a Server-Side Pre-trained Generator to Clients in Heterogeneous Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Jianqing and Liu, Yang and Hua, Yang and Cao, Jian},
  title     = {An Upload-Efficient Scheme for Transferring Knowledge From a Server-Side Pre-trained Generator to Clients in Heterogeneous Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12109--12119},
}
Instance-aware Exploration-Verification-Exploitation for Instance ImageGoal Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Lei_2024_CVPR,
  author    = {Lei, Xiaohan and Wang, Min and Zhou, Wengang and Li, Li and Li, Houqiang},
  title     = {Instance-aware Exploration-Verification-Exploitation for Instance {ImageGoal} Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16329--16339},
}
One-2-3-45++: Fast Single Image to 3D Objects with Consistent Multi-View Generation and 3D Diffusion-
[pdf]
[supp]
[bibtex]
@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Minghua and Shi, Ruoxi and Chen, Linghao and Zhang, Zhuoyang and Xu, Chao and Wei, Xinyue and Chen, Hansheng and Zeng, Chong and Gu, Jiayuan and Su, Hao},
  title     = {One-2-3-45++: Fast Single Image to {3D} Objects with Consistent Multi-View Generation and {3D} Diffusion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10072--10083},
}
Let's Think Outside the Box: Exploring Leap-of-Thought in Large Language Models with Creative Humor Generation-
[pdf]
[supp]
[bibtex]
@InProceedings{Zhong_2024_CVPR,
  author    = {Zhong, Shanshan and Huang, Zhongzhan and Gao, Shanghua and Wen, Wushao and Lin, Liang and Zitnik, Marinka and Zhou, Pan},
  title     = {Let's Think Outside the Box: Exploring Leap-of-Thought in Large Language Models with Creative Humor Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13246--13257},
}
SceneFun3D: Fine-Grained Functionality and Affordance Understanding in 3D Scenes-
[pdf]
[supp]
[bibtex]
@InProceedings{Delitzas_2024_CVPR,
  author    = {Delitzas, Alexandros and Takmaz, Ayca and Tombari, Federico and Sumner, Robert and Pollefeys, Marc and Engelmann, Francis},
  title     = {{SceneFun3D}: Fine-Grained Functionality and Affordance Understanding in {3D} Scenes},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14531--14542},
}
Enhanced Motion-Text Alignment for Image-to-Video Transfer Learning-
[pdf]
[supp]
[bibtex]
@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Wei and Wan, Chaoqun and Liu, Tongliang and Tian, Xinmei and Shen, Xu and Ye, Jieping},
  title     = {Enhanced Motion-Text Alignment for Image-to-Video Transfer Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18504--18515},
}
UV-IDM: Identity-Conditioned Latent Diffusion Model for Face UV-Texture Generation-
[pdf]
[supp]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Hong and Feng, Yutang and Xue, Song and Liu, Xuhui and Zeng, Bohan and Li, Shanglin and Liu, Boyu and Liu, Jianzhuang and Han, Shumin and Zhang, Baochang},
  title     = {{UV-IDM}: Identity-Conditioned Latent Diffusion Model for Face {UV}-Texture Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10585--10595},
}
A Pedestrian is Worth One Prompt: Towards Language Guidance Person Re-Identification-
[pdf]
[supp]
[bibtex]
@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Zexian and Wu, Dayan and Wu, Chenming and Lin, Zheng and Gu, Jingzi and Wang, Weiping},
  title     = {A Pedestrian is Worth One Prompt: Towards Language Guidance Person Re-Identification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17343--17353},
}
NetTrack: Tracking Highly Dynamic Objects with a Net-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Zheng_2024_CVPR,
  author    = {Zheng, Guangze and Lin, Shijie and Zuo, Haobo and Fu, Changhong and Pan, Jia},
  title     = {{NetTrack}: Tracking Highly Dynamic Objects with a Net},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {19145--19155},
}
Grounded Question-Answering in Long Egocentric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Di_2024_CVPR,
  author    = {Di, Shangzhe and Xie, Weidi},
  title     = {Grounded Question-Answering in Long Egocentric Videos},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12934--12943},
}
HPNet: Dynamic Trajectory Forecasting with Historical Prediction Attention-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Tang_2024_CVPR,
  author    = {Tang, Xiaolong and Kan, Meina and Shan, Shiguang and Ji, Zhilong and Bai, Jinfeng and Chen, Xilin},
  title     = {{HPNet}: Dynamic Trajectory Forecasting with Historical Prediction Attention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15261--15270},
}
SI-MIL: Taming Deep MIL for Self-Interpretability in Gigapixel Histopathology-
[pdf]
[supp]
[bibtex]
@InProceedings{Kapse_2024_CVPR,
  author    = {Kapse, Saarthak and Pati, Pushpak and Das, Srijan and Zhang, Jingwei and Chen, Chao and Vakalopoulou, Maria and Saltz, Joel and Samaras, Dimitris and Gupta, Rajarsi R. and Prasanna, Prateek},
  title     = {{SI-MIL}: Taming Deep {MIL} for Self-Interpretability in Gigapixel Histopathology},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11226--11237},
}
LayoutFormer: Hierarchical Text Detection Towards Scene Text Understanding-
[pdf]
[bibtex]
@InProceedings{Liang_2024_CVPR,
  author    = {Liang, Min and Ma, Jia-Wei and Zhu, Xiaobin and Qin, Jingyan and Yin, Xu-Cheng},
  title     = {{LayoutFormer}: Hierarchical Text Detection Towards Scene Text Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15665--15674},
}
GLOW: Global Layout Aware Attacks on Object Detection-
[pdf]
[supp]
[bibtex]
@InProceedings{Bao_2024_CVPR,
  author    = {Bao, Jun and Liu, Buyu and Ren, Kui and Yu, Jun},
  title     = {{GLOW}: Global Layout Aware Attacks on Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12057--12066},
}
SIRA: Scalable Inter-frame Relation and Association for Radar Perception-
[pdf]
[supp]
[bibtex]
@InProceedings{Yataka_2024_CVPR,
  author    = {Yataka, Ryoma and Wang, Pu and Boufounos, Petros and Takahashi, Ryuhei},
  title     = {{SIRA}: Scalable Inter-frame Relation and Association for Radar Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15024--15034},
}
VOODOO 3D: Volumetric Portrait Disentanglement For One-Shot 3D Head Reenactment-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Tran_2024_CVPR,
  author    = {Tran, Phong and Zakharov, Egor and Ho, Long-Nhat and Tran, Anh Tuan and Hu, Liwen and Li, Hao},
  title     = {{VOODOO 3D}: Volumetric Portrait Disentanglement For One-Shot {3D} Head Reenactment},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10336--10348},
}
Visual Fact Checker: Enabling High-Fidelity Detailed Caption Generation-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Ge_2024_CVPR,
  author    = {Ge, Yunhao and Zeng, Xiaohui and Huffman, Jacob Samuel and Lin, Tsung-Yi and Liu, Ming-Yu and Cui, Yin},
  title     = {Visual Fact Checker: Enabling High-Fidelity Detailed Caption Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14033--14042},
}
Communication-Efficient Collaborative Perception via Information Filling with Codebook-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Hu_2024_CVPR,
  author    = {Hu, Yue and Peng, Juntong and Liu, Sifei and Ge, Junhao and Liu, Si and Chen, Siheng},
  title     = {Communication-Efficient Collaborative Perception via Information Filling with Codebook},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15481--15490},
}
MVD-Fusion: Single-view 3D via Depth-consistent Multi-view Generation-
[pdf]
[supp]
[bibtex]
@InProceedings{Hu_2024_CVPR,
  author    = {Hu, Hanzhe and Zhou, Zhizhuo and Jampani, Varun and Tulsiani, Shubham},
  title     = {{MVD-Fusion}: Single-view {3D} via Depth-consistent Multi-view Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9698--9707},
}
Effective Video Mirror Detection with Inconsistent Motion Cues-
[pdf]
[supp]
[bibtex]
@InProceedings{Warren_2024_CVPR,
  author    = {Warren, Alex and Xu, Ke and Lin, Jiaying and Tam, Gary K. L. and Lau, Rynson W. H.},
  title     = {Effective Video Mirror Detection with Inconsistent Motion Cues},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17244--17252},
}
DiffLoc: Diffusion Model for Outdoor LiDAR Localization-
[pdf]
[supp]
[bibtex]
@InProceedings{Li_2024_CVPR,
  author    = {Li, Wen and Yang, Yuyang and Yu, Shangshu and Hu, Guosheng and Wen, Chenglu and Cheng, Ming and Wang, Cheng},
  title     = {{DiffLoc}: Diffusion Model for Outdoor {LiDAR} Localization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15045--15054},
}
On Scaling Up a Multilingual Vision and Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]
@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Xi and Djolonga, Josip and Padlewski, Piotr and Mustafa, Basil and Changpinyo, Soravit and Wu, Jialin and Ruiz, Carlos Riquelme and Goodman, Sebastian and Wang, Xiao and Tay, Yi and Shakeri, Siamak and Dehghani, Mostafa and Salz, Daniel and Lucic, Mario and Tschannen, Michael and Nagrani, Arsha and Hu, Hexiang and Joshi, Mandar and Pang, Bo and Montgomery, Ceslee and Pietrzyk, Paulina and Ritter, Marvin and Piergiovanni, AJ and Minderer, Matthias and Pavetic, Filip and Waters, Austin and Li, Gang and Alabdulmohsin, Ibrahim and Beyer, Lucas and Amelot, Julien and Lee, Kenton and Steiner, Andreas Peter and Li, Yang and Keysers, Daniel and Arnab, Anurag and Xu, Yuanzhong and Rong, Keran and Kolesnikov, Alexander and Seyedhosseini, Mojtaba and Angelova, Anelia and Zhai, Xiaohua and Houlsby, Neil and Soricut, Radu},
  title     = {On Scaling Up a Multilingual Vision and Language Model},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14432--14444},
}
Day-Night Cross-domain Vehicle Re-identification-
[pdf]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Hongchao and Chen, Jingong and Zheng, Aihua and Wu, Yong and Luo, Yonglong}, title = {Day-Night Cross-domain Vehicle Re-identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12626-12635} }
Holodeck: Language Guided Generation of 3D Embodied AI Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Yue and Sun, Fan-Yun and Weihs, Luca and VanderBilt, Eli and Herrasti, Alvaro and Han, Winson and Wu, Jiajun and Haber, Nick and Krishna, Ranjay and Liu, Lingjie and Callison-Burch, Chris and Yatskar, Mark and Kembhavi, Aniruddha and Clark, Christopher}, title = {Holodeck: Language Guided Generation of 3D Embodied AI Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16227-16237} }
Distilled Datamodel with Reverse Gradient Matching-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2024_CVPR, author = {Ye, Jingwen and Yu, Ruonan and Liu, Songhua and Wang, Xinchao}, title = {Distilled Datamodel with Reverse Gradient Matching}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11954-11963} }
Pseudo Label Refinery for Unsupervised Domain Adaptation on Cross-dataset 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Zhanwei and Chen, Minghao and Xiao, Shuai and Peng, Liang and Li, Hengjia and Lin, Binbin and Li, Ping and Wang, Wenxiao and Wu, Boxi and Cai, Deng}, title = {Pseudo Label Refinery for Unsupervised Domain Adaptation on Cross-dataset 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15291-15300} }
Reconstructing Hands in 3D with Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pavlakos_2024_CVPR, author = {Pavlakos, Georgios and Shan, Dandan and Radosavovic, Ilija and Kanazawa, Angjoo and Fouhey, David and Malik, Jitendra}, title = {Reconstructing Hands in 3D with Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9826-9836} }
PELA: Learning Parameter-Efficient Models with Low-Rank Approximation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, Yangyang and Wang, Guangzhi and Kankanhalli, Mohan}, title = {PELA: Learning Parameter-Efficient Models with Low-Rank Approximation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15699-15709} }
Auto-Train-Once: Controller Network Guided Automatic Network Pruning from Scratch-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Xidong and Gao, Shangqian and Zhang, Zeyu and Li, Zhenzhen and Bao, Runxue and Zhang, Yanfu and Wang, Xiaoqian and Huang, Heng}, title = {Auto-Train-Once: Controller Network Guided Automatic Network Pruning from Scratch}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16163-16173} }
Constructing and Exploring Intermediate Domains in Mixed Domain Semi-supervised Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Qinghe and Zhang, Jian and Qi, Lei and Yu, Qian and Shi, Yinghuan and Gao, Yang}, title = {Constructing and Exploring Intermediate Domains in Mixed Domain Semi-supervised Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11642-11651} }
From Isolated Islands to Pangea: Unifying Semantic Space for Human Action Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Yong-Lu and Wu, Xiaoqian and Liu, Xinpeng and Wang, Zehao and Dou, Yiming and Ji, Yikun and Zhang, Junyi and Li, Yixing and Lu, Xudong and Tan, Jingru and Lu, Cewu}, title = {From Isolated Islands to Pangea: Unifying Semantic Space for Human Action Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16582-16592} }
Bootstrapping Autonomous Driving Radars with Self-Supervised Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hao_2024_CVPR, author = {Hao, Yiduo and Madani, Sohrab and Guan, Junfeng and Alloulah, Mohammed and Gupta, Saurabh and Hassanieh, Haitham}, title = {Bootstrapping Autonomous Driving Radars with Self-Supervised Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15012-15023} }
Weakly Supervised Monocular 3D Detection with a Single-View Image-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jiang_2024_CVPR, author = {Jiang, Xueying and Jin, Sheng and Lu, Lewei and Zhang, Xiaoqin and Lu, Shijian}, title = {Weakly Supervised Monocular 3D Detection with a Single-View Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10508-10518} }
Blind Image Quality Assessment Based on Geometric Order Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Shin_2024_CVPR, author = {Shin, Nyeong-Ho and Lee, Seon-Ho and Kim, Chang-Su}, title = {Blind Image Quality Assessment Based on Geometric Order Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12799-12808} }
Generalizable Whole Slide Image Classification with Fine-Grained Visual-Semantic Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Hao and Chen, Ying and Chen, Yifei and Yu, Rongshan and Yang, Wenxian and Wang, Liansheng and Ding, Bowen and Han, Yuchen}, title = {Generalizable Whole Slide Image Classification with Fine-Grained Visual-Semantic Interaction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11398-11407} }
Generalizing 6-DoF Grasp Detection via Domain Prior Knowledge-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Haoxiang and Shi, Modi and Gao, Boyang and Huang, Di}, title = {Generalizing 6-DoF Grasp Detection via Domain Prior Knowledge}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18102-18111} }
RadSimReal: Bridging the Gap Between Synthetic and Real Data in Radar Object Detection With Simulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bialer_2024_CVPR, author = {Bialer, Oded and Haitman, Yuval}, title = {RadSimReal: Bridging the Gap Between Synthetic and Real Data in Radar Object Detection With Simulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15407-15416} }
3DSFLabelling: Boosting 3D Scene Flow Estimation by Pseudo Auto-labelling-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2024_CVPR, author = {Jiang, Chaokang and Wang, Guangming and Liu, Jiuming and Wang, Hesheng and Ma, Zhuang and Liu, Zhenqiang and Liang, Zhujin and Shan, Yi and Du, Dalong}, title = {3DSFLabelling: Boosting 3D Scene Flow Estimation by Pseudo Auto-labelling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15173-15183} }
Question Aware Vision Transformer for Multimodal Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ganz_2024_CVPR, author = {Ganz, Roy and Kittenplon, Yair and Aberdam, Aviad and Ben Avraham, Elad and Nuriel, Oren and Mazor, Shai and Litman, Ron}, title = {Question Aware Vision Transformer for Multimodal Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13861-13871} }
OST: Refining Text Knowledge with Optimal Spatio-Temporal Descriptor for General Video Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Tongjia and Yu, Hongshan and Yang, Zhengeng and Li, Zechuan and Sun, Wei and Chen, Chen}, title = {OST: Refining Text Knowledge with Optimal Spatio-Temporal Descriptor for General Video Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18888-18898} }
Habitat Synthetic Scenes Dataset (HSSD-200): An Analysis of 3D Scene Scale and Realism Tradeoffs for ObjectGoal Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Khanna_2024_CVPR, author = {Khanna, Mukul and Mao, Yongsen and Jiang, Hanxiao and Haresh, Sanjay and Shacklett, Brennan and Batra, Dhruv and Clegg, Alexander and Undersander, Eric and Chang, Angel X. and Savva, Manolis}, title = {Habitat Synthetic Scenes Dataset (HSSD-200): An Analysis of 3D Scene Scale and Realism Tradeoffs for ObjectGoal Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16384-16393} }
NViST: In the Wild New View Synthesis from a Single Image with Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2024_CVPR, author = {Jang, Wonbong and Agapito, Lourdes}, title = {NViST: In the Wild New View Synthesis from a Single Image with Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10181-10193} }
Step Differences in Instructional Video-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nagarajan_2024_CVPR, author = {Nagarajan, Tushar and Torresani, Lorenzo}, title = {Step Differences in Instructional Video}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18740-18750} }
Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Lihe and Kang, Bingyi and Huang, Zilong and Xu, Xiaogang and Feng, Jiashi and Zhao, Hengshuang}, title = {Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10371-10381} }
MPOD123: One Image to 3D Content Generation Using Mask-enhanced Progressive Outline-to-Detail Optimization-
[pdf]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Jimin and Wang, Tianbao and Jin, Tao and Zhang, Shengyu and Fu, Dongjie and Wang, Zhe and Lyu, Jiangjing and Lv, Chengfei and Niu, Chaoyue and Yu, Zhou and Zhao, Zhou and Wu, Fei}, title = {MPOD123: One Image to 3D Content Generation Using Mask-enhanced Progressive Outline-to-Detail Optimization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10682-10692} }
UnionFormer: Unified-Learning Transformer with Multi-View Representation for Image Manipulation Detection and Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Shuaibo and Ma, Wei and Guo, Jianwei and Xu, Shibiao and Li, Benchong and Zhang, Xiaopeng}, title = {UnionFormer: Unified-Learning Transformer with Multi-View Representation for Image Manipulation Detection and Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12523-12533} }
Situational Awareness Matters in 3D Vision Language Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Man_2024_CVPR, author = {Man, Yunze and Gui, Liang-Yan and Wang, Yu-Xiong}, title = {Situational Awareness Matters in 3D Vision Language Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13678-13688} }
RCBEVDet: Radar-camera Fusion in Bird's Eye View for 3D Object Detection-
[pdf]
[bibtex]@InProceedings{Lin_2024_CVPR, author = {Lin, Zhiwei and Liu, Zhe and Xia, Zhongyu and Wang, Xinhao and Wang, Yongtao and Qi, Shengxiang and Dong, Yang and Dong, Nan and Zhang, Le and Zhu, Ce}, title = {RCBEVDet: Radar-camera Fusion in Bird's Eye View for 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14928-14937} }
Adaptive Softassign via Hadamard-Equipped Sinkhorn-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shen_2024_CVPR, author = {Shen, Binrui and Niu, Qiang and Zhu, Shengxin}, title = {Adaptive Softassign via Hadamard-Equipped Sinkhorn}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17638-17647} }
Re-thinking Data Availability Attacks Against Deep Neural Networks-
[pdf]
[supp]
[bibtex]@InProceedings{Fang_2024_CVPR, author = {Fang, Bin and Li, Bo and Wu, Shuang and Ding, Shouhong and Yi, Ran and Ma, Lizhuang}, title = {Re-thinking Data Availability Attacks Against Deep Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12215-12224} }
SHiNe: Semantic Hierarchy Nexus for Open-vocabulary Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Mingxuan and Hayes, Tyler L. and Ricci, Elisa and Csurka, Gabriela and Volpi, Riccardo}, title = {SHiNe: Semantic Hierarchy Nexus for Open-vocabulary Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16634-16644} }
Ranking Distillation for Open-Ended Video Question Answering with Insufficient Labels-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liang_2024_CVPR, author = {Liang, Tianming and Tan, Chaolei and Xia, Beihao and Zheng, Wei-Shi and Hu, Jian-Fang}, title = {Ranking Distillation for Open-Ended Video Question Answering with Insufficient Labels}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13161-13170} }
Depth-Aware Concealed Crop Detection in Dense Agricultural Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Liqiong and Yang, Jinyu and Zhang, Yanfu and Wang, Fangyi and Zheng, Feng}, title = {Depth-Aware Concealed Crop Detection in Dense Agricultural Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17201-17211} }
Solving the Catastrophic Forgetting Problem in Generalized Category Discovery-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2024_CVPR, author = {Cao, Xinzi and Zheng, Xiawu and Wang, Guanhong and Yu, Weijiang and Shen, Yunhang and Li, Ke and Lu, Yutong and Tian, Yonghong}, title = {Solving the Catastrophic Forgetting Problem in Generalized Category Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16880-16889} }
Data-Efficient Unsupervised Interpolation Without Any Intermediate Frame for 4D Medical Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, JungEun and Yoon, Hangyul and Park, Geondo and Kim, Kyungsu and Yang, Eunho}, title = {Data-Efficient Unsupervised Interpolation Without Any Intermediate Frame for 4D Medical Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11353-11364} }
Learning the 3D Fauna of the Web-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zizhang and Litvak, Dor and Li, Ruining and Zhang, Yunzhi and Jakab, Tomas and Rupprecht, Christian and Wu, Shangzhe and Vedaldi, Andrea and Wu, Jiajun}, title = {Learning the 3D Fauna of the Web}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9752-9762} }
LISA: Reasoning Segmentation via Large Language Model-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lai_2024_CVPR, author = {Lai, Xin and Tian, Zhuotao and Chen, Yukang and Li, Yanwei and Yuan, Yuhui and Liu, Shu and Jia, Jiaya}, title = {LISA: Reasoning Segmentation via Large Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9579-9589} }
Bridging the Gap: A Unified Video Comprehension Framework for Moment Retrieval and Highlight Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2024_CVPR, author = {Xiao, Yicheng and Luo, Zhuoyan and Liu, Yong and Ma, Yue and Bian, Hengwei and Ji, Yatai and Yang, Yujiu and Li, Xiu}, title = {Bridging the Gap: A Unified Video Comprehension Framework for Moment Retrieval and Highlight Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18709-18719} }
MuseChat: A Conversational Music Recommendation System for Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2024_CVPR, author = {Dong, Zhikang and Liu, Xiulong and Chen, Bin and Polak, Pawel and Zhang, Peng}, title = {MuseChat: A Conversational Music Recommendation System for Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12775-12785} }
Device-Wise Federated Network Pruning-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Shangqian and Li, Junyi and Zhang, Zeyu and Zhang, Yanfu and Cai, Weidong and Huang, Heng}, title = {Device-Wise Federated Network Pruning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12342-12352} }
MoReVQA: Exploring Modular Reasoning Models for Video Question Answering-
[pdf]
[arXiv]
[bibtex]@InProceedings{Min_2024_CVPR, author = {Min, Juhong and Buch, Shyamal and Nagrani, Arsha and Cho, Minsu and Schmid, Cordelia}, title = {MoReVQA: Exploring Modular Reasoning Models for Video Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13235-13245} }
Low-Rank Rescaled Vision Transformer Fine-Tuning: A Residual Design Approach-
[pdf]
[supp]
[bibtex]@InProceedings{Dong_2024_CVPR, author = {Dong, Wei and Zhang, Xing and Chen, Bihui and Yan, Dawei and Lin, Zhijun and Yan, Qingsen and Wang, Peng and Yang, Yang}, title = {Low-Rank Rescaled Vision Transformer Fine-Tuning: A Residual Design Approach}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16101-16110} }
Distribution-aware Knowledge Prototyping for Non-exemplar Lifelong Person Re-identification-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Kunlun and Zou, Xu and Peng, Yuxin and Zhou, Jiahuan}, title = {Distribution-aware Knowledge Prototyping for Non-exemplar Lifelong Person Re-identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16604-16613} }
Generating Enhanced Negatives for Training Language-Based Object Detectors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Shiyu and Zhao, Long and G, Vijay Kumar B and Suh, Yumin and Metaxas, Dimitris N. and Chandraker, Manmohan and Schulter, Samuel}, title = {Generating Enhanced Negatives for Training Language-Based Object Detectors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13592-13602} }
FedAS: Bridging Inconsistency in Personalized Federated Learning-
[pdf]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Xiyuan and Huang, Wenke and Ye, Mang}, title = {FedAS: Bridging Inconsistency in Personalized Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11986-11995} }
MoST: Multi-Modality Scene Tokenization for Motion Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mu_2024_CVPR, author = {Mu, Norman and Ji, Jingwei and Yang, Zhenpei and Harada, Nate and Tang, Haotian and Chen, Kan and Qi, Charles R. and Ge, Runzhou and Goel, Kratarth and Yang, Zoey and Ettinger, Scott and Al-Rfou, Rami and Anguelov, Dragomir and Zhou, Yin}, title = {MoST: Multi-Modality Scene Tokenization for Motion Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14988-14999} }
PIGEON: Predicting Image Geolocations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Haas_2024_CVPR, author = {Haas, Lukas and Skreta, Michal and Alberti, Silas and Finn, Chelsea}, title = {PIGEON: Predicting Image Geolocations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12893-12902} }
Flow-Guided Online Stereo Rectification for Wide Baseline Stereo-
[pdf]
[supp]
[bibtex]@InProceedings{Kumar_2024_CVPR, author = {Kumar, Anush and Mannan, Fahim and Jafari, Omid Hosseini and Li, Shile and Heide, Felix}, title = {Flow-Guided Online Stereo Rectification for Wide Baseline Stereo}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15375-15385} }
Driving Everywhere with Large Language Model Policy Adaptation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Boyi and Wang, Yue and Mao, Jiageng and Ivanovic, Boris and Veer, Sushant and Leung, Karen and Pavone, Marco}, title = {Driving Everywhere with Large Language Model Policy Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14948-14957} }
Koala: Key Frame-Conditioned Long Video-LLM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2024_CVPR, author = {Tan, Reuben and Sun, Ximeng and Hu, Ping and Wang, Jui-hsien and Deilamsalehy, Hanieh and Plummer, Bryan A. and Russell, Bryan and Saenko, Kate}, title = {Koala: Key Frame-Conditioned Long Video-LLM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13581-13591} }
HallusionBench: An Advanced Diagnostic Suite for Entangled Language Hallucination and Visual Illusion in Large Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guan_2024_CVPR, author = {Guan, Tianrui and Liu, Fuxiao and Wu, Xiyang and Xian, Ruiqi and Li, Zongxia and Liu, Xiaoyu and Wang, Xijun and Chen, Lichang and Huang, Furong and Yacoob, Yaser and Manocha, Dinesh and Zhou, Tianyi}, title = {HallusionBench: An Advanced Diagnostic Suite for Entangled Language Hallucination and Visual Illusion in Large Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14375-14385} }
ID-like Prompt Learning for Few-Shot Out-of-Distribution Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bai_2024_CVPR, author = {Bai, Yichen and Han, Zongbo and Cao, Bing and Jiang, Xiaoheng and Hu, Qinghua and Zhang, Changqing}, title = {ID-like Prompt Learning for Few-Shot Out-of-Distribution Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17480-17489} }
Jack of All Tasks Master of Many: Designing General-Purpose Coarse-to-Fine Vision-Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pramanick_2024_CVPR, author = {Pramanick, Shraman and Han, Guangxing and Hou, Rui and Nag, Sayan and Lim, Ser-Nam and Ballas, Nicolas and Wang, Qifan and Chellappa, Rama and Almahairi, Amjad}, title = {Jack of All Tasks Master of Many: Designing General-Purpose Coarse-to-Fine Vision-Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14076-14088} }
SchurVINS: Schur Complement-Based Lightweight Visual Inertial Navigation System-
[pdf]
[arXiv]
[bibtex]@InProceedings{Fan_2024_CVPR, author = {Fan, Yunfei and Zhao, Tianyu and Wang, Guidong}, title = {SchurVINS: Schur Complement-Based Lightweight Visual Inertial Navigation System}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17964-17973} }
ViP-LLaVA: Making Large Multimodal Models Understand Arbitrary Visual Prompts-
[pdf]
[supp]
[bibtex]@InProceedings{Cai_2024_CVPR, author = {Cai, Mu and Liu, Haotian and Mustikovela, Siva Karthik and Meyer, Gregory P. and Chai, Yuning and Park, Dennis and Lee, Yong Jae}, title = {ViP-LLaVA: Making Large Multimodal Models Understand Arbitrary Visual Prompts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12914-12923} }
OVER-NAV: Elevating Iterative Vision-and-Language Navigation with Open-Vocabulary Detection and StructurEd Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Ganlong and Li, Guanbin and Chen, Weikai and Yu, Yizhou}, title = {OVER-NAV: Elevating Iterative Vision-and-Language Navigation with Open-Vocabulary Detection and StructurEd Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16296-16306} }
All Rivers Run to the Sea: Private Learning with Asymmetric Flows-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Niu_2024_CVPR, author = {Niu, Yue and Ali, Ramy E. and Prakash, Saurav and Avestimehr, Salman}, title = {All Rivers Run to the Sea: Private Learning with Asymmetric Flows}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12353-12362} }
HandBooster: Boosting 3D Hand-Mesh Reconstruction by Conditional Synthesis and Sampling of Hand-Object Interactions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Hao and Li, Haipeng and Wang, Yinqiao and Liu, Shuaicheng and Fu, Chi-Wing}, title = {HandBooster: Boosting 3D Hand-Mesh Reconstruction by Conditional Synthesis and Sampling of Hand-Object Interactions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10159-10169} }
A-Teacher: Asymmetric Network for 3D Semi-Supervised Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Hanshi and Zhang, Zhipeng and Gao, Jin and Hu, Weiming}, title = {A-Teacher: Asymmetric Network for 3D Semi-Supervised Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14978-14987} }
Visual Objectification in Films: Towards a New AI Task for Video Interpretation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tores_2024_CVPR, author = {Tores, Julie and Sassatelli, Lucile and Wu, Hui-Yin and Bergman, Clement and Andolfi, L\'ea and Ecrement, Victor and Precioso, Fr\'ed\'eric and Devars, Thierry and Guaresi, Magali and Julliard, Virginie and Lecossais, Sarah}, title = {Visual Objectification in Films: Towards a New AI Task for Video Interpretation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10864-10874} }
BiTT: Bi-directional Texture Reconstruction of Interacting Two Hands from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Minje and Kim, Tae-Kyun}, title = {BiTT: Bi-directional Texture Reconstruction of Interacting Two Hands from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10726-10735} }
Learning to Localize Objects Improves Spatial Reasoning in Visual-LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ranasinghe_2024_CVPR, author = {Ranasinghe, Kanchana and Shukla, Satya Narayan and Poursaeed, Omid and Ryoo, Michael S. and Lin, Tsung-Yu}, title = {Learning to Localize Objects Improves Spatial Reasoning in Visual-LLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12977-12987} }
Self-Distilled Masked Auto-Encoders are Efficient Video Anomaly Detectors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ristea_2024_CVPR, author = {Ristea, Nicolae-C{\u{a}}t{\u{a}}lin and Croitoru, Florinel-Alin and Ionescu, Radu Tudor and Popescu, Marius and Khan, Fahad Shahbaz and Shah, Mubarak}, title = {Self-Distilled Masked Auto-Encoders are Efficient Video Anomaly Detectors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15984-15995} }
Distilling Vision-Language Models on Millions of Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Yue and Zhao, Long and Zhou, Xingyi and Wu, Jialin and Chu, Chun-Te and Miao, Hui and Schroff, Florian and Adam, Hartwig and Liu, Ting and Gong, Boqing and Krahenbuhl, Philipp and Yuan, Liangzhe}, title = {Distilling Vision-Language Models on Millions of Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13106-13116} }
Generalized Predictive Model for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Jiazhi and Gao, Shenyuan and Qiu, Yihang and Chen, Li and Li, Tianyu and Dai, Bo and Chitta, Kashyap and Wu, Penghao and Zeng, Jia and Luo, Ping and Zhang, Jun and Geiger, Andreas and Qiao, Yu and Li, Hongyang}, title = {Generalized Predictive Model for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14662-14672} }
FACT: Frame-Action Cross-Attention Temporal Modeling for Efficient Action Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Lu_2024_CVPR, author = {Lu, Zijia and Elhamifar, Ehsan}, title = {FACT: Frame-Action Cross-Attention Temporal Modeling for Efficient Action Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18175-18185} }
Test-Time Zero-Shot Temporal Action Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liberatori_2024_CVPR, author = {Liberatori, Benedetta and Conti, Alessandro and Rota, Paolo and Wang, Yiming and Ricci, Elisa}, title = {Test-Time Zero-Shot Temporal Action Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18720-18729} }
AM-RADIO: Agglomerative Vision Foundation Model Reduce All Domains Into One-
[pdf]
[supp]
[bibtex]@InProceedings{Ranzinger_2024_CVPR, author = {Ranzinger, Mike and Heinrich, Greg and Kautz, Jan and Molchanov, Pavlo}, title = {AM-RADIO: Agglomerative Vision Foundation Model Reduce All Domains Into One}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12490-12500} }
FastMAC: Stochastic Spectral Sampling of Correspondence Graph-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yifei and Zhao, Hao and Li, Hongyang and Chen, Siheng}, title = {FastMAC: Stochastic Spectral Sampling of Correspondence Graph}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17857-17867} }
FedSOL: Stabilized Orthogonal Learning with Proximal Restrictions in Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Gihun and Jeong, Minchan and Kim, Sangmook and Oh, Jaehoon and Yun, Se-Young}, title = {FedSOL: Stabilized Orthogonal Learning with Proximal Restrictions in Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12512-12522} }
A Category Agnostic Model for Visual Rearrangment-
[pdf]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Yuyi and Song, Xinhang and Li, Weijie and Wang, Xiaohan and Jiang, Shuqiang}, title = {A Category Agnostic Model for Visual Rearrangment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16457-16466} }
Representing Part-Whole Hierarchies in Foundation Models by Learning Localizability Composability and Decomposability from Anatomy via Self Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Taher_2024_CVPR, author = {Taher, Mohammad Reza Hosseinzadeh and Gotway, Michael B. and Liang, Jianming}, title = {Representing Part-Whole Hierarchies in Foundation Models by Learning Localizability Composability and Decomposability from Anatomy via Self Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11269-11281} }
Efficient Test-Time Adaptation of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Karmanov_2024_CVPR, author = {Karmanov, Adilbek and Guan, Dayan and Lu, Shijian and El Saddik, Abdulmotaleb and Xing, Eric}, title = {Efficient Test-Time Adaptation of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14162-14171} }
Eyes Wide Shut? Exploring the Visual Shortcomings of Multimodal LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tong_2024_CVPR, author = {Tong, Shengbang and Liu, Zhuang and Zhai, Yuexiang and Ma, Yi and LeCun, Yann and Xie, Saining}, title = {Eyes Wide Shut? Exploring the Visual Shortcomings of Multimodal LLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9568-9578} }
Mind Marginal Non-Crack Regions: Clustering-Inspired Representation Learning for Crack Segmentation-
[pdf]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Zhuangzhuang and Lai, Zhuonan and Chen, Jie and Li, Jianqiang}, title = {Mind Marginal Non-Crack Regions: Clustering-Inspired Representation Learning for Crack Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12698-12708} }
RegionGPT: Towards Region Understanding Vision Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, Qiushan and De Mello, Shalini and Yin, Hongxu and Byeon, Wonmin and Cheung, Ka Chun and Yu, Yizhou and Luo, Ping and Liu, Sifei}, title = {RegionGPT: Towards Region Understanding Vision Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13796-13806} }
Error Detection in Egocentric Procedural Task Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Shih-Po and Lu, Zijia and Zhang, Zekun and Hoai, Minh and Elhamifar, Ehsan}, title = {Error Detection in Egocentric Procedural Task Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18655-18666} }
Uncertainty-Guided Never-Ending Learning to Drive-
[pdf]
[bibtex]@InProceedings{Lai_2024_CVPR, author = {Lai, Lei and Ohn-Bar, Eshed and Arora, Sanjay and Yi, John Seon Keun}, title = {Uncertainty-Guided Never-Ending Learning to Drive}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15088-15098} }
FakeInversion: Learning to Detect Images from Unseen Text-to-Image Models by Inverting Stable Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cazenavette_2024_CVPR, author = {Cazenavette, George and Sud, Avneesh and Leung, Thomas and Usman, Ben}, title = {FakeInversion: Learning to Detect Images from Unseen Text-to-Image Models by Inverting Stable Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10759-10769} }
Attribute-Guided Pedestrian Retrieval: Bridging Person Re-ID with Internal Attribute Variability-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Yan and Zhang, Zhang and Wu, Qiang and Zhong, Yi and Wang, Liang}, title = {Attribute-Guided Pedestrian Retrieval: Bridging Person Re-ID with Internal Attribute Variability}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17689-17699} }
Text Is MASS: Modeling as Stochastic Embedding for Text-Video Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Jiamian and Sun, Guohao and Wang, Pichao and Liu, Dongfang and Dianat, Sohail and Rabbani, Majid and Rao, Raghuveer and Tao, Zhiqiang}, title = {Text Is MASS: Modeling as Stochastic Embedding for Text-Video Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16551-16560} }
Know Your Neighbors: Improving Single-View Reconstruction via Spatial Vision-Language Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Rui and Fischer, Tobias and Segu, Mattia and Pollefeys, Marc and Van Gool, Luc and Tombari, Federico}, title = {Know Your Neighbors: Improving Single-View Reconstruction via Spatial Vision-Language Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9848-9858} }
Preserving Fairness Generalization in Deepfake Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lin_2024_CVPR, author = {Lin, Li and He, Xinan and Ju, Yan and Wang, Xin and Ding, Feng and Hu, Shu}, title = {Preserving Fairness Generalization in Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16815-16825} }
Structure-Aware Sparse-View X-ray 3D Reconstruction-
[pdf]
[bibtex]@InProceedings{Cai_2024_CVPR, author = {Cai, Yuanhao and Wang, Jiahao and Yuille, Alan and Zhou, Zongwei and Wang, Angtian}, title = {Structure-Aware Sparse-View X-ray 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11174-11183} }
Dexterous Grasp Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Guo-Hao and Wei, Yi-Lin and Zheng, Dian and Wu, Xiao-Ming and Zheng, Wei-Shi}, title = {Dexterous Grasp Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17933-17942} }
EgoThink: Evaluating First-Person Perspective Thinking Capability of Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2024_CVPR, author = {Cheng, Sijie and Guo, Zhicheng and Wu, Jingwen and Fang, Kechen and Li, Peng and Liu, Huaping and Liu, Yang}, title = {EgoThink: Evaluating First-Person Perspective Thinking Capability of Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14291-14302} }
Hearing Anything Anywhere-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Mason Long and Sawata, Ryosuke and Clarke, Samuel and Gao, Ruohan and Wu, Shangzhe and Wu, Jiajun}, title = {Hearing Anything Anywhere}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11790-11799} }
PatchFusion: An End-to-End Tile-Based Framework for High-Resolution Monocular Metric Depth Estimation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zhenyu and Bhat, Shariq Farooq and Wonka, Peter}, title = {PatchFusion: An End-to-End Tile-Based Framework for High-Resolution Monocular Metric Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10016-10025} }
Retrieval-Augmented Egocentric Video Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Jilan and Huang, Yifei and Hou, Junlin and Chen, Guo and Zhang, Yuejie and Feng, Rui and Xie, Weidi}, title = {Retrieval-Augmented Egocentric Video Captioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13525-13536} }
SkillDiffuser: Interpretable Hierarchical Planning via Skill Abstractions in Diffusion-Based Task Execution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2024_CVPR, author = {Liang, Zhixuan and Mu, Yao and Ma, Hengbo and Tomizuka, Masayoshi and Ding, Mingyu and Luo, Ping}, title = {SkillDiffuser: Interpretable Hierarchical Planning via Skill Abstractions in Diffusion-Based Task Execution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16467-16476} }
TE-TAD: Towards Full End-to-End Temporal Action Detection via Time-Aligned Coordinate Expression-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Ho-Joong and Hong, Jung-Ho and Kong, Heejo and Lee, Seong-Whan}, title = {TE-TAD: Towards Full End-to-End Temporal Action Detection via Time-Aligned Coordinate Expression}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18837-18846} }
PointBeV: A Sparse Approach for BeV Predictions-
[pdf]
[supp]
[bibtex]@InProceedings{Chambon_2024_CVPR, author = {Chambon, Loick and Zablocki, Eloi and Chen, Micka\"el and Bartoccioni, Florent and P\'erez, Patrick and Cord, Matthieu}, title = {PointBeV: A Sparse Approach for BeV Predictions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15195-15204} }
From-Ground-To-Objects: Coarse-to-Fine Self-supervised Monocular Depth Estimation of Dynamic Objects with Ground Contact Prior-
[pdf]
[supp]
[bibtex]@InProceedings{Moon_2024_CVPR, author = {Moon, Jaeho and Bello, Juan Luis Gonzalez and Kwon, Byeongjun and Kim, Munchurl}, title = {From-Ground-To-Objects: Coarse-to-Fine Self-supervised Monocular Depth Estimation of Dynamic Objects with Ground Contact Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10519-10529} }
SRTube: Video-Language Pre-Training with Action-Centric Video Tube Features and Semantic Role Labeling-
[pdf]
[supp]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Ju-Hee and Kang, Je-Won}, title = {SRTube: Video-Language Pre-Training with Action-Centric Video Tube Features and Semantic Role Labeling}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13689-13699} }
Prompt Highlighter: Interactive Control for Multi-Modal LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yuechen and Qian, Shengju and Peng, Bohao and Liu, Shu and Jia, Jiaya}, title = {Prompt Highlighter: Interactive Control for Multi-Modal LLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13215-13224} }
Continual Learning for Motion Prediction Model via Meta-Representation Learning and Optimal Memory Buffer Retention Strategy-
[pdf]
[bibtex]@InProceedings{Kang_2024_CVPR, author = {Kang, DaeJun and Kum, Dongsuk and Kim, Sanmin}, title = {Continual Learning for Motion Prediction Model via Meta-Representation Learning and Optimal Memory Buffer Retention Strategy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15438-15448} }
EditGuard: Versatile Image Watermarking for Tamper Localization and Copyright Protection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Xuanyu and Li, Runyi and Yu, Jiwen and Xu, Youmin and Li, Weiqi and Zhang, Jian}, title = {EditGuard: Versatile Image Watermarking for Tamper Localization and Copyright Protection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11964-11974} }
FairRAG: Fair Human Generation via Fair Retrieval Augmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shrestha_2024_CVPR, author = {Shrestha, Robik and Zou, Yang and Chen, Qiuyu and Li, Zhiheng and Xie, Yusheng and Deng, Siqi}, title = {FairRAG: Fair Human Generation via Fair Retrieval Augmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11996-12005} }
Template Free Reconstruction of Human-object Interaction with Procedural Interaction Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2024_CVPR, author = {Xie, Xianghui and Bhatnagar, Bharat Lal and Lenssen, Jan Eric and Pons-Moll, Gerard}, title = {Template Free Reconstruction of Human-object Interaction with Procedural Interaction Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10003-10015} }
Open-Vocabulary Video Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Peng and Zhou, Xuerong and Pang, Guansong and Sun, Yujia and Liu, Jing and Wang, Peng and Zhang, Yanning}, title = {Open-Vocabulary Video Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18297-18307} }
ODM: A Text-Image Further Alignment Pre-training Approach for Scene Text Detection and Spotting-
[pdf]
[arXiv]
[bibtex]@InProceedings{Duan_2024_CVPR, author = {Duan, Chen and Fu, Pei and Guo, Shan and Jiang, Qianyi and Wei, Xiaoming}, title = {ODM: A Text-Image Further Alignment Pre-training Approach for Scene Text Detection and Spotting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15587-15597} }
Epistemic Uncertainty Quantification For Pre-Trained Neural Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Hanjing and Ji, Qiang}, title = {Epistemic Uncertainty Quantification For Pre-Trained Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11052-11061} }
Diffusion-ES: Gradient-free Planning with Diffusion for Autonomous and Instruction-guided Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Brian and Su, Huangyuan and Gkanatsios, Nikolaos and Ke, Tsung-Wei and Jain, Ayush and Schneider, Jeff and Fragkiadaki, Katerina}, title = {Diffusion-ES: Gradient-free Planning with Diffusion for Autonomous and Instruction-guided Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15342-15353} }
MRC-Net: 6-DoF Pose Estimation with MultiScale Residual Correlation-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Yuelong and Mao, Yafei and Bala, Raja and Hadap, Sunil}, title = {MRC-Net: 6-DoF Pose Estimation with MultiScale Residual Correlation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10476-10486} }
MonoCD: Monocular 3D Object Detection with Complementary Depths-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2024_CVPR, author = {Yan, Longfei and Yan, Pei and Xiong, Shengzhou and Xiang, Xuanyu and Tan, Yihua}, title = {MonoCD: Monocular 3D Object Detection with Complementary Depths}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10248-10257} }
Consistent3D: Towards Consistent High-Fidelity Text-to-3D Generation with Deterministic Sampling Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Zike and Zhou, Pan and Yi, Xuanyu and Yuan, Xiaoding and Zhang, Hanwang}, title = {Consistent3D: Towards Consistent High-Fidelity Text-to-3D Generation with Deterministic Sampling Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9892-9902} }
ManipLLM: Embodied Multimodal Large Language Model for Object-Centric Robotic Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Xiaoqi and Zhang, Mingxu and Geng, Yiran and Geng, Haoran and Long, Yuxing and Shen, Yan and Zhang, Renrui and Liu, Jiaming and Dong, Hao}, title = {ManipLLM: Embodied Multimodal Large Language Model for Object-Centric Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18061-18070} }
GLaMM: Pixel Grounding Large Multimodal Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rasheed_2024_CVPR, author = {Rasheed, Hanoona and Maaz, Muhammad and Shaji, Sahal and Shaker, Abdelrahman and Khan, Salman and Cholakkal, Hisham and Anwer, Rao M. and Xing, Eric and Yang, Ming-Hsuan and Khan, Fahad S.}, title = {GLaMM: Pixel Grounding Large Multimodal Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13009-13018} }
Incremental Residual Concept Bottleneck Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shang_2024_CVPR, author = {Shang, Chenming and Zhou, Shiji and Zhang, Hengyuan and Ni, Xinzhe and Yang, Yujiu and Wang, Yuwang}, title = {Incremental Residual Concept Bottleneck Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11030-11040} }
SPOC: Imitating Shortest Paths in Simulation Enables Effective Navigation and Manipulation in the Real World-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ehsani_2024_CVPR, author = {Ehsani, Kiana and Gupta, Tanmay and Hendrix, Rose and Salvador, Jordi and Weihs, Luca and Zeng, Kuo-Hao and Singh, Kunal Pratap and Kim, Yejin and Han, Winson and Herrasti, Alvaro and Krishna, Ranjay and Schwenk, Dustin and VanderBilt, Eli and Kembhavi, Aniruddha}, title = {SPOC: Imitating Shortest Paths in Simulation Enables Effective Navigation and Manipulation in the Real World}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16238-16250} }
LoCoNet: Long-Short Context Network for Active Speaker Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Xizi and Cheng, Feng and Bertasius, Gedas}, title = {LoCoNet: Long-Short Context Network for Active Speaker Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18462-18472} }
D3still: Decoupled Differential Distillation for Asymmetric Image Retrieval-
[pdf]
[bibtex]@InProceedings{Xie_2024_CVPR, author = {Xie, Yi and Lin, Yihong and Cai, Wenjie and Xu, Xuemiao and Zhang, Huaidong and Du, Yong and He, Shengfeng}, title = {D3still: Decoupled Differential Distillation for Asymmetric Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17181-17190} }
Learning Triangular Distribution in Visual World-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Ping and Zhang, Xingpeng and Zhou, Chengtao and Fan, Dichao and Tu, Peng and Zhang, Le and Qian, Yanlin}, title = {Learning Triangular Distribution in Visual World}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11019-11029} }
DiaLoc: An Iterative Approach to Embodied Dialog Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Chao and Li, Mohan and Budvytis, Ignas and Liwicki, Stephan}, title = {DiaLoc: An Iterative Approach to Embodied Dialog Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12585-12593} }
Self-Training Large Language Models for Improved Visual Program Synthesis With Visual Reinforcement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Khan_2024_CVPR, author = {Khan, Zaid and BG, Vijay Kumar and Schulter, Samuel and Fu, Yun and Chandraker, Manmohan}, title = {Self-Training Large Language Models for Improved Visual Program Synthesis With Visual Reinforcement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14344-14353} }
MLIP: Enhancing Medical Visual Representation with Divergence Encoder and Knowledge-guided Contrastive Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zhe and Yang, Laurence T. and Ren, Bocheng and Nie, Xin and Gao, Zhangyang and Tan, Cheng and Li, Stan Z.}, title = {MLIP: Enhancing Medical Visual Representation with Divergence Encoder and Knowledge-guided Contrastive Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11704-11714} }
Collaborative Learning of Anomalies with Privacy (CLAP) for Unsupervised Video Anomaly Detection: A New Baseline-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Al-lahham_2024_CVPR, author = {Al-lahham, Anas and Zaheer, Muhammad Zaigham and Tastan, Nurbek and Nandakumar, Karthik}, title = {Collaborative Learning of Anomalies with Privacy (CLAP) for Unsupervised Video Anomaly Detection: A New Baseline}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12416-12425} }
Resource-Efficient Transformer Pruning for Finetuning of Large Models-
[pdf]
[supp]
[bibtex]@InProceedings{Ilhan_2024_CVPR, author = {Ilhan, Fatih and Su, Gong and Tekin, Selim Furkan and Huang, Tiansheng and Hu, Sihao and Liu, Ling}, title = {Resource-Efficient Transformer Pruning for Finetuning of Large Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16206-16215} }
Multimodal Industrial Anomaly Detection by Crossmodal Feature Mapping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Costanzino_2024_CVPR, author = {Costanzino, Alex and Ramirez, Pierluigi Zama and Lisanti, Giuseppe and Di Stefano, Luigi}, title = {Multimodal Industrial Anomaly Detection by Crossmodal Feature Mapping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17234-17243} }
FFF: Fixing Flawed Foundations in Contrastive Pre-Training Results in Very Strong Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bulat_2024_CVPR, author = {Bulat, Adrian and Ouali, Yassine and Tzimiropoulos, Georgios}, title = {FFF: Fixing Flawed Foundations in Contrastive Pre-Training Results in Very Strong Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14172-14182} }
Low-power Continuous Remote Behavioral Localization with Event Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hamann_2024_CVPR, author = {Hamann, Friedhelm and Ghosh, Suman and Martinez, Ignacio Juarez and Hart, Tom and Kacelnik, Alex and Gallego, Guillermo}, title = {Low-power Continuous Remote Behavioral Localization with Event Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18612-18621} }
SportsHHI: A Dataset for Human-Human Interaction Detection in Sports Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Tao and He, Runyu and Wu, Gangshan and Wang, Limin}, title = {SportsHHI: A Dataset for Human-Human Interaction Detection in Sports Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18537-18546} }
CrowdDiff: Multi-hypothesis Crowd Density Estimation using Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ranasinghe_2024_CVPR, author = {Ranasinghe, Yasiru and Nair, Nithin Gopalakrishnan and Bandara, Wele Gedara Chaminda and Patel, Vishal M.}, title = {CrowdDiff: Multi-hypothesis Crowd Density Estimation using Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12809-12819} }
Diffusion-FOF: Single-View Clothed Human Reconstruction via Diffusion-Based Fourier Occupancy Field-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Yuanzhen and Luo, Fei and Xiao, Chunxia}, title = {Diffusion-FOF: Single-View Clothed Human Reconstruction via Diffusion-Based Fourier Occupancy Field}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9525-9534} }
ToNNO: Tomographic Reconstruction of a Neural Network's Output for Weakly Supervised Segmentation of 3D Medical Images-
[pdf]
[supp]
[bibtex]@InProceedings{Schmidt-Mengin_2024_CVPR, author = {Schmidt-Mengin, Marius and Benichoux, Alexis and Belachew, Shibeshih and Komodakis, Nikos and Paragios, Nikos}, title = {ToNNO: Tomographic Reconstruction of a Neural Network's Output for Weakly Supervised Segmentation of 3D Medical Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11428-11438} }
Learning to Navigate Efficiently and Precisely in Real Environments-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bono_2024_CVPR, author = {Bono, Guillaume and Poirier, Herv\'e and Antsfeld, Leonid and Monaci, Gianluca and Chidlovskii, Boris and Wolf, Christian}, title = {Learning to Navigate Efficiently and Precisely in Real Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17837-17846} }
VkD: Improving Knowledge Distillation using Orthogonal Projections-
[pdf]
[supp]
[bibtex]@InProceedings{Miles_2024_CVPR, author = {Miles, Roy and Elezi, Ismail and Deng, Jiankang}, title = {VkD: Improving Knowledge Distillation using Orthogonal Projections}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15720-15730} }
LaRE^2: Latent Reconstruction Error Based Method for Diffusion-Generated Image Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Luo_2024_CVPR, author = {Luo, Yunpeng and Du, Junlong and Yan, Ke and Ding, Shouhong}, title = {LaRE{\textasciicircum}2: Latent Reconstruction Error Based Method for Diffusion-Generated Image Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17006-17015} }
T4P: Test-Time Training of Trajectory Prediction via Masked Autoencoder and Actor-specific Token Memory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2024_CVPR, author = {Park, Daehee and Jeong, Jaeseok and Yoon, Sung-Hoon and Jeong, Jaewoo and Yoon, Kuk-Jin}, title = {T4P: Test-Time Training of Trajectory Prediction via Masked Autoencoder and Actor-specific Token Memory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15065-15076} }
InstaGen: Enhancing Object Detection by Training on Synthetic Dataset-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2024_CVPR, author = {Feng, Chengjian and Zhong, Yujie and Jie, Zequn and Xie, Weidi and Ma, Lin}, title = {InstaGen: Enhancing Object Detection by Training on Synthetic Dataset}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14121-14130} }
Visual Point Cloud Forecasting enables Scalable Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Zetong and Chen, Li and Sun, Yanan and Li, Hongyang}, title = {Visual Point Cloud Forecasting enables Scalable Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14673-14684} }
Synthesize Step-by-Step: Tools Templates and LLMs as Data Generators for Reasoning-Based Chart VQA-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zhuowan and Jasani, Bhavan and Tang, Peng and Ghadar, Shabnam}, title = {Synthesize Step-by-Step: Tools Templates and LLMs as Data Generators for Reasoning-Based Chart VQA}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13613-13623} }
LayoutLLM: Layout Instruction Tuning with Large Language Models for Document Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2024_CVPR, author = {Luo, Chuwei and Shen, Yufan and Zhu, Zhaoqing and Zheng, Qi and Yu, Zhi and Yao, Cong}, title = {LayoutLLM: Layout Instruction Tuning with Large Language Models for Document Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15630-15640} }
ProTeCt: Prompt Tuning for Taxonomic Open Set Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Tz-Ying and Ho, Chih-Hui and Vasconcelos, Nuno}, title = {ProTeCt: Prompt Tuning for Taxonomic Open Set Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16531-16540} }
Masked Autoencoders for Microscopy are Scalable Learners of Cellular Biology-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kraus_2024_CVPR, author = {Kraus, Oren and Kenyon-Dean, Kian and Saberian, Saber and Fallah, Maryam and McLean, Peter and Leung, Jess and Sharma, Vasudev and Khan, Ayla and Balakrishnan, Jia and Celik, Safiye and Beaini, Dominique and Sypetkowski, Maciej and Cheng, Chi Vicky and Morse, Kristen and Makes, Maureen and Mabey, Ben and Earnshaw, Berton}, title = {Masked Autoencoders for Microscopy are Scalable Learners of Cellular Biology}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11757-11768} }
Segment and Caption Anything-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Xiaoke and Wang, Jianfeng and Tang, Yansong and Zhang, Zheng and Hu, Han and Lu, Jiwen and Wang, Lijuan and Liu, Zicheng}, title = {Segment and Caption Anything}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13405-13417} }
Adversarial Backdoor Attack by Naturalistic Data Poisoning on Trajectory Prediction in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Pourkeshavarz_2024_CVPR, author = {Pourkeshavarz, Mozhgan and Sabokrou, Mohammad and Rasouli, Amir}, title = {Adversarial Backdoor Attack by Naturalistic Data Poisoning on Trajectory Prediction in Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14885-14894} }
Low-Rank Approximation for Sparse Attention in Multi-Modal LLMs-
[pdf]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Lin and Chen, Yukang and Yang, Shuai and Ding, Xiaohan and Ge, Yixiao and Chen, Ying-Cong and Shan, Ying}, title = {Low-Rank Approximation for Sparse Attention in Multi-Modal LLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13763-13773} }
TASeg: Temporal Aggregation Network for LiDAR Semantic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Xiaopei and Hou, Yuenan and Huang, Xiaoshui and Lin, Binbin and He, Tong and Zhu, Xinge and Ma, Yuexin and Wu, Boxi and Liu, Haifeng and Cai, Deng and Ouyang, Wanli}, title = {TASeg: Temporal Aggregation Network for LiDAR Semantic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15311-15320} }
Bootstrapping SparseFormers from Vision Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Ziteng and Tong, Zhan and Lin, Kevin Qinghong and Chen, Joya and Shou, Mike Zheng}, title = {Bootstrapping SparseFormers from Vision Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17710-17721} }
EventPS: Real-Time Photometric Stereo Using an Event Camera-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Bohan and Ren, Jieji and Han, Jin and Wang, Feishi and Liang, Jinxiu and Shi, Boxin}, title = {EventPS: Real-Time Photometric Stereo Using an Event Camera}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9602-9611} }
On the Road to Portability: Compressing End-to-End Motion Planner for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2024_CVPR, author = {Feng, Kaituo and Li, Changsheng and Ren, Dongchun and Yuan, Ye and Wang, Guoren}, title = {On the Road to Portability: Compressing End-to-End Motion Planner for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15099-15108} }
PredToken: Predicting Unknown Tokens and Beyond with Coarse-to-Fine Iterative Decoding-
[pdf]
[bibtex]@InProceedings{Nie_2024_CVPR, author = {Nie, Xuesong and Jin, Haoyuan and Yan, Yunfeng and Chen, Xi and Zhu, Zhihang and Qi, Donglian}, title = {PredToken: Predicting Unknown Tokens and Beyond with Coarse-to-Fine Iterative Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18143-18152} }
FairCLIP: Harnessing Fairness in Vision-Language Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2024_CVPR, author = {Luo, Yan and Shi, Min and Khan, Muhammad Osama and Afzal, Muhammad Muneeb and Huang, Hao and Yuan, Shuaihang and Tian, Yu and Song, Luo and Kouhana, Ava and Elze, Tobias and Fang, Yi and Wang, Mengyu}, title = {FairCLIP: Harnessing Fairness in Vision-Language Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12289-12301} }
StreamingFlow: Streaming Occupancy Forecasting with Asynchronous Multi-modal Data Streams via Neural Ordinary Differential Equation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2024_CVPR, author = {Shi, Yining and Jiang, Kun and Wang, Ke and Li, Jiusi and Wang, Yunlong and Yang, Mengmeng and Yang, Diange}, title = {StreamingFlow: Streaming Occupancy Forecasting with Asynchronous Multi-modal Data Streams via Neural Ordinary Differential Equation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14833-14842} }
Language Model Guided Interpretable Video Action Reasoning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Ning and Zhu, Guangming and Li, HS and Zhang, Liang and Shah, Syed Afaq Ali and Bennamoun, Mohammed}, title = {Language Model Guided Interpretable Video Action Reasoning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18878-18887} }
See Say and Segment: Teaching LMMs to Overcome False Premises-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Tsung-Han and Biamby, Giscard and Chan, David and Dunlap, Lisa and Gupta, Ritwik and Wang, Xudong and Gonzalez, Joseph E. and Darrell, Trevor}, title = {See Say and Segment: Teaching LMMs to Overcome False Premises}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13459-13469} }
Is Ego Status All You Need for Open-Loop End-to-End Autonomous Driving?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Zhiqi and Yu, Zhiding and Lan, Shiyi and Li, Jiahan and Kautz, Jan and Lu, Tong and Alvarez, Jose M.}, title = {Is Ego Status All You Need for Open-Loop End-to-End Autonomous Driving?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14864-14873} }
CGI-DM: Digital Copyright Authentication for Diffusion Models via Contrasting Gradient Inversion-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Xiaoyu and Hua, Yang and Liang, Chumeng and Zhang, Jiaru and Wang, Hao and Song, Tao and Guan, Haibing}, title = {CGI-DM: Digital Copyright Authentication for Diffusion Models via Contrasting Gradient Inversion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10812-10821} }
Making Visual Sense of Oracle Bones for You and Me-
[pdf]
[supp]
[bibtex]@InProceedings{Qiao_2024_CVPR, author = {Qiao, Runqi and Yang, Lan and Pang, Kaiyue and Zhang, Honggang}, title = {Making Visual Sense of Oracle Bones for You and Me}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12656-12665} }
MOHO: Learning Single-view Hand-held Object Reconstruction with Multi-view Occlusion-Aware Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Chenyangguang and Jiao, Guanlong and Di, Yan and Wang, Gu and Huang, Ziqin and Zhang, Ruida and Manhardt, Fabian and Fu, Bowen and Tombari, Federico and Ji, Xiangyang}, title = {MOHO: Learning Single-view Hand-held Object Reconstruction with Multi-view Occlusion-Aware Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9992-10002} }
SecondPose: SE(3)-Consistent Dual-Stream Feature Fusion for Category-Level Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Yamei and Di, Yan and Zhai, Guangyao and Manhardt, Fabian and Zhang, Chenyangguang and Zhang, Ruida and Tombari, Federico and Navab, Nassir and Busam, Benjamin}, title = {SecondPose: SE(3)-Consistent Dual-Stream Feature Fusion for Category-Level Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9959-9969} }
EgoGen: An Egocentric Synthetic Data Generator-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Gen and Zhao, Kaifeng and Zhang, Siwei and Lyu, Xiaozhong and Dusmanu, Mihai and Zhang, Yan and Pollefeys, Marc and Tang, Siyu}, title = {EgoGen: An Egocentric Synthetic Data Generator}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14497-14509} }
Video ReCap: Recursive Captioning of Hour-Long Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Islam_2024_CVPR, author = {Islam, Md Mohaiminul and Ho, Ngan and Yang, Xitong and Nagarajan, Tushar and Torresani, Lorenzo and Bertasius, Gedas}, title = {Video ReCap: Recursive Captioning of Hour-Long Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18198-18208} }
Towards Realistic Scene Generation with LiDAR Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ran_2024_CVPR, author = {Ran, Haoxi and Guizilini, Vitor and Wang, Yue}, title = {Towards Realistic Scene Generation with LiDAR Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14738-14748} }
Diffusion Reflectance Map: Single-Image Stochastic Inverse Rendering of Illumination and Reflectance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Enyo_2024_CVPR, author = {Enyo, Yuto and Nishino, Ko}, title = {Diffusion Reflectance Map: Single-Image Stochastic Inverse Rendering of Illumination and Reflectance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11873-11883} }
MMMU: A Massive Multi-discipline Multimodal Understanding and Reasoning Benchmark for Expert AGI-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yue_2024_CVPR, author = {Yue, Xiang and Ni, Yuansheng and Zhang, Kai and Zheng, Tianyu and Liu, Ruoqi and Zhang, Ge and Stevens, Samuel and Jiang, Dongfu and Ren, Weiming and Sun, Yuxuan and Wei, Cong and Yu, Botao and Yuan, Ruibin and Sun, Renliang and Yin, Ming and Zheng, Boyuan and Yang, Zhenzhu and Liu, Yibo and Huang, Wenhao and Sun, Huan and Su, Yu and Chen, Wenhu}, title = {MMMU: A Massive Multi-discipline Multimodal Understanding and Reasoning Benchmark for Expert AGI}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9556-9567} }
EarthLoc: Astronaut Photography Localization by Indexing Earth from Space-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Berton_2024_CVPR, author = {Berton, Gabriele and Stoken, Alex and Caputo, Barbara and Masone, Carlo}, title = {EarthLoc: Astronaut Photography Localization by Indexing Earth from Space}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12754-12764} }
Text-Image Alignment for Diffusion-Based Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kondapaneni_2024_CVPR, author = {Kondapaneni, Neehar and Marks, Markus and Knott, Manuel and Guimaraes, Rogerio and Perona, Pietro}, title = {Text-Image Alignment for Diffusion-Based Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13883-13893} }
MemFlow: Optical Flow Estimation and Prediction with Memory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dong_2024_CVPR, author = {Dong, Qiaole and Fu, Yanwei}, title = {MemFlow: Optical Flow Estimation and Prediction with Memory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19068-19078} }
Novel Class Discovery for Ultra-Fine-Grained Visual Categorization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Yu and Cai, Yaqi and Jia, Qi and Qiu, Binglin and Wang, Weimin and Pu, Nan}, title = {Novel Class Discovery for Ultra-Fine-Grained Visual Categorization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17679-17688} }
DiffusionGAN3D: Boosting Text-guided 3D Generation and Domain Adaptation by Combining 3D GANs and Diffusion Priors-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2024_CVPR, author = {Lei, Biwen and Yu, Kai and Feng, Mengyang and Cui, Miaomiao and Xie, Xuansong}, title = {DiffusionGAN3D: Boosting Text-guided 3D Generation and Domain Adaptation by Combining 3D GANs and Diffusion Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10487-10497} }
Rethinking Boundary Discontinuity Problem for Oriented Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Hang and Liu, Xinyuan and Xu, Haonan and Ma, Yike and Zhu, Zunjie and Yan, Chenggang and Dai, Feng}, title = {Rethinking Boundary Discontinuity Problem for Oriented Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17406-17415} }
SleepVST: Sleep Staging from Near-Infrared Video Signals using Pre-Trained Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Carter_2024_CVPR, author = {Carter, Jonathan F. and Jorge, Jo\~ao and Gibson, Oliver and Tarassenko, Lionel}, title = {SleepVST: Sleep Staging from Near-Infrared Video Signals using Pre-Trained Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12479-12489} }
TimeChat: A Time-sensitive Multimodal Large Language Model for Long Video Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ren_2024_CVPR, author = {Ren, Shuhuai and Yao, Linli and Li, Shicheng and Sun, Xu and Hou, Lu}, title = {TimeChat: A Time-sensitive Multimodal Large Language Model for Long Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14313-14323} }
ManiFPT: Defining and Analyzing Fingerprints of Generative Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Hae Jin and Khayatkhoei, Mahyar and AbdAlmageed, Wael}, title = {ManiFPT: Defining and Analyzing Fingerprints of Generative Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10791-10801} }
Quilt-LLaVA: Visual Instruction Tuning by Extracting Localized Narratives from Open-Source Histopathology Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Seyfioglu_2024_CVPR, author = {Seyfioglu, Mehmet Saygin and Ikezogwo, Wisdom O. and Ghezloo, Fatemeh and Krishna, Ranjay and Shapiro, Linda}, title = {Quilt-LLaVA: Visual Instruction Tuning by Extracting Localized Narratives from Open-Source Histopathology Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13183-13192} }
E-GPS: Explainable Geometry Problem Solving via Top-Down Solver and Bottom-Up Generator-
[pdf]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Wenjun and Zhang, Lingling and Liu, Jun and Tang, Xi and Wang, Yaxian and Wang, Shaowei and Wang, Qianying}, title = {E-GPS: Explainable Geometry Problem Solving via Top-Down Solver and Bottom-Up Generator}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13828-13837} }
Driving into the Future: Multiview Visual Forecasting and Planning with World Model for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yuqi and He, Jiawei and Fan, Lue and Li, Hongxin and Chen, Yuntao and Zhang, Zhaoxiang}, title = {Driving into the Future: Multiview Visual Forecasting and Planning with World Model for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14749-14759} }
OpenESS: Event-based Semantic Scene Understanding with Open Vocabularies-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kong_2024_CVPR, author = {Kong, Lingdong and Liu, Youquan and Ng, Lai Xing and Cottereau, Benoit R. and Ooi, Wei Tsang}, title = {OpenESS: Event-based Semantic Scene Understanding with Open Vocabularies}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15686-15698} }
Do Vision and Language Encoders Represent the World Similarly?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Maniparambil_2024_CVPR, author = {Maniparambil, Mayug and Akshulakov, Raiymbek and Djilali, Yasser Abdelaziz Dahou and El Amine Seddik, Mohamed and Narayan, Sanath and Mangalam, Karttikeya and O'Connor, Noel E.}, title = {Do Vision and Language Encoders Represent the World Similarly?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14334-14343} }
MGMap: Mask-Guided Learning for Online Vectorized HD Map Construction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Xiaolu and Wang, Song and Li, Wentong and Yang, Ruizi and Chen, Junbo and Zhu, Jianke}, title = {MGMap: Mask-Guided Learning for Online Vectorized HD Map Construction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14812-14821} }
VidLA: Video-Language Alignment at Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rizve_2024_CVPR, author = {Rizve, Mamshad Nayeem and Fei, Fan and Unnikrishnan, Jayakrishnan and Tran, Son and Yao, Benjamin Z. and Zeng, Belinda and Shah, Mubarak and Chilimbi, Trishul}, title = {VidLA: Video-Language Alignment at Scale}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14043-14055} }
ERMVP: Communication-Efficient and Collaboration-Robust Multi-Vehicle Perception in Challenging Environments-
[pdf]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Jingyu and Yang, Kun and Wang, Yilei and Wang, Hanqi and Sun, Peng and Song, Liang}, title = {ERMVP: Communication-Efficient and Collaboration-Robust Multi-Vehicle Perception in Challenging Environments}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12575-12584} }
PikeLPN: Mitigating Overlooked Inefficiencies of Low-Precision Neural Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Neseem_2024_CVPR, author = {Neseem, Marina and McCullough, Conor and Hsin, Randy and Leichner, Chas and Li, Shan and Chong, In Suk and Howard, Andrew and Lew, Lukasz and Reda, Sherief and Rautio, Ville-Mikko and Moro, Daniele}, title = {PikeLPN: Mitigating Overlooked Inefficiencies of Low-Precision Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15996-16005} }
CAGE: Controllable Articulation GEneration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Jiayi and Tam, Hou In Ivan and Mahdavi-Amiri, Ali and Savva, Manolis}, title = {CAGE: Controllable Articulation GEneration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17880-17889} }
FocusMAE: Gallbladder Cancer Detection from Ultrasound Videos with Focused Masked Autoencoders-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Basu_2024_CVPR, author = {Basu, Soumen and Gupta, Mayuna and Madan, Chetan and Gupta, Pankaj and Arora, Chetan}, title = {FocusMAE: Gallbladder Cancer Detection from Ultrasound Videos with Focused Masked Autoencoders}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11715-11725} }
Visual Concept Connectome (VCC): Open World Concept Discovery and their Interlayer Connections in Deep Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kowal_2024_CVPR, author = {Kowal, Matthew and Wildes, Richard P. and Derpanis, Konstantinos G.}, title = {Visual Concept Connectome (VCC): Open World Concept Discovery and their Interlayer Connections in Deep Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10895-10905} }
GRAM: Global Reasoning for Multi-Page VQA-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Blau_2024_CVPR, author = {Blau, Tsachi and Fogel, Sharon and Ronen, Roi and Golts, Alona and Ganz, Roy and Ben Avraham, Elad and Aberdam, Aviad and Tsiper, Shahar and Litman, Ron}, title = {GRAM: Global Reasoning for Multi-Page VQA}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15598-15607} }
MS-DETR: Efficient DETR Training with Mixed Supervision-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Chuyang and Sun, Yifan and Wang, Wenhao and Chen, Qiang and Ding, Errui and Yang, Yi and Wang, Jingdong}, title = {MS-DETR: Efficient DETR Training with Mixed Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17027-17036} }
BEVSpread: Spread Voxel Pooling for Bird's-Eye-View Representation in Vision-based Roadside 3D Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Wenjie and Lu, Yehao and Zheng, Guangcong and Zhan, Shuigen and Ye, Xiaoqing and Tan, Zichang and Wang, Jingdong and Wang, Gaoang and Li, Xi}, title = {BEVSpread: Spread Voxel Pooling for Bird's-Eye-View Representation in Vision-based Roadside 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14718-14727} }
DriveWorld: 4D Pre-trained Scene Understanding via World Models for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Min_2024_CVPR, author = {Min, Chen and Zhao, Dawei and Xiao, Liang and Zhao, Jian and Xu, Xinli and Zhu, Zheng and Jin, Lei and Li, Jianshu and Guo, Yulan and Xing, Junliang and Jing, Liping and Nie, Yiming and Dai, Bin}, title = {DriveWorld: 4D Pre-trained Scene Understanding via World Models for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15522-15533} }
Bridging the Gap Between End-to-End and Two-Step Text Spotting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Mingxin and Li, Hongliang and Liu, Yuliang and Bai, Xiang and Jin, Lianwen}, title = {Bridging the Gap Between End-to-End and Two-Step Text Spotting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15608-15618} }
SUGAR: Pre-training 3D Visual Representations for Robotics-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Shizhe and Garcia, Ricardo and Laptev, Ivan and Schmid, Cordelia}, title = {SUGAR: Pre-training 3D Visual Representations for Robotics}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18049-18060} }
PairAug: What Can Augmented Image-Text Pairs Do for Radiology?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xie_2024_CVPR, author = {Xie, Yutong and Chen, Qi and Wang, Sinuo and To, Minh-Son and Lee, Iris and Khoo, Ee Win and Hendy, Kerolos and Koh, Daniel and Xia, Yong and Wu, Qi}, title = {PairAug: What Can Augmented Image-Text Pairs Do for Radiology?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11652-11661} }
Harnessing Large Language Models for Training-free Video Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zanella_2024_CVPR, author = {Zanella, Luca and Menapace, Willi and Mancini, Massimiliano and Wang, Yiming and Ricci, Elisa}, title = {Harnessing Large Language Models for Training-free Video Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18527-18536} }
FineParser: A Fine-grained Spatio-temporal Action Parser for Human-centric Action Quality Assessment-
[pdf]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Jinglin and Yin, Sibo and Zhao, Guohao and Wang, Zishuo and Peng, Yuxin}, title = {FineParser: A Fine-grained Spatio-temporal Action Parser for Human-centric Action Quality Assessment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14628-14637} }
Language Models as Black-Box Optimizers for Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Shihong and Yu, Samuel and Lin, Zhiqiu and Pathak, Deepak and Ramanan, Deva}, title = {Language Models as Black-Box Optimizers for Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12687-12697} }
Exploring Orthogonality in Open World Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Zhicheng and Li, Jinghan and Mu, Yadong}, title = {Exploring Orthogonality in Open World Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17302-17312} }
Mitigating Object Hallucinations in Large Vision-Language Models through Visual Contrastive Decoding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Leng_2024_CVPR, author = {Leng, Sicong and Zhang, Hang and Chen, Guanzheng and Li, Xin and Lu, Shijian and Miao, Chunyan and Bing, Lidong}, title = {Mitigating Object Hallucinations in Large Vision-Language Models through Visual Contrastive Decoding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13872-13882} }
Sculpt3D: Multi-View Consistent Text-to-3D Generation with Sparse 3D Prior-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Cheng and Yang, Xiaofeng and Yang, Fan and Feng, Chengzeng and Fu, Zhoujie and Foo, Chuan-Sheng and Lin, Guosheng and Liu, Fayao}, title = {Sculpt3D: Multi-View Consistent Text-to-3D Generation with Sparse 3D Prior}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10228-10237} }
ScanFormer: Referring Expression Comprehension by Iteratively Scanning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Su_2024_CVPR, author = {Su, Wei and Miao, Peihan and Dou, Huanzhang and Li, Xi}, title = {ScanFormer: Referring Expression Comprehension by Iteratively Scanning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13449-13458} }
Model Inversion Robustness: Can Transfer Learning Help?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ho_2024_CVPR, author = {Ho, Sy-Tuyen and Hao, Koh Jun and Chandrasegaran, Keshigeyan and Nguyen, Ngoc-Bao and Cheung, Ngai-Man}, title = {Model Inversion Robustness: Can Transfer Learning Help?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12183-12193} }
RLHF-V: Towards Trustworthy MLLMs via Behavior Alignment from Fine-grained Correctional Human Feedback-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Tianyu and Yao, Yuan and Zhang, Haoye and He, Taiwen and Han, Yifeng and Cui, Ganqu and Hu, Jinyi and Liu, Zhiyuan and Zheng, Hai-Tao and Sun, Maosong and Chua, Tat-Seng}, title = {RLHF-V: Towards Trustworthy MLLMs via Behavior Alignment from Fine-grained Correctional Human Feedback}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13807-13816} }
ZeroShape: Regression-based Zero-shot Shape Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Zixuan and Stojanov, Stefan and Thai, Anh and Jampani, Varun and Rehg, James M.}, title = {ZeroShape: Regression-based Zero-shot Shape Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10061-10071} }
The STVchrono Dataset: Towards Continuous Change Recognition in Time-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Yanjun and Qiu, Yue and Khan, Mariia and Matsuzawa, Fumiya and Iwata, Kenji}, title = {The STVchrono Dataset: Towards Continuous Change Recognition in Time}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14111-14120} }
SocialCircle: Learning the Angle-based Social Interaction Representation for Pedestrian Trajectory Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wong_2024_CVPR, author = {Wong, Conghao and Xia, Beihao and Zou, Ziqian and Wang, Yulong and You, Xinge}, title = {SocialCircle: Learning the Angle-based Social Interaction Representation for Pedestrian Trajectory Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19005-19015} }
Neighbor Relations Matter in Video Scene Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Tan_2024_CVPR, author = {Tan, Jiawei and Wang, Hongxing and Li, Jiaxin and Ou, Zhilong and Qian, Zhangbin}, title = {Neighbor Relations Matter in Video Scene Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18473-18482} }
Text-to-Image Diffusion Models are Great Sketch-Photo Matchmakers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Koley_2024_CVPR, author = {Koley, Subhadeep and Bhunia, Ayan Kumar and Sain, Aneeshan and Chowdhury, Pinaki Nath and Xiang, Tao and Song, Yi-Zhe}, title = {Text-to-Image Diffusion Models are Great Sketch-Photo Matchmakers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16826-16837} }
Mudslide: A Universal Nuclear Instance Segmentation Method-
[pdf]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Jun}, title = {Mudslide: A Universal Nuclear Instance Segmentation Method}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11673-11682} }
Modeling Multimodal Social Interactions: New Challenges and Baselines with Densely Aligned Representations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lee_2024_CVPR, author = {Lee, Sangmin and Lai, Bolin and Ryan, Fiona and Boote, Bikram and Rehg, James M.}, title = {Modeling Multimodal Social Interactions: New Challenges and Baselines with Densely Aligned Representations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14585-14595} }
Prompt-Driven Dynamic Object-Centric Learning for Single Domain Generalization-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Deng and Wu, Aming and Wang, Yaowei and Han, Yahong}, title = {Prompt-Driven Dynamic Object-Centric Learning for Single Domain Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17606-17615} }
Dual Pose-invariant Embeddings: Learning Category and Object-specific Discriminative Representations for Recognition and Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sarkar_2024_CVPR, author = {Sarkar, Rohan and Kak, Avinash}, title = {Dual Pose-invariant Embeddings: Learning Category and Object-specific Discriminative Representations for Recognition and Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17077-17085} }
vid-TLDR: Training Free Token Merging for Light-weight Video Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Choi_2024_CVPR, author = {Choi, Joonmyung and Lee, Sanghyeok and Chu, Jaewon and Choi, Minhyuk and Kim, Hyunwoo J.}, title = {vid-TLDR: Training Free Token Merging for Light-weight Video Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18771-18781} }
DRESS: Instructing Large Vision-Language Models to Align and Interact with Humans via Natural Language Feedback-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Yangyi and Sikka, Karan and Cogswell, Michael and Ji, Heng and Divakaran, Ajay}, title = {DRESS: Instructing Large Vision-Language Models to Align and Interact with Humans via Natural Language Feedback}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14239-14250} }
Salience DETR: Enhancing Detection Transformer with Hierarchical Salience Filtering Refinement-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hou_2024_CVPR, author = {Hou, Xiuquan and Liu, Meiqin and Zhang, Senlin and Wei, Ping and Chen, Badong}, title = {Salience DETR: Enhancing Detection Transformer with Hierarchical Salience Filtering Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17574-17583} }
Towards More Unified In-context Visual Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sheng_2024_CVPR, author = {Sheng, Dianmo and Chen, Dongdong and Tan, Zhentao and Liu, Qiankun and Chu, Qi and Bao, Jianmin and Gong, Tao and Liu, Bin and Xu, Shengwei and Yu, Nenghai}, title = {Towards More Unified In-context Visual Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13362-13372} }
F3Loc: Fusion and Filtering for Floorplan Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Changan and Wang, Rui and Vogel, Christoph and Pollefeys, Marc}, title = {F3Loc: Fusion and Filtering for Floorplan Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18029-18038} }
Multi-View Attentive Contextualization for Multi-View 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Xianpeng and Zheng, Ce and Qian, Ming and Xue, Nan and Chen, Chen and Zhang, Zhebin and Li, Chen and Wu, Tianfu}, title = {Multi-View Attentive Contextualization for Multi-View 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16688-16698} }
MemSAM: Taming Segment Anything Model for Echocardiography Video Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Deng_2024_CVPR, author = {Deng, Xiaolong and Wu, Huisi and Zeng, Runhao and Qin, Jing}, title = {MemSAM: Taming Segment Anything Model for Echocardiography Video Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9622-9631} }
Language-conditioned Detection Transformer-
[pdf]
[supp]
[bibtex]@InProceedings{Cho_2024_CVPR, author = {Cho, Jang Hyun and Kr{\"a}henb{\"u}hl, Philipp}, title = {Language-conditioned Detection Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16593-16603} }
Improving Single Domain-Generalized Object Detection: A Focus on Diversification and Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Danish_2024_CVPR, author = {Danish, Muhammad Sohail and Khan, Muhammad Haris and Munir, Muhammad Akhtar and Sarfraz, M. Saquib and Ali, Mohsen}, title = {Improving Single Domain-Generalized Object Detection: A Focus on Diversification and Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17732-17742} }
ARTrackV2: Prompting Autoregressive Tracker Where to Look and How to Describe-
[pdf]
[arXiv]
[bibtex]@InProceedings{Bai_2024_CVPR, author = {Bai, Yifan and Zhao, Zeyang and Gong, Yihong and Wei, Xing}, title = {ARTrackV2: Prompting Autoregressive Tracker Where to Look and How to Describe}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19048-19057} }
A Vision Check-up for Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sharma_2024_CVPR, author = {Sharma, Pratyusha and Shaham, Tamar Rott and Baradad, Manel and Fu, Stephanie and Rodriguez-Munoz, Adrian and Duggal, Shivam and Isola, Phillip and Torralba, Antonio}, title = {A Vision Check-up for Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14410-14419} }
SyncMask: Synchronized Attentional Masking for Fashion-centric Vision-Language Pretraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Chull Hwan and Hwang, Taebaek and Yoon, Jooyoung and Choi, Shunghyun and Gu, Yeong Hyeon}, title = {SyncMask: Synchronized Attentional Masking for Fashion-centric Vision-Language Pretraining}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13948-13957} }
Countering Personalized Text-to-Image Generation with Influence Watermarks-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Hanwen and Sun, Zhicheng and Mu, Yadong}, title = {Countering Personalized Text-to-Image Generation with Influence Watermarks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12257-12267} }
PromptAD: Learning Prompts with only Normal Samples for Few-Shot Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Xiaofan and Zhang, Zhizhong and Tan, Xin and Chen, Chengwei and Qu, Yanyun and Xie, Yuan and Ma, Lizhuang}, title = {PromptAD: Learning Prompts with only Normal Samples for Few-Shot Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16838-16848} }
DETRs Beat YOLOs on Real-time Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Yian and Lv, Wenyu and Xu, Shangliang and Wei, Jinman and Wang, Guanzhong and Dang, Qingqing and Liu, Yi and Chen, Jie}, title = {DETRs Beat YOLOs on Real-time Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16965-16974} }
An Asymmetric Augmented Self-Supervised Learning Method for Unsupervised Fine-Grained Image Hashing-
[pdf]
[supp]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Feiran and Zhang, Chenlin and Guo, Jiangliang and Wei, Xiu-Shen and Zhao, Lin and Xu, Anqi and Gao, Lingyan}, title = {An Asymmetric Augmented Self-Supervised Learning Method for Unsupervised Fine-Grained Image Hashing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17648-17657} }
Exploring Pose-Aware Human-Object Interaction via Hybrid Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Eastman Z Y and Li, Yali and Wang, Yuan and Wang, Shengjin}, title = {Exploring Pose-Aware Human-Object Interaction via Hybrid Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17815-17825} }
Density-Adaptive Model Based on Motif Matrix for Multi-Agent Trajectory Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Wen_2024_CVPR, author = {Wen, Di and Xu, Haoran and He, Zhaocheng and Wu, Zhe and Tan, Guang and Peng, Peixi}, title = {Density-Adaptive Model Based on Motif Matrix for Multi-Agent Trajectory Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14822-14832} }
Contrastive Learning for DeepFake Classification and Localization via Multi-Label Ranking-
[pdf]
[supp]
[bibtex]@InProceedings{Hong_2024_CVPR, author = {Hong, Cheng-Yao and Hsu, Yen-Chi and Liu, Tyng-Luh}, title = {Contrastive Learning for DeepFake Classification and Localization via Multi-Label Ranking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17627-17637} }
Enhancing the Power of OOD Detection via Sample-Aware Model Selection-
[pdf]
[supp]
[bibtex]@InProceedings{Xue_2024_CVPR, author = {Xue, Feng and He, Zi and Zhang, Yuan and Xie, Chuanlong and Li, Zhenguo and Tan, Falong}, title = {Enhancing the Power of OOD Detection via Sample-Aware Model Selection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17148-17157} }
Collaborative Semantic Occupancy Prediction with Hybrid Feature Fusion in Connected Automated Vehicles-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Rui and Liang, Chenwei and Cao, Hu and Yan, Zhiran and Zimmer, Walter and Gross, Markus and Festag, Andreas and Knoll, Alois}, title = {Collaborative Semantic Occupancy Prediction with Hybrid Feature Fusion in Connected Automated Vehicles}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17996-18006} }
Towards Generalizable Tumor Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Qi and Chen, Xiaoxi and Song, Haorui and Xiong, Zhiwei and Yuille, Alan and Wei, Chen and Zhou, Zongwei}, title = {Towards Generalizable Tumor Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11147-11158} }
EpiDiff: Enhancing Multi-View Synthesis via Localized Epipolar-Constrained Diffusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Zehuan and Wen, Hao and Dong, Junting and Wang, Yaohui and Li, Yangguang and Chen, Xinyuan and Cao, Yan-Pei and Liang, Ding and Qiao, Yu and Dai, Bo and Sheng, Lu}, title = {EpiDiff: Enhancing Multi-View Synthesis via Localized Epipolar-Constrained Diffusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9784-9794} }
On the Faithfulness of Vision Transformer Explanations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Junyi and Kang, Weitai and Tang, Hao and Hong, Yuan and Yan, Yan}, title = {On the Faithfulness of Vision Transformer Explanations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10936-10945} }
Pixel-level Semantic Correspondence through Layout-aware Representation Learning and Multi-scale Matching Integration-
[pdf]
[supp]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Yixuan and Yin, Zhangyue and Wang, Haibo and Wang, Yan and Qiu, Xipeng and Ge, Weifeng and Zhang, Wenqiang}, title = {Pixel-level Semantic Correspondence through Layout-aware Representation Learning and Multi-scale Matching Integration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17047-17056} }
Dynamic Graph Representation with Knowledge-aware Attention for Histopathology Whole Slide Image Analysis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Jiawen and Chen, Yuxuan and Chu, Hongbo and Sun, Qiehe and Guan, Tian and Han, Anjia and He, Yonghong}, title = {Dynamic Graph Representation with Knowledge-aware Attention for Histopathology Whole Slide Image Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11323-11332} }
Align Before Adapt: Leveraging Entity-to-Region Alignments for Generalizable Video Action Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Yifei and Chen, Dapeng and Liu, Ruijin and Zhou, Sai and Xue, Wenyuan and Peng, Wei}, title = {Align Before Adapt: Leveraging Entity-to-Region Alignments for Generalizable Video Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18688-18698} }
Towards Robust 3D Object Detection with LiDAR and 4D Radar Fusion in Various Weather Conditions-
[pdf]
[supp]
[bibtex]@InProceedings{Chae_2024_CVPR, author = {Chae, Yujeong and Kim, Hyeonseong and Yoon, Kuk-Jin}, title = {Towards Robust 3D Object Detection with LiDAR and 4D Radar Fusion in Various Weather Conditions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15162-15172} }
Enhancing 3D Fidelity of Text-to-3D using Cross-View Correspondences-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Seungwook and Li, Kejie and Deng, Xueqing and Shi, Yichun and Cho, Minsu and Wang, Peng}, title = {Enhancing 3D Fidelity of Text-to-3D using Cross-View Correspondences}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10649-10658} }
Bezier Everywhere All at Once: Learning Drivable Lanes as Bezier Graphs-
[pdf]
[supp]
[bibtex]@InProceedings{Blayney_2024_CVPR, author = {Blayney, Hugh and Tian, Hanlin and Scott, Hamish and Goldbeck, Nils and Stetson, Chess and Angeloudis, Panagiotis}, title = {Bezier Everywhere All at Once: Learning Drivable Lanes as Bezier Graphs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15365-15374} }
Can I Trust Your Answer? Visually Grounded Video Question Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2024_CVPR, author = {Xiao, Junbin and Yao, Angela and Li, Yicong and Chua, Tat-Seng}, title = {Can I Trust Your Answer? Visually Grounded Video Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13204-13214} }
Polos: Multimodal Metric Learning from Human Feedback for Image Captioning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wada_2024_CVPR, author = {Wada, Yuiga and Kaneda, Kanta and Saito, Daichi and Sugiura, Komei}, title = {Polos: Multimodal Metric Learning from Human Feedback for Image Captioning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13559-13568} }
Detours for Navigating Instructional Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ashutosh_2024_CVPR, author = {Ashutosh, Kumar and Xue, Zihui and Nagarajan, Tushar and Grauman, Kristen}, title = {Detours for Navigating Instructional Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18804-18815} }
Discontinuity-preserving Normal Integration with Auxiliary Edges-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Hyomin and Jung, Yucheol and Lee, Seungyong}, title = {Discontinuity-preserving Normal Integration with Auxiliary Edges}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11915-11923} }
Self-Supervised Multi-Object Tracking with Path Consistency-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2024_CVPR, author = {Lu, Zijia and Shuai, Bing and Chen, Yanbei and Xu, Zhenlin and Modolo, Davide}, title = {Self-Supervised Multi-Object Tracking with Path Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19016-19026} }
Improving Distant 3D Object Detection Using 2D Box Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Zetong and Yu, Zhiding and Choy, Chris and Wang, Renhao and Anandkumar, Anima and Alvarez, Jose M.}, title = {Improving Distant 3D Object Detection Using 2D Box Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14853-14863} }
HDQMF: Holographic Feature Decomposition Using Quantum Algorithms-
[pdf]
[supp]
[bibtex]@InProceedings{Poduval_2024_CVPR, author = {Poduval, Prathyush Prasanth and Zou, Zhuowen and Imani, Mohsen}, title = {HDQMF: Holographic Feature Decomposition Using Quantum Algorithms}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10978-10987} }
UniPAD: A Universal Pre-training Paradigm for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Honghui and Zhang, Sha and Huang, Di and Wu, Xiaoyang and Zhu, Haoyi and He, Tong and Tang, Shixiang and Zhao, Hengshuang and Qiu, Qibo and Lin, Binbin and He, Xiaofei and Ouyang, Wanli}, title = {UniPAD: A Universal Pre-training Paradigm for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15238-15250} }
SocialCounterfactuals: Probing and Mitigating Intersectional Social Biases in Vision-Language Models with Counterfactual Examples-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Howard_2024_CVPR, author = {Howard, Phillip and Madasu, Avinash and Le, Tiep and Moreno, Gustavo Lujan and Bhiwandiwalla, Anahita and Lal, Vasudev}, title = {SocialCounterfactuals: Probing and Mitigating Intersectional Social Biases in Vision-Language Models with Counterfactual Examples}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11975-11985} }
Efficient Privacy-Preserving Visual Localization Using 3D Ray Clouds-
[pdf]
[supp]
[bibtex]@InProceedings{Moon_2024_CVPR, author = {Moon, Heejoon and Lee, Chunghwan and Hong, Je Hyeong}, title = {Efficient Privacy-Preserving Visual Localization Using 3D Ray Clouds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9773-9783} }
CNC-Net: Self-Supervised Learning for CNC Machining Operations-
[pdf]
[supp]
[bibtex]@InProceedings{Yavartanoo_2024_CVPR, author = {Yavartanoo, Mohsen and Hong, Sangmin and Neshatavar, Reyhaneh and Lee, Kyoung Mu}, title = {CNC-Net: Self-Supervised Learning for CNC Machining Operations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9816-9825} }
OPERA: Alleviating Hallucination in Multi-Modal Large Language Models via Over-Trust Penalty and Retrospection-Allocation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Qidong and Dong, Xiaoyi and Zhang, Pan and Wang, Bin and He, Conghui and Wang, Jiaqi and Lin, Dahua and Zhang, Weiming and Yu, Nenghai}, title = {OPERA: Alleviating Hallucination in Multi-Modal Large Language Models via Over-Trust Penalty and Retrospection-Allocation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13418-13427} }
Volumetric Environment Representation for Vision-Language Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Rui and Wang, Wenguan and Yang, Yi}, title = {Volumetric Environment Representation for Vision-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16317-16328} }
NeRFDeformer: NeRF Transformation from a Single View via 3D Scene Flows-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2024_CVPR, author = {Tang, Zhenggang and Ren, Zhongzheng and Zhao, Xiaoming and Wen, Bowen and Tremblay, Jonathan and Birchfield, Stan and Schwing, Alexander}, title = {NeRFDeformer: NeRF Transformation from a Single View via 3D Scene Flows}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10293-10303} }
DiffusionTrack: Point Set Diffusion Model for Visual Object Tracking-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2024_CVPR, author = {Xie, Fei and Wang, Zhongdao and Ma, Chao}, title = {DiffusionTrack: Point Set Diffusion Model for Visual Object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19113-19124} }
Scaling Diffusion Models to Real-World 3D LiDAR Scene Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nunes_2024_CVPR, author = {Nunes, Lucas and Marcuzzi, Rodrigo and Mersch, Benedikt and Behley, Jens and Stachniss, Cyrill}, title = {Scaling Diffusion Models to Real-World 3D LiDAR Scene Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14770-14780} }
Physical Backdoor: Towards Temperature-based Backdoor Attacks in the Physical World-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yin_2024_CVPR, author = {Yin, Wen and Lou, Jian and Zhou, Pan and Xie, Yulai and Feng, Dan and Sun, Yuhua and Zhang, Tailai and Sun, Lichao}, title = {Physical Backdoor: Towards Temperature-based Backdoor Attacks in the Physical World}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12733-12743} }
Make Me a BNN: A Simple Strategy for Estimating Bayesian Uncertainty from Pre-trained Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Franchi_2024_CVPR, author = {Franchi, Gianni and Laurent, Olivier and Leguery, Maxence and Bursuc, Andrei and Pilzer, Andrea and Yao, Angela}, title = {Make Me a BNN: A Simple Strategy for Estimating Bayesian Uncertainty from Pre-trained Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12194-12204} }
Language-only Training of Zero-shot Composed Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Gu_2024_CVPR, author = {Gu, Geonmo and Chun, Sanghyuk and Kim, Wonjae and Kang, Yoohoon and Yun, Sangdoo}, title = {Language-only Training of Zero-shot Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13225-13234} }
Efficient and Effective Weakly-Supervised Action Segmentation via Action-Transition-Aware Boundary Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Angchi and Zheng, Wei-Shi}, title = {Efficient and Effective Weakly-Supervised Action Segmentation via Action-Transition-Aware Boundary Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18253-18262} }
Pixel-Aligned Language Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Jiarui and Zhou, Xingyi and Yan, Shen and Gu, Xiuye and Arnab, Anurag and Sun, Chen and Wang, Xiaolong and Schmid, Cordelia}, title = {Pixel-Aligned Language Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13030-13039} }
SNIDA: Unlocking Few-Shot Object Detection with Non-linear Semantic Decoupling Augmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yanjie and Zou, Xu and Yan, Luxin and Zhong, Sheng and Zhou, Jiahuan}, title = {SNIDA: Unlocking Few-Shot Object Detection with Non-linear Semantic Decoupling Augmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12544-12553} }
Not All Voxels Are Equal: Hardness-Aware Semantic Scene Completion with Self-Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Song and Yu, Jiawei and Li, Wentong and Liu, Wenyu and Liu, Xiaolu and Chen, Junbo and Zhu, Jianke}, title = {Not All Voxels Are Equal: Hardness-Aware Semantic Scene Completion with Self-Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14792-14801} }
3D-LFM: Lifting Foundation Model-
[pdf]
[supp]
[bibtex]@InProceedings{Dabhi_2024_CVPR, author = {Dabhi, Mosam and Jeni, L{\'a}szl{\'o} A. and Lucey, Simon}, title = {3D-LFM: Lifting Foundation Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10466-10475} }
Quantifying Uncertainty in Motion Prediction with Variational Bayesian Mixture-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2024_CVPR, author = {Lu, Juanwu and Cui, Can and Ma, Yunsheng and Bera, Aniket and Wang, Ziran}, title = {Quantifying Uncertainty in Motion Prediction with Variational Bayesian Mixture}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15428-15437} }
Explaining CLIP's Performance Disparities on Data from Blind/Low Vision Users-
[pdf]
[supp]
[bibtex]@InProceedings{Massiceti_2024_CVPR, author = {Massiceti, Daniela and Longden, Camilla and Slowik, Agnieszka and Wills, Samuel and Grayson, Martin and Morrison, Cecily}, title = {Explaining CLIP's Performance Disparities on Data from Blind/Low Vision Users}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12172-12182} }
SingularTrajectory: Universal Trajectory Predictor Using Diffusion Model-
[pdf]
[arXiv]
[bibtex]@InProceedings{Bae_2024_CVPR, author = {Bae, Inhwan and Park, Young-Jae and Jeon, Hae-Gon}, title = {SingularTrajectory: Universal Trajectory Predictor Using Diffusion Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17890-17901} }
Generating Handwritten Mathematical Expressions From Symbol Graphs: An End-to-End Pipeline-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Yu and Gao, Fei and Zhang, Yanguang and Qiao, Maoying and Wang, Nannan}, title = {Generating Handwritten Mathematical Expressions From Symbol Graphs: An End-to-End Pipeline}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15675-15685} }
Why Not Use Your Textbook? Knowledge-Enhanced Procedure Planning of Instructional Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nagasinghe_2024_CVPR, author = {Nagasinghe, Kumaranage Ravindu Yasas and Zhou, Honglu and Gunawardhana, Malitha and Min, Martin Renqiang and Harari, Daniel and Khan, Muhammad Haris}, title = {Why Not Use Your Textbook? Knowledge-Enhanced Procedure Planning of Instructional Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18816-18826} }
FreeKD: Knowledge Distillation via Semantic Frequency Prompt-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Yuan and Huang, Tao and Liu, Jiaming and Jiang, Tao and Cheng, Kuan and Zhang, Shanghang}, title = {FreeKD: Knowledge Distillation via Semantic Frequency Prompt}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15931-15940} }
Can't Make an Omelette Without Breaking Some Eggs: Plausible Action Anticipation Using Large Video-Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Mittal_2024_CVPR, author = {Mittal, Himangi and Agarwal, Nakul and Lo, Shao-Yuan and Lee, Kwonjoon}, title = {Can't Make an Omelette Without Breaking Some Eggs: Plausible Action Anticipation Using Large Video-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18580-18590} }
On the Estimation of Image-matching Uncertainty in Visual Place Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zaffar_2024_CVPR, author = {Zaffar, Mubariz and Nan, Liangliang and Kooij, Julian F. P.}, title = {On the Estimation of Image-matching Uncertainty in Visual Place Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17743-17753} }
Prompt-Enhanced Multiple Instance Learning for Weakly Supervised Video Anomaly Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Junxi and Li, Liang and Su, Li and Zha, Zheng-jun and Huang, Qingming}, title = {Prompt-Enhanced Multiple Instance Learning for Weakly Supervised Video Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18319-18329} }
Non-autoregressive Sequence-to-Sequence Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2024_CVPR, author = {Shi, Kunyu and Dong, Qi and Goncalves, Luis and Tu, Zhuowen and Soatto, Stefano}, title = {Non-autoregressive Sequence-to-Sequence Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13603-13612} }
Active Object Detection with Knowledge Aggregation and Distillation from Large Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Dejie and Liu, Yang}, title = {Active Object Detection with Knowledge Aggregation and Distillation from Large Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16624-16633} }
Weak-to-Strong 3D Object Detection with X-Ray Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Gambashidze_2024_CVPR, author = {Gambashidze, Alexander and Dadukin, Aleksandr and Golyadkin, Maxim and Razzhivina, Maria and Makarov, Ilya}, title = {Weak-to-Strong 3D Object Detection with X-Ray Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15055-15064} }
Active Open-Vocabulary Recognition: Let Intelligent Moving Mitigate CLIP Limitations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2024_CVPR, author = {Fan, Lei and Zhou, Jianxiong and Xing, Xiaoying and Wu, Ying}, title = {Active Open-Vocabulary Recognition: Let Intelligent Moving Mitigate CLIP Limitations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16394-16403} }
Efficient Meshflow and Optical Flow Estimation from Event Cameras-
[pdf]
[bibtex]@InProceedings{Luo_2024_CVPR, author = {Luo, Xinglong and Luo, Ao and Wang, Zhengning and Lin, Chunyu and Zeng, Bing and Liu, Shuaicheng}, title = {Efficient Meshflow and Optical Flow Estimation from Event Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19198-19207} }
Visual Program Distillation: Distilling Tools and Programmatic Reasoning into Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hu_2024_CVPR, author = {Hu, Yushi and Stretcu, Otilia and Lu, Chun-Ta and Viswanathan, Krishnamurthy and Hata, Kenji and Luo, Enming and Krishna, Ranjay and Fuxman, Ariel}, title = {Visual Program Distillation: Distilling Tools and Programmatic Reasoning into Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9590-9601} }
A Backpack Full of Skills: Egocentric Video Understanding with Diverse Task Perspectives-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peirone_2024_CVPR, author = {Peirone, Simone Alberto and Pistilli, Francesca and Alliegro, Antonio and Averta, Giuseppe}, title = {A Backpack Full of Skills: Egocentric Video Understanding with Diverse Task Perspectives}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18275-18285} }
Visual In-Context Prompting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Feng and Jiang, Qing and Zhang, Hao and Ren, Tianhe and Liu, Shilong and Zou, Xueyan and Xu, Huaizhe and Li, Hongyang and Yang, Jianwei and Li, Chunyuan and Zhang, Lei and Gao, Jianfeng}, title = {Visual In-Context Prompting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12861-12871} }
Instruct-ReID: A Multi-purpose Person Re-identification Task with Instructions-
[pdf]
[supp]
[bibtex]@InProceedings{He_2024_CVPR, author = {He, Weizhen and Deng, Yiheng and Tang, Shixiang and Chen, Qihao and Xie, Qingsong and Wang, Yizhou and Bai, Lei and Zhu, Feng and Zhao, Rui and Ouyang, Wanli and Qi, Donglian and Yan, Yunfeng}, title = {Instruct-ReID: A Multi-purpose Person Re-identification Task with Instructions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17521-17531} }
IBD-SLAM: Learning Image-Based Depth Fusion for Generalizable SLAM-
[pdf]
[supp]
[bibtex]@InProceedings{Yin_2024_CVPR, author = {Yin, Minghao and Wu, Shangzhe and Han, Kai}, title = {IBD-SLAM: Learning Image-Based Depth Fusion for Generalizable SLAM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10563-10573} }
CPLIP: Zero-Shot Learning for Histopathology with Comprehensive Vision-Language Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Javed_2024_CVPR, author = {Javed, Sajid and Mahmood, Arif and Ganapathi, Iyyakutti Iyappan and Dharejo, Fayaz Ali and Werghi, Naoufel and Bennamoun, Mohammed}, title = {CPLIP: Zero-Shot Learning for Histopathology with Comprehensive Vision-Language Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11450-11459} }
Reg-PTQ: Regression-specialized Post-training Quantization for Fully Quantized Object Detector-
[pdf]
[supp]
[bibtex]@InProceedings{Ding_2024_CVPR, author = {Ding, Yifu and Feng, Weilun and Chen, Chuyan and Guo, Jinyang and Liu, Xianglong}, title = {Reg-PTQ: Regression-specialized Post-training Quantization for Fully Quantized Object Detector}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16174-16184} }
Action Scene Graphs for Long-Form Understanding of Egocentric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rodin_2024_CVPR, author = {Rodin, Ivan and Furnari, Antonino and Min, Kyle and Tripathi, Subarna and Farinella, Giovanni Maria}, title = {Action Scene Graphs for Long-Form Understanding of Egocentric Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18622-18632} }
De-confounded Data-free Knowledge Distillation for Handling Distribution Shifts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yuzheng and Yang, Dingkang and Chen, Zhaoyu and Liu, Yang and Liu, Siao and Zhang, Wenqiang and Zhang, Lihua and Qi, Lizhe}, title = {De-confounded Data-free Knowledge Distillation for Handling Distribution Shifts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12615-12625} }
Siamese Learning with Joint Alignment and Regression for Weakly-Supervised Video Paragraph Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2024_CVPR, author = {Tan, Chaolei and Lai, Jianhuang and Zheng, Wei-Shi and Hu, Jian-Fang}, title = {Siamese Learning with Joint Alignment and Regression for Weakly-Supervised Video Paragraph Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13569-13580} }
LEOD: Label-Efficient Object Detection for Event Cameras-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Ziyi and Gehrig, Mathias and Lyu, Qing and Liu, Xudong and Gilitschenski, Igor}, title = {LEOD: Label-Efficient Object Detection for Event Cameras}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16933-16943} }
Morphological Prototyping for Unsupervised Slide Representation Learning in Computational Pathology-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Song_2024_CVPR, author = {Song, Andrew H. and Chen, Richard J. and Ding, Tong and Williamson, Drew F.K. and Jaume, Guillaume and Mahmood, Faisal}, title = {Morphological Prototyping for Unsupervised Slide Representation Learning in Computational Pathology}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11566-11578} }
Dense Optical Tracking: Connecting the Dots-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Le_Moing_2024_CVPR, author = {Le Moing, Guillaume and Ponce, Jean and Schmid, Cordelia}, title = {Dense Optical Tracking: Connecting the Dots}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19187-19197} }
A Stealthy Wrongdoer: Feature-Oriented Reconstruction Attack against Split Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Xiaoyang and Yang, Mengda and Yi, Wenzhe and Li, Ziang and Wang, Juan and Hu, Hongxin and Zhuang, Yong and Liu, Yaxin}, title = {A Stealthy Wrongdoer: Feature-Oriented Reconstruction Attack against Split Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12130-12139} }
TULIP: Transformer for Upsampling of LiDAR Point Clouds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Bin and Pfreundschuh, Patrick and Siegwart, Roland and Hutter, Marco and Moghadam, Peyman and Patil, Vaishakh}, title = {TULIP: Transformer for Upsampling of LiDAR Point Clouds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15354-15364} }
BT-Adapter: Video Conversation is Feasible Without Video Instruction Tuning-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Ruyang and Li, Chen and Ge, Yixiao and Li, Thomas H. and Shan, Ying and Li, Ge}, title = {BT-Adapter: Video Conversation is Feasible Without Video Instruction Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13658-13667} }
Generate Subgoal Images before Act: Unlocking the Chain-of-Thought Reasoning in Diffusion Model for Robot Manipulation with Multimodal Prompts-
[pdf]
[supp]
[bibtex]@InProceedings{Ni_2024_CVPR, author = {Ni, Fei and Hao, Jianye and Wu, Shiguang and Kou, Longxin and Liu, Jiashun and Zheng, Yan and Wang, Bin and Zhuang, Yuzheng}, title = {Generate Subgoal Images before Act: Unlocking the Chain-of-Thought Reasoning in Diffusion Model for Robot Manipulation with Multimodal Prompts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13991-14000} }
Asymmetric Masked Distillation for Pre-Training Small Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Zhiyu and Huang, Bingkun and Xing, Sen and Wu, Gangshan and Qiao, Yu and Wang, Limin}, title = {Asymmetric Masked Distillation for Pre-Training Small Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18516-18526} }
MP5: A Multi-modal Open-ended Embodied System in Minecraft via Active Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qin_2024_CVPR, author = {Qin, Yiran and Zhou, Enshen and Liu, Qichang and Yin, Zhenfei and Sheng, Lu and Zhang, Ruimao and Qiao, Yu and Shao, Jing}, title = {MP5: A Multi-modal Open-ended Embodied System in Minecraft via Active Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16307-16316} }
Uncovering What, Why and How: A Comprehensive Benchmark for Causation Understanding of Video Anomaly-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2024_CVPR, author = {Du, Hang and Zhang, Sicheng and Xie, Binzhu and Nan, Guoshun and Zhang, Jiayang and Xu, Junrui and Liu, Hangyu and Leng, Sicong and Liu, Jiangming and Fan, Hehe and Huang, Dajiu and Feng, Jing and Chen, Linli and Zhang, Can and Li, Xuhuan and Zhang, Hao and Chen, Jianhang and Cui, Qimei and Tao, Xiaofeng}, title = {Uncovering What, Why and How: A Comprehensive Benchmark for Causation Understanding of Video Anomaly}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18793-18803} }
MiKASA: Multi-Key-Anchor & Scene-Aware Transformer for 3D Visual Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chang_2024_CVPR, author = {Chang, Chun-Peng and Wang, Shaoxiang and Pagani, Alain and Stricker, Didier}, title = {MiKASA: Multi-Key-Anchor \& Scene-Aware Transformer for 3D Visual Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14131-14140} }
ZePT: Zero-Shot Pan-Tumor Segmentation via Query-Disentangling and Self-Prompting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jiang_2024_CVPR, author = {Jiang, Yankai and Huang, Zhongzhen and Zhang, Rongzhao and Zhang, Xiaofan and Zhang, Shaoting}, title = {ZePT: Zero-Shot Pan-Tumor Segmentation via Query-Disentangling and Self-Prompting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11386-11397} }
Task-Driven Exploration: Decoupling and Inter-Task Feedback for Joint Moment Retrieval and Highlight Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Jin and Wei, Ping and Li, Huan and Ren, Ziyang}, title = {Task-Driven Exploration: Decoupling and Inter-Task Feedback for Joint Moment Retrieval and Highlight Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18308-18318} }
MobileCLIP: Fast Image-Text Models through Multi-Modal Reinforced Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vasu_2024_CVPR, author = {Vasu, Pavan Kumar Anasosalu and Pouransari, Hadi and Faghri, Fartash and Vemulapalli, Raviteja and Tuzel, Oncel}, title = {MobileCLIP: Fast Image-Text Models through Multi-Modal Reinforced Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15963-15974} }
VideoCon: Robust Video-Language Alignment via Contrast Captions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bansal_2024_CVPR, author = {Bansal, Hritik and Bitton, Yonatan and Szpektor, Idan and Chang, Kai-Wei and Grover, Aditya}, title = {VideoCon: Robust Video-Language Alignment via Contrast Captions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13927-13937} }
Discovering and Mitigating Visual Biases through Keyword Explanation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Younghyun and Mo, Sangwoo and Kim, Minkyu and Lee, Kyungmin and Lee, Jaeho and Shin, Jinwoo}, title = {Discovering and Mitigating Visual Biases through Keyword Explanation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11082-11092} }
Robust Emotion Recognition in Context Debiasing-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Dingkang and Yang, Kun and Li, Mingcheng and Wang, Shunli and Wang, Shuaibing and Zhang, Lihua}, title = {Robust Emotion Recognition in Context Debiasing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12447-12457} }
CAPE: CAM as a Probabilistic Ensemble for Enhanced DNN Interpretation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chowdhury_2024_CVPR, author = {Chowdhury, Townim Faisal and Liao, Kewen and Phan, Vu Minh Hieu and To, Minh-Son and Xie, Yutong and Hung, Kevin and Ross, David and van den Hengel, Anton and Verjans, Johan W. and Liao, Zhibin}, title = {CAPE: CAM as a Probabilistic Ensemble for Enhanced DNN Interpretation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11072-11081} }
Multi-Space Alignments Towards Universal LiDAR Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Youquan and Kong, Lingdong and Wu, Xiaoyang and Chen, Runnan and Li, Xin and Pan, Liang and Liu, Ziwei and Ma, Yuexin}, title = {Multi-Space Alignments Towards Universal LiDAR Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14648-14661} }
FlowDiffuser: Advancing Optical Flow Estimation with Diffusion Models-
[pdf]
[bibtex]@InProceedings{Luo_2024_CVPR, author = {Luo, Ao and Li, Xin and Yang, Fan and Liu, Jiangyu and Fan, Haoqiang and Liu, Shuaicheng}, title = {FlowDiffuser: Advancing Optical Flow Estimation with Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19167-19176} }
Free3D: Consistent Novel View Synthesis without 3D Representation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Chuanxia and Vedaldi, Andrea}, title = {Free3D: Consistent Novel View Synthesis without 3D Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9720-9731} }
WALT3D: Generating Realistic Training Data from Time-Lapse Imagery for Reconstructing Dynamic Objects Under Occlusion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vuong_2024_CVPR, author = {Vuong, Khiem and Reddy, N Dinesh and Tamburo, Robert and Narasimhan, Srinivasa G.}, title = {WALT3D: Generating Realistic Training Data from Time-Lapse Imagery for Reconstructing Dynamic Objects Under Occlusion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9514-9524} }
Towards Language-Driven Video Inpainting via Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Jianzong and Li, Xiangtai and Si, Chenyang and Zhou, Shangchen and Yang, Jingkang and Zhang, Jiangning and Li, Yining and Chen, Kai and Tong, Yunhai and Liu, Ziwei and Loy, Chen Change}, title = {Towards Language-Driven Video Inpainting via Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12501-12511} }
CLIP-KD: An Empirical Study of CLIP Model Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Chuanguang and An, Zhulin and Huang, Libo and Bi, Junyu and Yu, Xinqiang and Yang, Han and Diao, Boyu and Xu, Yongjun}, title = {CLIP-KD: An Empirical Study of CLIP Model Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15952-15962} }
OneTracker: Unifying Visual Object Tracking with Foundation Models and Efficient Tuning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2024_CVPR, author = {Hong, Lingyi and Yan, Shilin and Zhang, Renrui and Li, Wanyun and Zhou, Xinyu and Guo, Pinxue and Jiang, Kaixun and Chen, Yiting and Li, Jinglun and Chen, Zhaoyu and Zhang, Wenqiang}, title = {OneTracker: Unifying Visual Object Tracking with Foundation Models and Efficient Tuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19079-19091} }
SC-Tune: Unleashing Self-Consistent Referential Comprehension in Large Vision Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Yue_2024_CVPR, author = {Yue, Tongtian and Cheng, Jie and Guo, Longteng and Dai, Xingyuan and Zhao, Zijia and He, Xingjian and Xiong, Gang and Lv, Yisheng and Liu, Jing}, title = {SC-Tune: Unleashing Self-Consistent Referential Comprehension in Large Vision Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13073-13083} }
NeRSP: Neural 3D Reconstruction for Reflective Objects with Sparse Polarized Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2024_CVPR, author = {Han, Yufei and Guo, Heng and Fukai, Koki and Santo, Hiroaki and Shi, Boxin and Okura, Fumio and Ma, Zhanyu and Jia, Yunpeng}, title = {NeRSP: Neural 3D Reconstruction for Reflective Objects with Sparse Polarized Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11821-11830} }
Retrieval-Augmented Embodied Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Yichen and Ou, Zhicai and Mou, Xiaofeng and Tang, Jian}, title = {Retrieval-Augmented Embodied Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17985-17995} }
SAFDNet: A Simple and Effective Network for Fully Sparse 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Gang and Chen, Junnan and Gao, Guohuan and Li, Jianmin and Liu, Si and Hu, Xiaolin}, title = {SAFDNet: A Simple and Effective Network for Fully Sparse 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14477-14486} }
HINTED: Hard Instance Enhanced Detector with Mixed-Density Feature Fusion for Sparsely-Supervised 3D Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Xia_2024_CVPR, author = {Xia, Qiming and Ye, Wei and Wu, Hai and Zhao, Shijia and Xing, Leyuan and Huang, Xun and Deng, Jinhao and Li, Xin and Wen, Chenglu and Wang, Cheng}, title = {HINTED: Hard Instance Enhanced Detector with Mixed-Density Feature Fusion for Sparsely-Supervised 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15321-15330} }
Structured Gradient-based Interpretations via Norm-Regularized Adversarial Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gong_2024_CVPR, author = {Gong, Shizhan and Dou, Qi and Farnia, Farzan}, title = {Structured Gradient-based Interpretations via Norm-Regularized Adversarial Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11009-11018} }
3DFIRES: Few Image 3D REconstruction for Scenes with Hidden Surfaces-
[pdf]
[arXiv]
[bibtex]@InProceedings{Jin_2024_CVPR, author = {Jin, Linyi and Kulkarni, Nilesh and Fouhey, David F.}, title = {3DFIRES: Few Image 3D REconstruction for Scenes with Hidden Surfaces}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9742-9751} }
MCPNet: An Interpretable Classifier via Multi-Level Concept Prototypes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Bor-Shiun and Wang, Chien-Yi and Chiu, Wei-Chen}, title = {MCPNet: An Interpretable Classifier via Multi-Level Concept Prototypes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10885-10894} }
ALGM: Adaptive Local-then-Global Token Merging for Efficient Semantic Segmentation with Plain Vision Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Norouzi_2024_CVPR, author = {Norouzi, Narges and Orlova, Svetlana and de Geus, Daan and Dubbelman, Gijs}, title = {ALGM: Adaptive Local-then-Global Token Merging for Efficient Semantic Segmentation with Plain Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15773-15782} }
Single-Model and Any-Modality for Video Object Tracking-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Zongwei and Zheng, Jilai and Ren, Xiangxuan and Vasluianu, Florin-Alexandru and Ma, Chao and Paudel, Danda Pani and Van Gool, Luc and Timofte, Radu}, title = {Single-Model and Any-Modality for Video Object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19156-19166} }
FlowTrack: Revisiting Optical Flow for Long-Range Dense Tracking-
[pdf]
[bibtex]@InProceedings{Cho_2024_CVPR, author = {Cho, Seokju and Huang, Jiahui and Kim, Seungryong and Lee, Joon-Young}, title = {FlowTrack: Revisiting Optical Flow for Long-Range Dense Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19268-19277} }
Synthesize, Diagnose, and Optimize: Towards Fine-Grained Vision-Language Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2024_CVPR, author = {Peng, Wujian and Xie, Sicheng and You, Zuyao and Lan, Shiyi and Wu, Zuxuan}, title = {Synthesize, Diagnose, and Optimize: Towards Fine-Grained Vision-Language Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13279-13288} }
WildlifeMapper: Aerial Image Analysis for Multi-Species Detection and Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Kumar_2024_CVPR, author = {Kumar, Satish and Zhang, Bowen and Gudavalli, Chandrakanth and Levenson, Connor and Hughey, Lacey and Stabach, Jared A. and Amoke, Irene and Ojwang, Gordon and Mukeka, Joseph and Mwiu, Stephen and Ogutu, Joseph and Frederick, Howard and Manjunath, B.S.}, title = {WildlifeMapper: Aerial Image Analysis for Multi-Species Detection and Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12594-12604} }
Tune-An-Ellipse: CLIP Has Potential to Find What You Want-
[pdf]
[supp]
[bibtex]@InProceedings{Xie_2024_CVPR, author = {Xie, Jinheng and Deng, Songhe and Li, Bing and Liu, Haozhe and Huang, Yawen and Zheng, Yefeng and Schmidhuber, Jurgen and Ghanem, Bernard and Shen, Linlin and Shou, Mike Zheng}, title = {Tune-An-Ellipse: CLIP Has Potential to Find What You Want}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13723-13732} }
Incremental Nuclei Segmentation from Histopathological Images via Future-class Awareness and Compatibility-inspired Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Huyong and Wu, Huisi and Qin, Jing}, title = {Incremental Nuclei Segmentation from Histopathological Images via Future-class Awareness and Compatibility-inspired Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11408-11417} }
DiffMOT: A Real-time Diffusion-based Multiple Object Tracker with Non-linear Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lv_2024_CVPR, author = {Lv, Weiyi and Huang, Yuhang and Zhang, Ning and Lin, Ruei-Sung and Han, Mei and Zeng, Dan}, title = {DiffMOT: A Real-time Diffusion-based Multiple Object Tracker with Non-linear Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19321-19330} }
Just Add π! Pose Induced Video Transformers for Understanding Activities of Daily Living-
[pdf]
[supp]
[bibtex]@InProceedings{Reilly_2024_CVPR, author = {Reilly, Dominick and Das, Srijan}, title = {Just Add {$\pi$}! Pose Induced Video Transformers for Understanding Activities of Daily Living}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18340-18350} }
ViLa-MIL: Dual-scale Vision-Language Multiple Instance Learning for Whole Slide Image Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2024_CVPR, author = {Shi, Jiangbo and Li, Chen and Gong, Tieliang and Zheng, Yefeng and Fu, Huazhu}, title = {ViLa-MIL: Dual-scale Vision-Language Multiple Instance Learning for Whole Slide Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11248-11258} }
CapsFusion: Rethinking Image-Text Data at Scale-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Qiying and Sun, Quan and Zhang, Xiaosong and Cui, Yufeng and Zhang, Fan and Cao, Yue and Wang, Xinlong and Liu, Jingjing}, title = {CapsFusion: Rethinking Image-Text Data at Scale}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14022-14032} }
Tumor Micro-environment Interactions Guided Graph Learning for Survival Analysis of Human Cancers from Whole-slide Pathological Images-
[pdf]
[bibtex]@InProceedings{Shao_2024_CVPR, author = {Shao, Wei and Shi, YangYang and Zhang, Daoqiang and Zhou, JunJie and Wan, Peng}, title = {Tumor Micro-environment Interactions Guided Graph Learning for Survival Analysis of Human Cancers from Whole-slide Pathological Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11694-11703} }
Towards Generalizable Multi-Object Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qin_2024_CVPR, author = {Qin, Zheng and Wang, Le and Zhou, Sanping and Fu, Panpan and Hua, Gang and Tang, Wei}, title = {Towards Generalizable Multi-Object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18995-19004} }
Slice3D: Multi-Slice Occlusion-Revealing Single View 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yizhi and Lira, Wallace and Wang, Wenqi and Mahdavi-Amiri, Ali and Zhang, Hao}, title = {Slice3D: Multi-Slice Occlusion-Revealing Single View 3D Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9881-9891} }
IIRP-Net: Iterative Inference Residual Pyramid Network for Enhanced Image Registration-
[pdf]
[supp]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Tai and Zhang, Suwei and Li, Jiafeng and Wen, Ying}, title = {IIRP-Net: Iterative Inference Residual Pyramid Network for Enhanced Image Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11546-11555} }
SNIFFER: Multimodal Large Language Model for Explainable Out-of-Context Misinformation Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qi_2024_CVPR, author = {Qi, Peng and Yan, Zehong and Hsu, Wynne and Lee, Mong Li}, title = {SNIFFER: Multimodal Large Language Model for Explainable Out-of-Context Misinformation Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13052-13062} }
Beyond Seen Primitive Concepts and Attribute-Object Compositional Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Saini_2024_CVPR, author = {Saini, Nirat and Pham, Khoi and Shrivastava, Abhinav}, title = {Beyond Seen Primitive Concepts and Attribute-Object Compositional Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14466-14476} }
Unleashing Network Potentials for Semantic Scene Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Fengyun and Sun, Qianru and Zhang, Dong and Tang, Jinhui}, title = {Unleashing Network Potentials for Semantic Scene Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10314-10323} }
Learning Occupancy for Monocular 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2024_CVPR, author = {Peng, Liang and Xu, Junkai and Cheng, Haoran and Yang, Zheng and Wu, Xiaopei and Qian, Wei and Wang, Wenxiao and Wu, Boxi and Cai, Deng}, title = {Learning Occupancy for Monocular 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10281-10292} }
LAA-Net: Localized Artifact Attention Network for Quality-Agnostic and Generalizable Deepfake Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Nguyen_2024_CVPR, author = {Nguyen, Dat and Mejri, Nesryne and Singh, Inder Pal and Kuleshova, Polina and Astrid, Marcella and Kacem, Anis and Ghorbel, Enjie and Aouada, Djamila}, title = {LAA-Net: Localized Artifact Attention Network for Quality-Agnostic and Generalizable Deepfake Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17395-17405} }
Rotation-Agnostic Image Representation Learning for Digital Pathology-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Alfasly_2024_CVPR, author = {Alfasly, Saghir and Shafique, Abubakr and Nejat, Peyman and Khan, Jibran and Alsaafin, Areej and Alabtah, Ghazal and Tizhoosh, H. R.}, title = {Rotation-Agnostic Image Representation Learning for Digital Pathology}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11683-11693} }
EASE-DETR: Easing the Competition among Object Queries-
[pdf]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Yulu and Sun, Yifan and Ding, Xudong and Zhao, Chuyang and Liu, Si}, title = {EASE-DETR: Easing the Competition among Object Queries}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17282-17291} }
Self-Discovering Interpretable Diffusion Latent Directions for Responsible Text-to-Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Hang and Shen, Chengzhi and Torr, Philip and Tresp, Volker and Gu, Jindong}, title = {Self-Discovering Interpretable Diffusion Latent Directions for Responsible Text-to-Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12006-12016} }
HiLo: Detailed and Robust 3D Clothed Human Reconstruction with High-and Low-Frequency Information of Parametric Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Yifan and Liu, Dong and Zhang, Shuhai and Deng, Zeshuai and Huang, Zixiong and Tan, Mingkui}, title = {HiLo: Detailed and Robust 3D Clothed Human Reconstruction with High-and Low-Frequency Information of Parametric Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10671-10681} }
Promptable Behaviors: Personalizing Multi-Objective Rewards from Human Preferences-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hwang_2024_CVPR, author = {Hwang, Minyoung and Weihs, Luca and Park, Chanwoo and Lee, Kimin and Kembhavi, Aniruddha and Ehsani, Kiana}, title = {Promptable Behaviors: Personalizing Multi-Objective Rewards from Human Preferences}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16216-16226} }
Neural Underwater Scene Representation-
[pdf]
[supp]
[bibtex]@InProceedings{Tang_2024_CVPR, author = {Tang, Yunkai and Zhu, Chengxuan and Wan, Renjie and Xu, Chao and Shi, Boxin}, title = {Neural Underwater Scene Representation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11780-11789} }
Progress-Aware Online Action Segmentation for Egocentric Procedural Task Videos-
[pdf]
[supp]
[bibtex]@InProceedings{Shen_2024_CVPR, author = {Shen, Yuhan and Elhamifar, Ehsan}, title = {Progress-Aware Online Action Segmentation for Egocentric Procedural Task Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18186-18197} }
Constrained Layout Generation with Factor Graphs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Dupty_2024_CVPR, author = {Dupty, Mohammed Haroon and Dong, Yanfei and Leng, Sicong and Fu, Guoji and Goh, Yong Liang and Lu, Wei and Lee, Wee Sun}, title = {Constrained Layout Generation with Factor Graphs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12851-12860} }
SLICE: Stabilized LIME for Consistent Explanations for Image Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Bora_2024_CVPR, author = {Bora, Revoti Prasad and Terh{\"o}rst, Philipp and Veldhuis, Raymond and Ramachandra, Raghavendra and Raja, Kiran}, title = {SLICE: Stabilized LIME for Consistent Explanations for Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10988-10996} }
Anomaly Heterogeneity Learning for Open-set Supervised Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Jiawen and Ding, Choubo and Tian, Yu and Pang, Guansong}, title = {Anomaly Heterogeneity Learning for Open-set Supervised Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17616-17626} }
Revisiting Counterfactual Problems in Referring Expression Comprehension-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Zhihan and Li, Ruifan}, title = {Revisiting Counterfactual Problems in Referring Expression Comprehension}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13438-13448} }
Compressed 3D Gaussian Splatting for Accelerated Novel View Synthesis-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Niedermayr_2024_CVPR, author = {Niedermayr, Simon and Stumpfegger, Josef and Westermann, R{\"u}diger}, title = {Compressed 3D Gaussian Splatting for Accelerated Novel View Synthesis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10349-10358} }
Separating the "Chirp" from the "Chat": Self-supervised Visual Grounding of Sound and Language-
[pdf]
[arXiv]
[bibtex]@InProceedings{Hamilton_2024_CVPR, author = {Hamilton, Mark and Zisserman, Andrew and Hershey, John R. and Freeman, William T.}, title = {Separating the ``Chirp'' from the ``Chat'': Self-supervised Visual Grounding of Sound and Language}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13117-13127} }
MA-LMM: Memory-Augmented Large Multimodal Model for Long-Term Video Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{He_2024_CVPR, author = {He, Bo and Li, Hengduo and Jang, Young Kyun and Jia, Menglin and Cao, Xuefei and Shah, Ashish and Shrivastava, Abhinav and Lim, Ser-Nam}, title = {MA-LMM: Memory-Augmented Large Multimodal Model for Long-Term Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13504-13514} }
Dr2Net: Dynamic Reversible Dual-Residual Networks for Memory-Efficient Finetuning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Chen and Liu, Shuming and Mangalam, Karttikeya and Qian, Guocheng and Zohra, Fatimah and Alghannam, Abdulmohsen and Malik, Jitendra and Ghanem, Bernard}, title = {Dr2Net: Dynamic Reversible Dual-Residual Networks for Memory-Efficient Finetuning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15835-15844} }
PNeRV: Enhancing Spatial Consistency via Pyramidal Neural Representation for Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Qi and Asif, M. Salman and Ma, Zhan}, title = {PNeRV: Enhancing Spatial Consistency via Pyramidal Neural Representation for Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19103-19112} }
Point Transformer V3: Simpler Faster Stronger-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Xiaoyang and Jiang, Li and Wang, Peng-Shuai and Liu, Zhijian and Liu, Xihui and Qiao, Yu and Ouyang, Wanli and He, Tong and Zhao, Hengshuang}, title = {Point Transformer V3: Simpler Faster Stronger}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4840-4851} }
Mask4Align: Aligned Entity Prompting with Color Masks for Multi-Entity Localization Problems-
[pdf]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Haoquan and Huang, Ronggang and Xie, Yi and Zhang, Huaidong}, title = {Mask4Align: Aligned Entity Prompting with Color Masks for Multi-Entity Localization Problems}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13373-13383} }
RCL: Reliable Continual Learning for Unified Failure Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Fei and Cheng, Zhen and Zhang, Xu-Yao and Liu, Cheng-Lin and Zhang, Zhaoxiang}, title = {RCL: Reliable Continual Learning for Unified Failure Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12140-12150} }
Referring Image Editing: Object-level Image Editing via Referring Expressions-
[pdf]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Chang and Li, Xiangtai and Ding, Henghui}, title = {Referring Image Editing: Object-level Image Editing via Referring Expressions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13128-13138} }
Unsupervised Video Domain Adaptation with Masked Pre-Training and Collaborative Self-Training-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Reddy_2024_CVPR, author = {Reddy, Arun and Paul, William and Rivera, Corban and Shah, Ketul and de Melo, Celso M. and Chellappa, Rama}, title = {Unsupervised Video Domain Adaptation with Masked Pre-Training and Collaborative Self-Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18919-18929} }
UniDepth: Universal Monocular Metric Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Piccinelli_2024_CVPR, author = {Piccinelli, Luigi and Yang, Yung-Hsu and Sakaridis, Christos and Segu, Mattia and Li, Siyuan and Van Gool, Luc and Yu, Fisher}, title = {UniDepth: Universal Monocular Metric Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10106-10116} }
NeuRAD: Neural Rendering for Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Tonderski_2024_CVPR, author = {Tonderski, Adam and Lindstr{\"o}m, Carl and Hess, Georg and Ljungbergh, William and Svensson, Lennart and Petersson, Christoffer}, title = {NeuRAD: Neural Rendering for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14895-14904} }
Bootstrapping Chest CT Image Understanding by Distilling Knowledge from X-ray Expert Models-
[pdf]
[supp]
[bibtex]@InProceedings{Cao_2024_CVPR, author = {Cao, Weiwei and Zhang, Jianpeng and Xia, Yingda and Mok, Tony C. W. and Li, Zi and Ye, Xianghua and Lu, Le and Zheng, Jian and Tang, Yuxing and Zhang, Ling}, title = {Bootstrapping Chest CT Image Understanding by Distilling Knowledge from X-ray Expert Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11238-11247} }
Magic Tokens: Select Diverse Tokens for Multi-modal Object Re-Identification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Pingping and Wang, Yuhao and Liu, Yang and Tu, Zhengzheng and Lu, Huchuan}, title = {Magic Tokens: Select Diverse Tokens for Multi-modal Object Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17117-17126} }
SignGraph: A Sign Sequence is Worth Graphs of Nodes-
[pdf]
[supp]
[bibtex]@InProceedings{Gan_2024_CVPR, author = {Gan, Shiwei and Yin, Yafeng and Jiang, Zhiwei and Wen, Hongkai and Xie, Lei and Lu, Sanglu}, title = {SignGraph: A Sign Sequence is Worth Graphs of Nodes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13470-13479} }
DeconfuseTrack: Dealing with Confusion for Multi-Object Tracking-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Cheng and Han, Shoudong and He, Mengyu and Zheng, Wenbo and Wei, Yuhao}, title = {DeconfuseTrack: Dealing with Confusion for Multi-Object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19290-19299} }
HIMap: HybrId Representation Learning for End-to-end Vectorized HD Map Construction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Yi and Zhang, Hui and Yu, Jiaqian and Yang, Yifan and Jung, Sangil and Park, Seung-In and Yoo, ByungIn}, title = {HIMap: HybrId Representation Learning for End-to-end Vectorized HD Map Construction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15396-15406} }
Unleashing Unlabeled Data: A Paradigm for Cross-View Geo-Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Guopeng and Qian, Ming and Xia, Gui-Song}, title = {Unleashing Unlabeled Data: A Paradigm for Cross-View Geo-Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16719-16729} }
PanoOcc: Unified Occupancy Representation for Camera-based 3D Panoptic Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yuqi and Chen, Yuntao and Liao, Xingyu and Fan, Lue and Zhang, Zhaoxiang}, title = {PanoOcc: Unified Occupancy Representation for Camera-based 3D Panoptic Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17158-17168} }
Sparse Views Near Light: A Practical Paradigm for Uncalibrated Point-light Photometric Stereo-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Brahimi_2024_CVPR, author = {Brahimi, Mohammed and Haefner, Bjoern and Ye, Zhenzhang and Goldluecke, Bastian and Cremers, Daniel}, title = {Sparse Views Near Light: A Practical Paradigm for Uncalibrated Point-light Photometric Stereo}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11862-11872} }
LQMFormer: Language-aware Query Mask Transformer for Referring Image Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Shah_2024_CVPR, author = {Shah, Nisarg A. and VS, Vibashan and Patel, Vishal M.}, title = {LQMFormer: Language-aware Query Mask Transformer for Referring Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12903-12913} }
Omni-Q: Omni-Directional Scene Understanding for Unsupervised Visual Grounding-
[pdf]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Sai and Lin, Yutian and Wu, Yu}, title = {Omni-Q: Omni-Directional Scene Understanding for Unsupervised Visual Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14261-14270} }
VISTA-LLAMA: Reducing Hallucination in Video Language Models via Equal Distance to Visual Tokens-
[pdf]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Fan and Jin, Xiaojie and Wang, Heng and Xian, Yuchen and Feng, Jiashi and Yang, Yi}, title = {VISTA-LLAMA: Reducing Hallucination in Video Language Models via Equal Distance to Visual Tokens}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13151-13160} }
Efficient Multitask Dense Predictor via Binarization-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shang_2024_CVPR, author = {Shang, Yuzhang and Xu, Dan and Liu, Gaowen and Kompella, Ramana Rao and Yan, Yan}, title = {Efficient Multitask Dense Predictor via Binarization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15899-15908} }
Jointly Training and Pruning CNNs via Learnable Agent Guidance and Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ganjdanesh_2024_CVPR, author = {Ganjdanesh, Alireza and Gao, Shangqian and Huang, Heng}, title = {Jointly Training and Pruning CNNs via Learnable Agent Guidance and Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16058-16069} }
Diffusion-EDFs: Bi-equivariant Denoising Generative Modeling on SE(3) for Visual Robotic Manipulation-
[pdf]
[supp]
[bibtex]@InProceedings{Ryu_2024_CVPR, author = {Ryu, Hyunwoo and Kim, Jiwoo and An, Hyunseok and Chang, Junwoo and Seo, Joohwan and Kim, Taehan and Kim, Yubin and Hwang, Chaewon and Choi, Jongeun and Horowitz, Roberto}, title = {Diffusion-EDFs: Bi-equivariant Denoising Generative Modeling on SE(3) for Visual Robotic Manipulation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18007-18018} }
Contrasting Intra-Modal and Ranking Cross-Modal Hard Negatives to Enhance Visio-Linguistic Compositional Understanding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Le and Awal, Rabiul and Agrawal, Aishwarya}, title = {Contrasting Intra-Modal and Ranking Cross-Modal Hard Negatives to Enhance Visio-Linguistic Compositional Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13774-13784} }
CMA: A Chromaticity Map Adapter for Robust Detection of Screen-Recapture Document Images-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Changsheng and Lin, Liangwei and Chen, Yongqi and Li, Bin and Zeng, Jishen and Huang, Jiwu}, title = {CMA: A Chromaticity Map Adapter for Robust Detection of Screen-Recapture Document Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15577-15586} }
VA3: Virtually Assured Amplification Attack on Probabilistic Copyright Protection for Text-to-Image Generative Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Xiang and Shen, Qianli and Kawaguchi, Kenji}, title = {VA3: Virtually Assured Amplification Attack on Probabilistic Copyright Protection for Text-to-Image Generative Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12363-12373} }
Light the Night: A Multi-Condition Diffusion Framework for Unpaired Low-Light Enhancement in Autonomous Driving-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Jinlong and Li, Baolu and Tu, Zhengzhong and Liu, Xinyu and Guo, Qing and Juefei-Xu, Felix and Xu, Runsheng and Yu, Hongkai}, title = {Light the Night: A Multi-Condition Diffusion Framework for Unpaired Low-Light Enhancement in Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15205-15215} }
Delving into the Trajectory Long-tail Distribution for Muti-object Tracking-
[pdf]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Sijia and Yu, En and Li, Jinyang and Tao, Wenbing}, title = {Delving into the Trajectory Long-tail Distribution for Muti-object Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19341-19351} }
Seg2Reg: Differentiable 2D Segmentation to 1D Regression Rendering for 360 Room Layout Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Cheng and Tai, Wei-En and Shih, Yu-Lin and Chen, Kuan-Wei and Syu, Yong-Jing and The, Kent Selwyn and Wang, Yu-Chiang Frank and Chen, Hwann-Tzong}, title = {Seg2Reg: Differentiable 2D Segmentation to 1D Regression Rendering for 360 Room Layout Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10435-10445} }
UniMix: Towards Domain Adaptive and Generalizable LiDAR Semantic Segmentation in Adverse Weather-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Haimei and Zhang, Jing and Chen, Zhuo and Zhao, Shanshan and Tao, Dacheng}, title = {UniMix: Towards Domain Adaptive and Generalizable LiDAR Semantic Segmentation in Adverse Weather}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14781-14791} }
Visual Delta Generator with Large Multi-modal Models for Semi-supervised Composed Image Retrieval-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jang_2024_CVPR, author = {Jang, Young Kyun and Kim, Donghyun and Meng, Zihang and Huynh, Dat and Lim, Ser-Nam}, title = {Visual Delta Generator with Large Multi-modal Models for Semi-supervised Composed Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16805-16814} }
Selective Interpretable and Motion Consistent Privacy Attribute Obfuscation for Action Recognition-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ilic_2024_CVPR, author = {Ilic, Filip and Zhao, He and Pock, Thomas and Wildes, Richard P.}, title = {Selective Interpretable and Motion Consistent Privacy Attribute Obfuscation for Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18730-18739} }
HiPose: Hierarchical Binary Surface Encoding and Correspondence Pruning for RGB-D 6DoF Object Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2024_CVPR, author = {Lin, Yongliang and Su, Yongzhi and Nathan, Praveen and Inuganti, Sandeep and Di, Yan and Sundermeyer, Martin and Manhardt, Fabian and Stricker, Didier and Rambach, Jason and Zhang, Yu}, title = {HiPose: Hierarchical Binary Surface Encoding and Correspondence Pruning for RGB-D 6DoF Object Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10148-10158} }
DiffForensics: Leveraging Diffusion Prior to Image Forgery Detection and Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Zeqin and Ni, Jiangqun and Lin, Yuzhen and Deng, Haoyi and Li, Bin}, title = {DiffForensics: Leveraging Diffusion Prior to Image Forgery Detection and Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12765-12774} }
Boosting Self-Supervision for Single-View Scene Completion via Knowledge Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Han_2024_CVPR, author = {Han, Keonhee and Muhle, Dominik and Wimbauer, Felix and Cremers, Daniel}, title = {Boosting Self-Supervision for Single-View Scene Completion via Knowledge Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9837-9847} }
Sparse Global Matching for Video Frame Interpolation with Large Motion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Chunxu and Zhang, Guozhen and Zhao, Rui and Wang, Limin}, title = {Sparse Global Matching for Video Frame Interpolation with Large Motion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19125-19134} }
ExtDM: Distribution Extrapolation Diffusion Model for Video Prediction-
[pdf]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Zhicheng and Hu, Junyao and Cheng, Wentao and Paudel, Danda and Yang, Jufeng}, title = {ExtDM: Distribution Extrapolation Diffusion Model for Video Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19310-19320} }
Point Segment and Count: A Generalized Framework for Object Counting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Zhizhong and Dai, Mingliang and Zhang, Yi and Zhang, Junping and Shan, Hongming}, title = {Point Segment and Count: A Generalized Framework for Object Counting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17067-17076} }
PTT: Point-Trajectory Transformer for Efficient Temporal 3D Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Kuan-Chih and Lyu, Weijie and Yang, Ming-Hsuan and Tsai, Yi-Hsuan}, title = {PTT: Point-Trajectory Transformer for Efficient Temporal 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14938-14947} }
Generative Proxemics: A Prior for 3D Social Interaction from Images-
[pdf]
[supp]
[bibtex]@InProceedings{Muller_2024_CVPR, author = {M{\"u}ller, Lea and Ye, Vickie and Pavlakos, Georgios and Black, Michael and Kanazawa, Angjoo}, title = {Generative Proxemics: A Prior for 3D Social Interaction from Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9687-9697} }
A Simple and Effective Point-based Network for Event Camera 6-DOFs Pose Relocalization-
[pdf]
[bibtex]@InProceedings{Ren_2024_CVPR, author = {Ren, Hongwei and Zhu, Jiadong and Zhou, Yue and Fu, Haotian and Huang, Yulong and Cheng, Bojun}, title = {A Simple and Effective Point-based Network for Event Camera 6-DOFs Pose Relocalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18112-18121} }
Region-Based Representations Revisited-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shlapentokh-Rothman_2024_CVPR, author = {Shlapentokh-Rothman, Michal and Blume, Ansel and Xiao, Yao and Wu, Yuqun and TV, Sethuraman and Tao, Heyi and Lee, Jae Yong and Torres, Wilfredo and Wang, Yu-Xiong and Hoiem, Derek}, title = {Region-Based Representations Revisited}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17107-17116} }
GenH2R: Learning Generalizable Human-to-Robot Handover via Scalable Simulation Demonstration and Imitation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Zifan and Chen, Junyu and Chen, Ziqing and Xie, Pengwei and Chen, Rui and Yi, Li}, title = {GenH2R: Learning Generalizable Human-to-Robot Handover via Scalable Simulation Demonstration and Imitation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16362-16372} }
Modality-Agnostic Structural Image Representation Learning for Deformable Multi-Modality Medical Image Registration-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mok_2024_CVPR, author = {Mok, Tony C. W. and Li, Zi and Bai, Yunhao and Zhang, Jianpeng and Liu, Wei and Zhou, Yan-Jie and Yan, Ke and Jin, Dakai and Shi, Yu and Yin, Xiaoli and Lu, Le and Zhang, Ling}, title = {Modality-Agnostic Structural Image Representation Learning for Deformable Multi-Modality Medical Image Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11215-11225} }
Any-Shift Prompting for Generalization over Distributions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2024_CVPR, author = {Xiao, Zehao and Shen, Jiayi and Derakhshani, Mohammad Mahdi and Liao, Shengcai and Snoek, Cees G. M.}, title = {Any-Shift Prompting for Generalization over Distributions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13849-13860} }
CPR-Coach: Recognizing Composite Error Actions based on Single-class Training-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Shunli and Wang, Shuaibing and Yang, Dingkang and Li, Mingcheng and Kuang, Haopeng and Zhao, Xiao and Su, Liuzhen and Zhai, Peng and Zhang, Lihua}, title = {CPR-Coach: Recognizing Composite Error Actions based on Single-class Training}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18782-18792} }
RTracker: Recoverable Tracking via PN Tree Structured Memory-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Yuqing and Li, Xin and Zhou, Zikun and Wang, Yaowei and He, Zhenyu and Yang, Ming-Hsuan}, title = {RTracker: Recoverable Tracking via PN Tree Structured Memory}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19038-19047} }
DualAD: Disentangling the Dynamic and Static World for End-to-End Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Doll_2024_CVPR, author = {Doll, Simon and Hanselmann, Niklas and Schneider, Lukas and Schulz, Richard and Cordts, Marius and Enzweiler, Markus and Lensch, Hendrik P. A.}, title = {DualAD: Disentangling the Dynamic and Static World for End-to-End Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14728-14737} }
MULDE: Multiscale Log-Density Estimation via Denoising Score Matching for Video Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Micorek_2024_CVPR, author = {Micorek, Jakub and Possegger, Horst and Narnhofer, Dominik and Bischof, Horst and Kozinski, Mateusz}, title = {MULDE: Multiscale Log-Density Estimation via Denoising Score Matching for Video Anomaly Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18868-18877} }
PTQ4SAM: Post-Training Quantization for Segment Anything-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lv_2024_CVPR, author = {Lv, Chengtao and Chen, Hong and Guo, Jinyang and Ding, Yifu and Liu, Xianglong}, title = {PTQ4SAM: Post-Training Quantization for Segment Anything}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15941-15951} }
Improving Bird's Eye View Semantic Segmentation by Task Decomposition-
[pdf]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Tianhao and Chen, Yongcan and Wu, Yu and Liu, Tianyang and Du, Bo and Xiao, Peilun and Qiu, Shi and Yang, Hongda and Li, Guozhen and Yang, Yi and Lin, Yutian}, title = {Improving Bird's Eye View Semantic Segmentation by Task Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15512-15521} }
Scene Adaptive Sparse Transformer for Event-based Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Peng_2024_CVPR, author = {Peng, Yansong and Li, Hebei and Zhang, Yueyi and Sun, Xiaoyan and Wu, Feng}, title = {Scene Adaptive Sparse Transformer for Event-based Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16794-16804} }
CURSOR: Scalable Mixed-Order Hypergraph Matching with CUR Decomposition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zheng_2024_CVPR, author = {Zheng, Qixuan and Zhang, Ming and Yan, Hong}, title = {CURSOR: Scalable Mixed-Order Hypergraph Matching with CUR Decomposition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16036-16045} }
GigaTraj: Predicting Long-term Trajectories of Hundreds of Pedestrians in Gigapixel Complex Scenes-
[pdf]
[bibtex]@InProceedings{Lin_2024_CVPR, author = {Lin, Haozhe and Wei, Chunyu and He, Li and Guo, Yuchen and Zhao, Yunqi and Li, Shanglong and Fang, Lu}, title = {GigaTraj: Predicting Long-term Trajectories of Hundreds of Pedestrians in Gigapixel Complex Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19331-19340} }
C2KD: Bridging the Modality Gap for Cross-Modal Knowledge Distillation-
[pdf]
[supp]
[bibtex]@InProceedings{Huo_2024_CVPR, author = {Huo, Fushuo and Xu, Wenchao and Guo, Jingcai and Wang, Haozhao and Guo, Song}, title = {C2KD: Bridging the Modality Gap for Cross-Modal Knowledge Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16006-16015} }
Traceable Federated Continual Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Qiang and Liu, Bingyan and Li, Yawen}, title = {Traceable Federated Continual Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12872-12881} }
V*: Guided Visual Search as a Core Mechanism in Multimodal LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Penghao and Xie, Saining}, title = {V*: Guided Visual Search as a Core Mechanism in Multimodal LLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13084-13094} }
Uncertainty Visualization via Low-Dimensional Posterior Projections-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yair_2024_CVPR, author = {Yair, Omer and Nehme, Elias and Michaeli, Tomer}, title = {Uncertainty Visualization via Low-Dimensional Posterior Projections}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11041-11051} }
VSCode: General Visual Salient and Camouflaged Object Detection with 2D Prompt Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2024_CVPR, author = {Luo, Ziyang and Liu, Nian and Zhao, Wangbo and Yang, Xuguang and Zhang, Dingwen and Fan, Deng-Ping and Khan, Fahad and Han, Junwei}, title = {VSCode: General Visual Salient and Camouflaged Object Detection with 2D Prompt Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17169-17180} }
PointInfinity: Resolution-Invariant Point Diffusion Models-
[pdf]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Zixuan and Johnson, Justin and Debnath, Shoubhik and Rehg, James M. and Wu, Chao-Yuan}, title = {PointInfinity: Resolution-Invariant Point Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10050-10060} }
Structured Model Probing: Empowering Efficient Transfer Learning by Structured Regularization-
[pdf]
[supp]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Zhi-Fan and Mao, Chaojie and Wang, Xue and Jiang, Jianwen and Lv, Yiliang and Jin, Rong}, title = {Structured Model Probing: Empowering Efficient Transfer Learning by Structured Regularization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16849-16858} }
Multi-Modal Proxy Learning Towards Personalized Visual Multiple Clustering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yao_2024_CVPR, author = {Yao, Jiawei and Qian, Qi and Hu, Juhua}, title = {Multi-Modal Proxy Learning Towards Personalized Visual Multiple Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14066-14075} }
ChAda-ViT : Channel Adaptive Attention for Joint Representation Learning of Heterogeneous Microscopy Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bourriez_2024_CVPR, author = {Bourriez, Nicolas and Bendidi, Ihab and Cohen, Ethan and Watkinson, Gabriel and Sanchez, Maxime and Bollot, Guillaume and Genovesio, Auguste}, title = {ChAda-ViT : Channel Adaptive Attention for Joint Representation Learning of Heterogeneous Microscopy Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11556-11565} }
CARZero: Cross-Attention Alignment for Radiology Zero-Shot Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lai_2024_CVPR, author = {Lai, Haoran and Yao, Qingsong and Jiang, Zihang and Wang, Rongsheng and He, Zhiyang and Tao, Xiaodong and Zhou, S. Kevin}, title = {CARZero: Cross-Attention Alignment for Radiology Zero-Shot Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11137-11146} }
Multi-Modal Hallucination Control by Visual Information Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Favero_2024_CVPR, author = {Favero, Alessandro and Zancato, Luca and Trager, Matthew and Choudhary, Siddharth and Perera, Pramuditha and Achille, Alessandro and Swaminathan, Ashwin and Soatto, Stefano}, title = {Multi-Modal Hallucination Control by Visual Information Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14303-14312} }
The Neglected Tails in Vision-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Parashar_2024_CVPR, author = {Parashar, Shubham and Lin, Zhiqiu and Liu, Tian and Dong, Xiangjue and Li, Yanan and Ramanan, Deva and Caverlee, James and Kong, Shu}, title = {The Neglected Tails in Vision-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12988-12997} }
Learning Background Prompts to Discover Implicit Knowledge for Open Vocabulary Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Jiaming and Zhang, Jiacheng and Li, Jichang and Li, Ge and Liu, Si and Lin, Liang and Li, Guanbin}, title = {Learning Background Prompts to Discover Implicit Knowledge for Open Vocabulary Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16678-16687} }
Towards Accurate Post-training Quantization for Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Changyuan and Wang, Ziwei and Xu, Xiuwei and Tang, Yansong and Zhou, Jie and Lu, Jiwen}, title = {Towards Accurate Post-training Quantization for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16026-16035} }
GOAT-Bench: A Benchmark for Multi-Modal Lifelong Navigation-
[pdf]
[supp]
[bibtex]@InProceedings{Khanna_2024_CVPR, author = {Khanna, Mukul and Ramrakhya, Ram and Chhablani, Gunjan and Yenamandra, Sriram and Gervet, Theophile and Chang, Matthew and Kira, Zsolt and Chaplot, Devendra Singh and Batra, Dhruv and Mottaghi, Roozbeh}, title = {GOAT-Bench: A Benchmark for Multi-Modal Lifelong Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16373-16383} }
Decoupling Static and Hierarchical Motion Perception for Referring Video Segmentation-
[pdf]
[arXiv]
[bibtex]@InProceedings{He_2024_CVPR, author = {He, Shuting and Ding, Henghui}, title = {Decoupling Static and Hierarchical Motion Perception for Referring Video Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13332-13341} }
Dense Vision Transformer Compression with Few Samples-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Hanxiao and Zhou, Yifan and Wang, Guo-Hua}, title = {Dense Vision Transformer Compression with Few Samples}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15825-15834} }
Masked AutoDecoder is Effective Multi-Task Vision Generalist-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qiu_2024_CVPR, author = {Qiu, Han and Huang, Jiaxing and Gao, Peng and Lu, Lewei and Zhang, Xiaoqin and Lu, Shijian}, title = {Masked AutoDecoder is Effective Multi-Task Vision Generalist}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14152-14161} }
Correlation-aware Coarse-to-fine MLPs for Deformable Medical Image Registration-
[pdf]
[arXiv]
[bibtex]@InProceedings{Meng_2024_CVPR, author = {Meng, Mingyuan and Feng, Dagan and Bi, Lei and Kim, Jinman}, title = {Correlation-aware Coarse-to-fine MLPs for Deformable Medical Image Registration}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9645-9654} }
Toward Generalist Anomaly Detection via In-context Residual Learning with Few-shot Sample Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhu_2024_CVPR, author = {Zhu, Jiawen and Pang, Guansong}, title = {Toward Generalist Anomaly Detection via In-context Residual Learning with Few-shot Sample Prompts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17826-17836} }
Fourier-basis Functions to Bridge Augmentation Gap: Rethinking Frequency Augmentation in Image Classification-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Vaish_2024_CVPR, author = {Vaish, Puru and Wang, Shunxin and Strisciuglio, Nicola}, title = {Fourier-basis Functions to Bridge Augmentation Gap: Rethinking Frequency Augmentation in Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17763-17772} }
PlatoNeRF: 3D Reconstruction in Plato's Cave via Single-View Two-Bounce Lidar-
[pdf]
[supp]
[bibtex]@InProceedings{Klinghoffer_2024_CVPR, author = {Klinghoffer, Tzofi and Xiang, Xiaoyu and Somasundaram, Siddharth and Fan, Yuchen and Richardt, Christian and Raskar, Ramesh and Ranjan, Rakesh}, title = {PlatoNeRF: 3D Reconstruction in Plato's Cave via Single-View Two-Bounce Lidar}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14565-14574} }
An Interactive Navigation Method with Effect-oriented Affordance-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Xiaohan and Liu, Yuehu and Song, Xinhang and Liu, Yuyi and Zhang, Sixian and Jiang, Shuqiang}, title = {An Interactive Navigation Method with Effect-oriented Affordance}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16446-16456} }
PREGO: Online Mistake Detection in PRocedural EGOcentric Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Flaborea_2024_CVPR, author = {Flaborea, Alessandro and di Melendugno, Guido Maria D'Amely and Plini, Leonardo and Scofano, Luca and De Matteis, Edoardo and Furnari, Antonino and Farinella, Giovanni Maria and Galasso, Fabio}, title = {PREGO: Online Mistake Detection in PRocedural EGOcentric Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18483-18492} }
Logit Standardization in Knowledge Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Shangquan and Ren, Wenqi and Li, Jingzhi and Wang, Rui and Cao, Xiaochun}, title = {Logit Standardization in Knowledge Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15731-15740} }
Fine-grained Prototypical Voting with Heterogeneous Mixup for Semi-supervised 2D-3D Cross-modal Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Fan and Hua, Xian-Sheng and Chen, Chong and Luo, Xiao}, title = {Fine-grained Prototypical Voting with Heterogeneous Mixup for Semi-supervised 2D-3D Cross-modal Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17016-17026} }
Leak and Learn: An Attacker's Cookbook to Train Using Leaked Data from Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Joshua C. and Dabholkar, Ahaan and Sharma, Atul and Bagchi, Saurabh}, title = {Leak and Learn: An Attacker's Cookbook to Train Using Leaked Data from Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12247-12256} }
OCAI: Improving Optical Flow Estimation by Occlusion and Consistency Aware Interpolation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jeong_2024_CVPR, author = {Jeong, Jisoo and Cai, Hong and Garrepalli, Risheek and Lin, Jamie Menjay and Hayat, Munawar and Porikli, Fatih}, title = {OCAI: Improving Optical Flow Estimation by Occlusion and Consistency Aware Interpolation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19352-19362} }
Doodle Your 3D: From Abstract Freehand Sketches to Precise 3D Shapes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bandyopadhyay_2024_CVPR, author = {Bandyopadhyay, Hmrishav and Koley, Subhadeep and Das, Ayan and Bhunia, Ayan Kumar and Sain, Aneeshan and Chowdhury, Pinaki Nath and Xiang, Tao and Song, Yi-Zhe}, title = {Doodle Your 3D: From Abstract Freehand Sketches to Precise 3D Shapes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9795-9805} }
Single View Refractive Index Tomography with Neural Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Brandon and Levis, Aviad and Connor, Liam and Srinivasan, Pratul P. and Bouman, Katherine L.}, title = {Single View Refractive Index Tomography with Neural Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {25358-25367} }
XFibrosis: Explicit Vessel-Fiber Modeling for Fibrosis Staging from Liver Pathology Images-
[pdf]
[supp]
[bibtex]@InProceedings{Yin_2024_CVPR, author = {Yin, Chong and Liu, Siqi and Lyu, Fei and Lu, Jiahao and Darkner, Sune and Wong, Vincent Wai-Sun and Yuen, Pong C.}, title = {XFibrosis: Explicit Vessel-Fiber Modeling for Fibrosis Staging from Liver Pathology Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11282-11291} }
UnO: Unsupervised Occupancy Fields for Perception and Forecasting-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Agro_2024_CVPR, author = {Agro, Ben and Sykora, Quinlan and Casas, Sergio and Gilles, Thomas and Urtasun, Raquel}, title = {UnO: Unsupervised Occupancy Fields for Perception and Forecasting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14487-14496} }
SpatialVLM: Endowing Vision-Language Models with Spatial Reasoning Capabilities-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Boyuan and Xu, Zhuo and Kirmani, Sean and Ichter, Brian and Sadigh, Dorsa and Guibas, Leonidas and Xia, Fei}, title = {SpatialVLM: Endowing Vision-Language Models with Spatial Reasoning Capabilities}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14455-14465} }
InstructDiffusion: A Generalist Modeling Interface for Vision Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Geng_2024_CVPR, author = {Geng, Zigang and Yang, Binxin and Hang, Tiankai and Li, Chen and Gu, Shuyang and Zhang, Ting and Bao, Jianmin and Zhang, Zheng and Li, Houqiang and Hu, Han and Chen, Dong and Guo, Baining}, title = {InstructDiffusion: A Generalist Modeling Interface for Vision Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12709-12720} }
Gated Fields: Learning Scene Reconstruction from Gated Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ramazzina_2024_CVPR, author = {Ramazzina, Andrea and Walz, Stefanie and Dahal, Pragyan and Bijelic, Mario and Heide, Felix}, title = {Gated Fields: Learning Scene Reconstruction from Gated Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10530-10541} }
RadarDistill: Boosting Radar-based Object Detection Performance via Knowledge Distillation from LiDAR Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bang_2024_CVPR, author = {Bang, Geonho and Choi, Kwangjin and Kim, Jisong and Kum, Dongsuk and Choi, Jun Won}, title = {RadarDistill: Boosting Radar-based Object Detection Performance via Knowledge Distillation from LiDAR Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15491-15500} }
Diffusion-driven GAN Inversion for Multi-Modal Face Image Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR, author = {Kim, Jihyun and Oh, Changjae and Do, Hoseok and Kim, Soohyun and Sohn, Kwanghoon}, title = {Diffusion-driven GAN Inversion for Multi-Modal Face Image Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10403-10412} }
Low-Rank Knowledge Decomposition for Medical Foundation Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Yuhang and Li, Haolin and Du, Siyuan and Yao, Jiangchao and Zhang, Ya and Wang, Yanfeng}, title = {Low-Rank Knowledge Decomposition for Medical Foundation Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11611-11620} }
Steganographic Passport: An Owner and User Verifiable Credential for Deep Model IP Protection Without Retraining-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cui_2024_CVPR, author = {Cui, Qi and Meng, Ruohan and Xu, Chaohui and Chang, Chip-Hong}, title = {Steganographic Passport: An Owner and User Verifiable Credential for Deep Model IP Protection Without Retraining}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12302-12311} }
En3D: An Enhanced Generative Model for Sculpting 3D Humans from 2D Synthetic Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Men_2024_CVPR, author = {Men, Yifang and Lei, Biwen and Yao, Yuan and Cui, Miaomiao and Lian, Zhouhui and Xie, Xuansong}, title = {En3D: An Enhanced Generative Model for Sculpting 3D Humans from 2D Synthetic Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9981-9991} }
Neural Visibility Field for Uncertainty-Driven Active Mapping-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xue_2024_CVPR, author = {Xue, Shangjie and Dill, Jesse and Mathur, Pranay and Dellaert, Frank and Tsiotras, Panagiotis and Xu, Danfei}, title = {Neural Visibility Field for Uncertainty-Driven Active Mapping}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18122-18132} }
Tri-Perspective View Decomposition for Geometry-Aware Depth Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yan_2024_CVPR, author = {Yan, Zhiqiang and Lin, Yuankai and Wang, Kun and Zheng, Yupeng and Wang, Yufei and Zhang, Zhenyu and Li, Jun and Yang, Jian}, title = {Tri-Perspective View Decomposition for Geometry-Aware Depth Completion}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {4874-4884} }
Relaxed Contrastive Learning for Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Seo_2024_CVPR, author = {Seo, Seonguk and Kim, Jinkyu and Kim, Geeho and Han, Bohyung}, title = {Relaxed Contrastive Learning for Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12279-12288} }
Harnessing the Power of MLLMs for Transferable Text-to-Image Person ReID-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tan_2024_CVPR, author = {Tan, Wentan and Ding, Changxing and Jiang, Jiayu and Wang, Fei and Zhan, Yibing and Tao, Dapeng}, title = {Harnessing the Power of MLLMs for Transferable Text-to-Image Person ReID}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17127-17137} }
Weakly Supervised Video Individual Counting-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Xinyan and Li, Guorong and Qi, Yuankai and Yan, Ziheng and Han, Zhenjun and van den Hengel, Anton and Yang, Ming-Hsuan and Huang, Qingming}, title = {Weakly Supervised Video Individual Counting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19228-19237} }
Gaussian Shading: Provable Performance-Lossless Image Watermarking for Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Zijin and Zeng, Kai and Chen, Kejiang and Fang, Han and Zhang, Weiming and Yu, Nenghai}, title = {Gaussian Shading: Provable Performance-Lossless Image Watermarking for Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12162-12171} }
DocRes: A Generalist Model Toward Unifying Document Image Restoration Tasks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Jiaxin and Peng, Dezhi and Liu, Chongyu and Zhang, Peirong and Jin, Lianwen}, title = {DocRes: A Generalist Model Toward Unifying Document Image Restoration Tasks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15654-15664} }
Honeybee: Locality-enhanced Projector for Multimodal LLM-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cha_2024_CVPR, author = {Cha, Junbum and Kang, Wooyoung and Mun, Jonghwan and Roh, Byungseok}, title = {Honeybee: Locality-enhanced Projector for Multimodal LLM}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13817-13827} }
Learned Trajectory Embedding for Subspace Clustering-
[pdf]
[supp]
[bibtex]@InProceedings{Lochman_2024_CVPR, author = {Lochman, Yaroslava and Olsson, Carl and Zach, Christopher}, title = {Learned Trajectory Embedding for Subspace Clustering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19092-19102} }
HarmonyView: Harmonizing Consistency and Diversity in One-Image-to-3D-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Woo_2024_CVPR, author = {Woo, Sangmin and Park, Byeongjun and Go, Hyojun and Kim, Jin-Young and Kim, Changick}, title = {HarmonyView: Harmonizing Consistency and Diversity in One-Image-to-3D}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10574-10584} }
UnSAMFlow: Unsupervised Optical Flow Guided by Segment Anything Model-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yuan_2024_CVPR, author = {Yuan, Shuai and Luo, Lei and Hui, Zhuo and Pu, Can and Xiang, Xiaoyu and Ranjan, Rakesh and Demandolx, Denis}, title = {UnSAMFlow: Unsupervised Optical Flow Guided by Segment Anything Model}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19027-19037} }
Exploiting Inter-sample and Inter-feature Relations in Dataset Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Deng_2024_CVPR, author = {Deng, Wenxiao and Li, Wenbin and Ding, Tianyu and Wang, Lei and Zhang, Hongguang and Huang, Kuihua and Huo, Jing and Gao, Yang}, title = {Exploiting Inter-sample and Inter-feature Relations in Dataset Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17057-17066} }
Context-based and Diversity-driven Specificity in Compositional Zero-Shot Learning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Yun and Liu, Zhe and Chen, Hang and Yao, Lina}, title = {Context-based and Diversity-driven Specificity in Compositional Zero-Shot Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17037-17046} }
Rethinking Diffusion Model for Multi-Contrast MRI Super-Resolution-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Guangyuan and Rao, Chen and Mo, Juncheng and Zhang, Zhanjie and Xing, Wei and Zhao, Lei}, title = {Rethinking Diffusion Model for Multi-Contrast MRI Super-Resolution}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11365-11374} }
Unknown Prompt the only Lacuna: Unveiling CLIP's Potential for Open Domain Generalization-
[pdf]
[supp]
[bibtex]@InProceedings{Singha_2024_CVPR, author = {Singha, Mainak and Jha, Ankit and Bose, Shirsha and Nair, Ashwin and Abdar, Moloud and Banerjee, Biplab}, title = {Unknown Prompt the only Lacuna: Unveiling CLIP's Potential for Open Domain Generalization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13309-13319} }
From Coarse to Fine-Grained Open-Set Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Lang_2024_CVPR, author = {Lang, Nico and Sn{\ae}bjarnarson, V\'esteinn and Cole, Elijah and Mac Aodha, Oisin and Igel, Christian and Belongie, Serge}, title = {From Coarse to Fine-Grained Open-Set Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17804-17814} }
OmniViD: A Generative Framework for Universal Video Understanding-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Junke and Chen, Dongdong and Luo, Chong and He, Bo and Yuan, Lu and Wu, Zuxuan and Jiang, Yu-Gang}, title = {OmniViD: A Generative Framework for Universal Video Understanding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18209-18220} }
Naturally Supervised 3D Visual Grounding with Language-Regularized Concept Learners-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Feng_2024_CVPR,
  author    = {Feng, Chun and Hsu, Joy and Liu, Weiyu and Wu, Jiajun},
  title     = {Naturally Supervised {3D} Visual Grounding with Language-Regularized Concept Learners},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13269--13278},
}
CA-Jaccard: Camera-aware Jaccard Distance for Person Re-identification-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Yiyu and Fan, Zheyi and Chen, Zhaoru and Zhu, Yixuan},
  title     = {{CA-Jaccard}: Camera-aware {Jaccard} Distance for Person Re-identification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17532--17541},
}
AutoAD III: The Prequel - Back to the Pixels-
[pdf]
[bibtex]@InProceedings{Han_2024_CVPR,
  author    = {Han, Tengda and Bain, Max and Nagrani, Arsha and Varol, G{\"u}l and Xie, Weidi and Zisserman, Andrew},
  title     = {{AutoAD III}: The Prequel - Back to the Pixels},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18164--18174},
}
Characteristics Matching Based Hash Codes Generation for Efficient Fine-grained Image Retrieval-
[pdf]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Zhen-Duo and Zhao, Li-Jun and Zhang, Zi-Chao and Luo, Xin and Xu, Xin-Shun},
  title     = {Characteristics Matching Based Hash Codes Generation for Efficient Fine-grained Image Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17273--17281},
}
Matching 2D Images in 3D: Metric Relative Pose from Metric Correspondences-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Barroso-Laguna_2024_CVPR,
  author    = {Barroso-Laguna, Axel and Munukutla, Sowmya and Prisacariu, Victor Adrian and Brachmann, Eric},
  title     = {Matching {2D} Images in {3D}: Metric Relative Pose from Metric Correspondences},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {4852--4863},
}
M3-UDA: A New Benchmark for Unsupervised Domain Adaptive Fetal Cardiac Structure Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Pu_2024_CVPR,
  author    = {Pu, Bin and Wang, Liwen and Yang, Jiewen and He, Guannan and Dong, Xingbo and Li, Shengli and Tan, Ying and Chen, Ming and Jin, Zhe and Li, Kenli and Li, Xiaomeng},
  title     = {{M3-UDA}: A New Benchmark for Unsupervised Domain Adaptive Fetal Cardiac Structure Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11621--11630},
}
Chat-UniVi: Unified Visual Representation Empowers Large Language Models with Image and Video Understanding-
[pdf]
[supp]
[bibtex]@InProceedings{Jin_2024_CVPR,
  author    = {Jin, Peng and Takanobu, Ryuichi and Zhang, Wancai and Cao, Xiaochun and Yuan, Li},
  title     = {{Chat-UniVi}: Unified Visual Representation Empowers Large Language Models with Image and Video Understanding},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13700--13710},
}
Token Transformation Matters: Towards Faithful Post-hoc Explanation for Vision Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR,
  author    = {Wu, Junyi and Duan, Bin and Kang, Weitai and Tang, Hao and Yan, Yan},
  title     = {Token Transformation Matters: Towards Faithful Post-hoc Explanation for Vision Transformer},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10926--10935},
}
Bayesian Differentiable Physics for Cloth Digitalization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gong_2024_CVPR,
  author    = {Gong, Deshan and Mao, Ningtao and Wang, He},
  title     = {Bayesian Differentiable Physics for Cloth Digitalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11841--11851},
}
Higher-order Relational Reasoning for Pedestrian Trajectory Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Sungjune and Chi, Hyung-gun and Lim, Hyerin and Ramani, Karthik and Kim, Jinkyu and Kim, Sangpil},
  title     = {Higher-order Relational Reasoning for Pedestrian Trajectory Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15251--15260},
}
RealNet: A Feature Selection Network with Realistic Synthetic Anomaly for Anomaly Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Ximiao and Xu, Min and Zhou, Xiuzhuang},
  title     = {{RealNet}: A Feature Selection Network with Realistic Synthetic Anomaly for Anomaly Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16699--16708},
}
Multi-modal Instruction Tuned LLMs with Fine-grained Visual Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{He_2024_CVPR,
  author    = {He, Junwen and Wang, Yifan and Wang, Lijun and Lu, Huchuan and He, Jun-Yan and Lan, Jin-Peng and Luo, Bin and Xie, Xuansong},
  title     = {Multi-modal Instruction Tuned {LLMs} with Fine-grained Visual Perception},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13980--13990},
}
LaMPilot: An Open Benchmark Dataset for Autonomous Driving with Language Model Programs-
[pdf]
[arXiv]
[bibtex]@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Yunsheng and Cui, Can and Cao, Xu and Ye, Wenqian and Liu, Peiran and Lu, Juanwu and Abdelraouf, Amr and Gupta, Rohit and Han, Kyungtae and Bera, Aniket and Rehg, James M. and Wang, Ziran},
  title     = {{LaMPilot}: An Open Benchmark Dataset for Autonomous Driving with Language Model Programs},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15141--15151},
}
FairDeDup: Detecting and Mitigating Vision-Language Fairness Disparities in Semantic Dataset Deduplication-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Slyman_2024_CVPR,
  author    = {Slyman, Eric and Lee, Stefan and Cohen, Scott and Kafle, Kushal},
  title     = {{FairDeDup}: Detecting and Mitigating Vision-Language Fairness Disparities in Semantic Dataset Deduplication},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13905--13916},
}
Modality-agnostic Domain Generalizable Medical Image Segmentation by Multi-Frequency in Multi-Scale Attention-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nam_2024_CVPR,
  author    = {Nam, Ju-Hyeon and Syazwany, Nur Suriza and Kim, Su Jung and Lee, Sang-Chul},
  title     = {Modality-agnostic Domain Generalizable Medical Image Segmentation by Multi-Frequency in Multi-Scale Attention},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11480--11491},
}
Auto MC-Reward: Automated Dense Reward Design with Large Language Models for Minecraft-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Hao and Yang, Xue and Wang, Zhaokai and Zhu, Xizhou and Zhou, Jie and Qiao, Yu and Wang, Xiaogang and Li, Hongsheng and Lu, Lewei and Dai, Jifeng},
  title     = {{Auto MC-Reward}: Automated Dense Reward Design with Large Language Models for {Minecraft}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16426--16435},
}
GenFlow: Generalizable Recurrent Flow for 6D Pose Refinement of Novel Objects-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Moon_2024_CVPR,
  author    = {Moon, Sungphill and Son, Hyeontae and Hur, Dongcheol and Kim, Sangwook},
  title     = {{GenFlow}: Generalizable Recurrent Flow for {6D} Pose Refinement of Novel Objects},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10039--10049},
}
Logarithmic Lenses: Exploring Log RGB Data for Image Classification-
[pdf]
[bibtex]@InProceedings{Maxwell_2024_CVPR,
  author    = {Maxwell, Bruce A. and Singhania, Sumegha and Patel, Avnish and Kumar, Rahul and Fryling, Heather and Li, Sihan and Sun, Haonan and He, Ping and Li, Zewen},
  title     = {Logarithmic Lenses: Exploring Log {RGB} Data for Image Classification},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17470--17479},
}
Scaled Decoupled Distillation-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wei_2024_CVPR,
  author    = {Wei, Shicai and Luo, Chunbo and Luo, Yang},
  title     = {Scaled Decoupled Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15975--15983},
}
Cloud-Device Collaborative Learning for Multimodal Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Guanqun and Liu, Jiaming and Li, Chenxuan and Zhang, Yuan and Ma, Junpeng and Wei, Xinyu and Zhang, Kevin and Chong, Maurice and Zhang, Renrui and Liu, Yijiang and Zhang, Shanghang},
  title     = {Cloud-Device Collaborative Learning for Multimodal Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12646--12655},
}
KD-DETR: Knowledge Distillation for Detection Transformer with Consistent Distillation Points Sampling-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Yu and Li, Xin and Weng, Shengzhao and Zhang, Gang and Yue, Haixiao and Feng, Haocheng and Han, Junyu and Ding, Errui},
  title     = {{KD-DETR}: Knowledge Distillation for Detection Transformer with Consistent Distillation Points Sampling},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16016--16025},
}
EMCAD: Efficient Multi-scale Convolutional Attention Decoding for Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rahman_2024_CVPR,
  author    = {Rahman, Md Mostafijur and Munir, Mustafa and Marculescu, Radu},
  title     = {{EMCAD}: Efficient Multi-scale Convolutional Attention Decoding for Medical Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11769--11779},
}
MART: Masked Affective RepresenTation Learning via Masked Temporal Distribution Distillation-
[pdf]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Zhicheng and Zhao, Pancheng and Park, Eunil and Yang, Jufeng},
  title     = {{MART}: Masked Affective {RepresenTation} Learning via Masked Temporal Distribution Distillation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12830--12840},
}
MTLoRA: Low-Rank Adaptation Approach for Efficient Multi-Task Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Agiza_2024_CVPR,
  author    = {Agiza, Ahmed and Neseem, Marina and Reda, Sherief},
  title     = {{MTLoRA}: Low-Rank Adaptation Approach for Efficient Multi-Task Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16196--16205},
}
Motion Blur Decomposition with Cross-shutter Guidance-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ji_2024_CVPR,
  author    = {Ji, Xiang and Jiang, Haiyang and Zheng, Yinqiang},
  title     = {Motion Blur Decomposition with Cross-shutter Guidance},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12534--12543},
}
Scene-adaptive and Region-aware Multi-modal Prompt for Open Vocabulary Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Xiaowei and Liu, Xianglong and Wang, Duorui and Gao, Yajun and Liu, Zhide},
  title     = {Scene-adaptive and Region-aware Multi-modal Prompt for Open Vocabulary Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16741--16750},
}
Instance-Aware Group Quantization for Vision Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Moon_2024_CVPR,
  author    = {Moon, Jaehyeon and Kim, Dohyung and Cheon, Junyong and Ham, Bumsub},
  title     = {Instance-Aware Group Quantization for Vision Transformers},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16132--16141},
}
A General and Efficient Training for Transformer via Token Expansion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huang_2024_CVPR,
  author    = {Huang, Wenxuan and Shen, Yunhang and Xie, Jiao and Zhang, Baochang and He, Gaoqi and Li, Ke and Sun, Xing and Lin, Shaohui},
  title     = {A General and Efficient Training for Transformer via Token Expansion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15783--15792},
}
Tyche: Stochastic In-Context Learning for Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Rakic_2024_CVPR,
  author    = {Rakic, Marianne and Wong, Hallee E. and Ortiz, Jose Javier Gonzalez and Cimini, Beth A. and Guttag, John V. and Dalca, Adrian V.},
  title     = {Tyche: Stochastic In-Context Learning for Medical Image Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11159--11173},
}
YOLO-World: Real-Time Open-Vocabulary Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Cheng_2024_CVPR,
  author    = {Cheng, Tianheng and Song, Lin and Ge, Yixiao and Liu, Wenyu and Wang, Xinggang and Shan, Ying},
  title     = {{YOLO-World}: Real-Time Open-Vocabulary Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16901--16911},
}
Cross-Dimension Affinity Distillation for 3D EM Neuron Segmentation-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR,
  author    = {Liu, Xiaoyu and Cai, Miaomiao and Chen, Yinda and Zhang, Yueyi and Shi, Te and Zhang, Ruobing and Chen, Xuejin and Xiong, Zhiwei},
  title     = {Cross-Dimension Affinity Distillation for {3D} {EM} Neuron Segmentation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11104--11113},
}
Producing and Leveraging Online Map Uncertainty in Trajectory Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gu_2024_CVPR,
  author    = {Gu, Xunjiang and Song, Guanyu and Gilitschenski, Igor and Pavone, Marco and Ivanovic, Boris},
  title     = {Producing and Leveraging Online Map Uncertainty in Trajectory Prediction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14521--14530},
}
LASO: Language-guided Affordance Segmentation on 3D Object-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Yicong and Zhao, Na and Xiao, Junbin and Feng, Chun and Wang, Xiang and Chua, Tat-seng},
  title     = {{LASO}: Language-guided Affordance Segmentation on {3D} Object},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14251--14260},
}
Riemannian Multinomial Logistics Regression for SPD Neural Networks-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Ziheng and Song, Yue and Liu, Gaowen and Kompella, Ramana Rao and Wu, Xiao-Jun and Sebe, Nicu},
  title     = {Riemannian Multinomial Logistics Regression for {SPD} Neural Networks},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17086--17096},
}
What Sketch Explainability Really Means for Downstream Tasks?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bandyopadhyay_2024_CVPR,
  author    = {Bandyopadhyay, Hmrishav and Chowdhury, Pinaki Nath and Bhunia, Ayan Kumar and Sain, Aneeshan and Xiang, Tao and Song, Yi-Zhe},
  title     = {What Sketch Explainability Really Means for Downstream Tasks?},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10997--11008},
}
Neural Exposure Fusion for High-Dynamic Range Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Onzon_2024_CVPR,
  author    = {Onzon, Emmanuel and B{\"o}mer, Maximilian and Mannan, Fahim and Heide, Felix},
  title     = {Neural Exposure Fusion for High-Dynamic Range Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17564--17573},
}
SFOD: Spiking Fusion Object Detector-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2024_CVPR,
  author    = {Fan, Yimeng and Zhang, Wei and Liu, Changsong and Li, Mingyang and Lu, Wenrui},
  title     = {{SFOD}: Spiking Fusion Object Detector},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17191--17200},
}
OpenEQA: Embodied Question Answering in the Era of Foundation Models-
[pdf]
[supp]
[bibtex]@InProceedings{Majumdar_2024_CVPR,
  author    = {Majumdar, Arjun and Ajay, Anurag and Zhang, Xiaohan and Putta, Pranav and Yenamandra, Sriram and Henaff, Mikael and Silwal, Sneha and Mcvay, Paul and Maksymets, Oleksandr and Arnaud, Sergio and Yadav, Karmesh and Li, Qiyang and Newman, Ben and Sharma, Mohit and Berges, Vincent and Zhang, Shiqi and Agrawal, Pulkit and Bisk, Yonatan and Batra, Dhruv and Kalakrishnan, Mrinal and Meier, Franziska and Paxton, Chris and Sax, Alexander and Rajeswaran, Aravind},
  title     = {{OpenEQA}: Embodied Question Answering in the Era of Foundation Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16488--16498},
}
DePT: Decoupled Prompt Tuning-
[pdf]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Ji and Wu, Shihan and Gao, Lianli and Shen, Heng Tao and Song, Jingkuan},
  title     = {{DePT}: Decoupled Prompt Tuning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12924--12933},
}
A Generative Approach for Wikipedia-Scale Visual Entity Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Caron_2024_CVPR,
  author    = {Caron, Mathilde and Iscen, Ahmet and Fathi, Alireza and Schmid, Cordelia},
  title     = {A Generative Approach for {Wikipedia-Scale} Visual Entity Recognition},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17313--17322},
}
Open-Vocabulary Object 6D Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Corsetti_2024_CVPR,
  author    = {Corsetti, Jaime and Boscaini, Davide and Oh, Changjae and Cavallaro, Andrea and Poiesi, Fabio},
  title     = {Open-Vocabulary Object {6D} Pose Estimation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18071--18080},
}
Plug and Play Active Learning for Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Chenhongyi and Huang, Lichao and Crowley, Elliot J.},
  title     = {Plug and Play Active Learning for Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17784--17793},
}
LiSA: LiDAR Localization with Semantic Awareness-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR,
  author    = {Yang, Bochun and Li, Zijun and Li, Wen and Cai, Zhipeng and Wen, Chenglu and Zang, Yu and Muller, Matthias and Wang, Cheng},
  title     = {{LiSA}: {LiDAR} Localization with Semantic Awareness},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15271--15280},
}
LMDrive: Closed-Loop End-to-End Driving with Large Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shao_2024_CVPR,
  author    = {Shao, Hao and Hu, Yuxuan and Wang, Letian and Song, Guanglu and Waslander, Steven L. and Liu, Yu and Li, Hongsheng},
  title     = {{LMDrive}: Closed-Loop End-to-End Driving with Large Language Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {15120--15130},
}
AHIVE: Anatomy-aware Hierarchical Vision Encoding for Interactive Radiology Report Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Yan_2024_CVPR,
  author    = {Yan, Sixing and Cheung, William K. and Tsang, Ivor W. and Chiu, Keith and Tong, Terence M. and Cheung, Ka Chun and See, Simon},
  title     = {{AHIVE}: Anatomy-aware Hierarchical Vision Encoding for Interactive Radiology Report Retrieval},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14324--14333},
}
CyberDemo: Augmenting Simulated Human Demonstration for Real-World Dexterous Manipulation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Jun and Qin, Yuzhe and Kuang, Kaiming and Korkmaz, Yigit and Gurumoorthy, Akhilan and Su, Hao and Wang, Xiaolong},
  title     = {{CyberDemo}: Augmenting Simulated Human Demonstration for Real-World Dexterous Manipulation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17952--17963},
}
MaskCLR: Attention-Guided Contrastive Learning for Robust Action Representation Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Abdelfattah_2024_CVPR,
  author    = {Abdelfattah, Mohamed and Hassan, Mariam and Alahi, Alexandre},
  title     = {{MaskCLR}: Attention-Guided Contrastive Learning for Robust Action Representation Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18678--18687},
}
Narrative Action Evaluation with Prompt-Guided Multimodal Interaction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Shiyi and Bai, Sule and Chen, Guangyi and Chen, Lei and Lu, Jiwen and Wang, Junle and Tang, Yansong},
  title     = {Narrative Action Evaluation with Prompt-Guided Multimodal Interaction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18430--18439},
}
R-Cyclic Diffuser: Reductive and Cyclic Latent Diffusion for 3D Clothed Human Digitalization-
[pdf]
[supp]
[bibtex]@InProceedings{Chan_2024_CVPR,
  author    = {Chan, Kennard Yanting and Liu, Fayao and Lin, Guosheng and Foo, Chuan Sheng and Lin, Weisi},
  title     = {{R-Cyclic Diffuser}: Reductive and Cyclic Latent Diffusion for {3D} Clothed Human Digitalization},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {10304--10313},
}
Validating Privacy-Preserving Face Recognition under a Minimum Assumption-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Hui and Dong, Xingbo and Lai, YenLung and Zhou, Ying and Zhang, Xiaoyan and Lv, Xingguo and Jin, Zhe and Li, Xuejun},
  title     = {Validating Privacy-Preserving Face Recognition under a Minimum Assumption},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12205--12214},
}
Long-Tailed Anomaly Detection with Learnable Class Names-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ho_2024_CVPR,
  author    = {Ho, Chih-Hui and Peng, Kuan-Chuan and Vasconcelos, Nuno},
  title     = {Long-Tailed Anomaly Detection with Learnable Class Names},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12435--12446},
}
Rapid 3D Model Generation with Intuitive 3D Input-
[pdf]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Tianrun and Ding, Chaotao and Zhang, Shangzhan and Yu, Chunan and Zang, Ying and Li, Zejian and Peng, Sida and Sun, Lingyun},
  title     = {Rapid {3D} Model Generation with Intuitive {3D} Input},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12554--12564},
}
BoQ: A Place is Worth a Bag of Learnable Queries-
[pdf]
[supp]
[bibtex]@InProceedings{Ali-bey_2024_CVPR,
  author    = {Ali-bey, Amar and Chaib-draa, Brahim and Gigu{\`e}re, Philippe},
  title     = {{BoQ}: A Place is Worth a Bag of Learnable Queries},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17794--17803},
}
GigaPose: Fast and Robust Novel Object Pose Estimation via One Correspondence-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2024_CVPR,
  author    = {Nguyen, Van Nguyen and Groueix, Thibault and Salzmann, Mathieu and Lepetit, Vincent},
  title     = {{GigaPose}: Fast and Robust Novel Object Pose Estimation via One Correspondence},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9903--9913},
}
Imagine Before Go: Self-Supervised Generative Map for Object Goal Navigation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR,
  author    = {Zhang, Sixian and Yu, Xinyao and Song, Xinhang and Wang, Xiaohan and Jiang, Shuqiang},
  title     = {Imagine Before Go: Self-Supervised Generative Map for Object Goal Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16414--16425},
}
HIPTrack: Visual Tracking with Historical Prompts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cai_2024_CVPR,
  author    = {Cai, Wenrui and Liu, Qingjie and Wang, Yunhong},
  title     = {{HIPTrack}: Visual Tracking with Historical Prompts},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {19258--19267},
}
An N-Point Linear Solver for Line and Motion Estimation with Event Cameras-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2024_CVPR,
  author    = {Gao, Ling and Gehrig, Daniel and Su, Hang and Scaramuzza, Davide and Kneip, Laurent},
  title     = {An {N-Point} Linear Solver for Line and Motion Estimation with Event Cameras},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14596--14605},
}
GenNBV: Generalizable Next-Best-View Policy for Active 3D Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR,
  author    = {Chen, Xiao and Li, Quanyi and Wang, Tai and Xue, Tianfan and Pang, Jiangmiao},
  title     = {{GenNBV}: Generalizable Next-Best-View Policy for Active {3D} Reconstruction},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16436--16445},
}
Taming Self-Training for Open-Vocabulary Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR,
  author    = {Zhao, Shiyu and Schulter, Samuel and Zhao, Long and Zhang, Zhixing and G, Vijay Kumar B and Suh, Yumin and Chandraker, Manmohan and Metaxas, Dimitris N.},
  title     = {Taming Self-Training for Open-Vocabulary Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {13938--13947},
}
Bilateral Propagation Network for Depth Completion-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2024_CVPR,
  author    = {Tang, Jie and Tian, Fei-Peng and An, Boshi and Li, Jian and Tan, Ping},
  title     = {Bilateral Propagation Network for Depth Completion},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9763--9772},
}
Unleashing Channel Potential: Space-Frequency Selection Convolution for SAR Object Detection-
[pdf]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Ke and Wang, Di and Hu, Zhangyuan and Zhu, Wenxuan and Li, Shaofeng and Wang, Quan},
  title     = {Unleashing Channel Potential: Space-Frequency Selection Convolution for {SAR} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17323--17332},
}
READ: Retrieval-Enhanced Asymmetric Diffusion for Motion Planning-
[pdf]
[supp]
[bibtex]@InProceedings{Oba_2024_CVPR,
  author    = {Oba, Takeru and Walter, Matthew and Ukita, Norimichi},
  title     = {{READ}: Retrieval-Enhanced Asymmetric Diffusion for Motion Planning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17974--17984},
}
OVMR: Open-Vocabulary Recognition with Multi-Modal References-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2024_CVPR,
  author    = {Ma, Zehong and Zhang, Shiliang and Wei, Longhui and Tian, Qi},
  title     = {{OVMR}: Open-Vocabulary Recognition with Multi-Modal References},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16571--16581},
}
Global and Local Prompts Cooperation via Optimal Transport for Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Hongxia and Huang, Wei and Wang, Jingya and Shi, Ye},
  title     = {Global and Local Prompts Cooperation via Optimal Transport for Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12151--12161},
}
Retrieval-Augmented Open-Vocabulary Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kim_2024_CVPR,
  author    = {Kim, Jooyeon and Cho, Eulrang and Kim, Sehyung and Kim, Hyunwoo J.},
  title     = {Retrieval-Augmented Open-Vocabulary Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {17427--17436},
}
MULTIFLOW: Shifting Towards Task-Agnostic Vision-Language Pruning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Farina_2024_CVPR,
  author    = {Farina, Matteo and Mancini, Massimiliano and Cunegatti, Elia and Liu, Gaowen and Iacca, Giovanni and Ricci, Elisa},
  title     = {{MULTIFLOW}: Shifting Towards Task-Agnostic Vision-Language Pruning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16185--16195},
}
Spin-UP: Spin Light for Natural Light Uncalibrated Photometric Stereo-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR,
  author    = {Li, Zongrui and Lu, Zhan and Yan, Haojie and Shi, Boxin and Pan, Gang and Zheng, Qian and Jiang, Xudong},
  title     = {{Spin-UP}: Spin Light for Natural Light Uncalibrated Photometric Stereo},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {11905--11914},
}
MemoNav: Working Memory Model for Visual Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Hongxin and Wang, Zeyu and Yang, Xu and Yang, Yuran and Mei, Shuqi and Zhang, Zhaoxiang}, title = {MemoNav: Working Memory Model for Visual Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17913-17922} }
AssistGUI: Task-Oriented PC Graphical User Interface Automation-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Difei and Ji, Lei and Bai, Zechen and Ouyang, Mingyu and Li, Peiran and Mao, Dongxing and Wu, Qinchen and Zhang, Weichen and Wang, Peiyi and Guo, Xiangwu and Wang, Hengxu and Zhou, Luowei and Shou, Mike Zheng}, title = {AssistGUI: Task-Oriented PC Graphical User Interface Automation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13289-13298} }
PaSCo: Urban 3D Panoptic Scene Completion with Uncertainty Awareness-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2024_CVPR, author = {Cao, Anh-Quan and Dai, Angela and de Charette, Raoul}, title = {PaSCo: Urban 3D Panoptic Scene Completion with Uncertainty Awareness}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14554-14564} }
PhyScene: Physically Interactable 3D Scene Synthesis for Embodied AI-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Yandan and Jia, Baoxiong and Zhi, Peiyuan and Huang, Siyuan}, title = {PhyScene: Physically Interactable 3D Scene Synthesis for Embodied AI}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16262-16272} }
Harnessing Meta-Learning for Improving Full-Frame Video Stabilization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ali_2024_CVPR, author = {Ali, Muhammad Kashif and Im, Eun Woo and Kim, Dongjin and Kim, Tae Hyun}, title = {Harnessing Meta-Learning for Improving Full-Frame Video Stabilization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12605-12614} }
How to Handle Sketch-Abstraction in Sketch-Based Image Retrieval?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Koley_2024_CVPR, author = {Koley, Subhadeep and Bhunia, Ayan Kumar and Sain, Aneeshan and Chowdhury, Pinaki Nath and Xiang, Tao and Song, Yi-Zhe}, title = {How to Handle Sketch-Abstraction in Sketch-Based Image Retrieval?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16859-16869} }
ProS: Prompting-to-simulate Generalized knowledge for Universal Cross-Domain Retrieval-
[pdf]
[arXiv]
[bibtex]@InProceedings{Fang_2024_CVPR, author = {Fang, Kaipeng and Song, Jingkuan and Gao, Lianli and Zeng, Pengpeng and Cheng, Zhi-Qi and Li, Xiyao and Shen, Heng Tao}, title = {ProS: Prompting-to-simulate Generalized knowledge for Universal Cross-Domain Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17292-17301} }
Boosting Object Detection with Zero-Shot Day-Night Domain Adaptation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Du_2024_CVPR, author = {Du, Zhipeng and Shi, Miaojing and Deng, Jiankang}, title = {Boosting Object Detection with Zero-Shot Day-Night Domain Adaptation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12666-12676} }
Versatile Medical Image Segmentation Learned from Multi-Source Datasets via Model Self-Disambiguation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Xiaoyang and Zheng, Hao and Li, Yuemeng and Ma, Yuncong and Ma, Liang and Li, Hongming and Fan, Yong}, title = {Versatile Medical Image Segmentation Learned from Multi-Source Datasets via Model Self-Disambiguation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11747-11756} }
Align and Aggregate: Compositional Reasoning with Video Alignment and Answer Aggregation for Video Question-Answering-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liao_2024_CVPR, author = {Liao, Zhaohe and Li, Jiangtong and Niu, Li and Zhang, Liqing}, title = {Align and Aggregate: Compositional Reasoning with Video Alignment and Answer Aggregation for Video Question-Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13395-13404} }
Action-slot: Visual Action-centric Representations for Multi-label Atomic Activity Recognition in Traffic Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Kung_2024_CVPR, author = {Kung, Chi-Hsi and Lu, Shu-Wei and Tsai, Yi-Hsuan and Chen, Yi-Ting}, title = {Action-slot: Visual Action-centric Representations for Multi-label Atomic Activity Recognition in Traffic Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18451-18461} }
Retraining-Free Model Quantization via One-Shot Weight-Coupling Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tang_2024_CVPR, author = {Tang, Chen and Meng, Yuan and Jiang, Jiacheng and Xie, Shuzhao and Lu, Rongwei and Ma, Xinzhu and Wang, Zhi and Zhu, Wenwu}, title = {Retraining-Free Model Quantization via One-Shot Weight-Coupling Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15855-15865} }
EVCap: Retrieval-Augmented Image Captioning with External Visual-Name Memory for Open-World Comprehension-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Jiaxuan and Vo, Duc Minh and Sugimoto, Akihiro and Nakayama, Hideki}, title = {EVCap: Retrieval-Augmented Image Captioning with External Visual-Name Memory for Open-World Comprehension}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13733-13742} }
SIFU: Side-view Conditioned Implicit Function for Real-world Usable Clothed Human Reconstruction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Zechuan and Yang, Zongxin and Yang, Yi}, title = {SIFU: Side-view Conditioned Implicit Function for Real-world Usable Clothed Human Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9936-9947} }
Autoregressive Queries for Adaptive Tracking with Spatio-Temporal Transformers-
[pdf]
[bibtex]@InProceedings{Xie_2024_CVPR, author = {Xie, Jinxia and Zhong, Bineng and Mo, Zhiyi and Zhang, Shengping and Shi, Liangtao and Song, Shuxiang and Ji, Rongrong}, title = {Autoregressive Queries for Adaptive Tracking with Spatio-Temporal Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19300-19309} }
Lane2Seq: Towards Unified Lane Detection via Sequence Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Kunyang}, title = {Lane2Seq: Towards Unified Lane Detection via Sequence Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16944-16953} }
LEMON: Learning 3D Human-Object Interaction Relation from 2D Images-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Yuhang and Zhai, Wei and Luo, Hongchen and Cao, Yang and Zha, Zheng-Jun}, title = {LEMON: Learning 3D Human-Object Interaction Relation from 2D Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16284-16295} }
Understanding Video Transformers via Universal Concept Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kowal_2024_CVPR, author = {Kowal, Matthew and Dave, Achal and Ambrus, Rares and Gaidon, Adrien and Derpanis, Konstantinos G. and Tokmakov, Pavel}, title = {Understanding Video Transformers via Universal Concept Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10946-10956} }
PointOBB: Learning Oriented Object Detection via Single Point Supervision-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Luo_2024_CVPR, author = {Luo, Junwei and Yang, Xue and Yu, Yi and Li, Qingyun and Yan, Junchi and Li, Yansheng}, title = {PointOBB: Learning Oriented Object Detection via Single Point Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16730-16740} }
OmniParser: A Unified Framework for Text Spotting Key Information Extraction and Table Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wan_2024_CVPR, author = {Wan, Jianqiang and Song, Sibo and Yu, Wenwen and Liu, Yuliang and Cheng, Wenqing and Huang, Fei and Bai, Xiang and Yao, Cong and Yang, Zhibo}, title = {OmniParser: A Unified Framework for Text Spotting Key Information Extraction and Table Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15641-15653} }
Training Like a Medical Resident: Context-Prior Learning Toward Universal Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Yunhe}, title = {Training Like a Medical Resident: Context-Prior Learning Toward Universal Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11194-11204} }
MicroDiffusion: Implicit Representation-Guided Diffusion for 3D Reconstruction from Limited 2D Microscopy Projections-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hui_2024_CVPR, author = {Hui, Mude and Wei, Zihao and Zhu, Hongru and Xia, Fei and Zhou, Yuyin}, title = {MicroDiffusion: Implicit Representation-Guided Diffusion for 3D Reconstruction from Limited 2D Microscopy Projections}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11460-11469} }
Task-Conditioned Adaptation of Visual Features in Multi-Task Policy Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Marza_2024_CVPR, author = {Marza, Pierre and Matignon, Laetitia and Simonin, Olivier and Wolf, Christian}, title = {Task-Conditioned Adaptation of Visual Features in Multi-Task Policy Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17847-17856} }
Hybrid Proposal Refiner: Revisiting DETR Series from the Faster R-CNN Perspective-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Jinjing and Wei, Fangyun and Xu, Chang}, title = {Hybrid Proposal Refiner: Revisiting DETR Series from the Faster R-CNN Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17416-17426} }
Video Harmonization with Triplet Spatio-Temporal Variation Patterns-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, Zonghui and Han, Xinyu and Zhang, Jie and Shan, Shiguang and Zheng, Haiyong}, title = {Video Harmonization with Triplet Spatio-Temporal Variation Patterns}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19177-19186} }
Improved Zero-Shot Classification by Adapting VLMs with Text Descriptions-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Saha_2024_CVPR, author = {Saha, Oindrila and Van Horn, Grant and Maji, Subhransu}, title = {Improved Zero-Shot Classification by Adapting VLMs with Text Descriptions}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17542-17552} }
CricaVPR: Cross-image Correlation-aware Representation Learning for Visual Place Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lu_2024_CVPR, author = {Lu, Feng and Lan, Xiangyuan and Zhang, Lijun and Jiang, Dongmei and Wang, Yaowei and Yuan, Chun}, title = {CricaVPR: Cross-image Correlation-aware Representation Learning for Visual Place Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16772-16782} }
Instance-level Expert Knowledge and Aggregate Discriminative Attention for Radiology Report Generation-
[pdf]
[bibtex]@InProceedings{Bu_2024_CVPR, author = {Bu, Shenshen and Li, Taiji and Yang, Yuedong and Dai, Zhiming}, title = {Instance-level Expert Knowledge and Aggregate Discriminative Attention for Radiology Report Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14194-14204} }
Each Test Image Deserves A Specific Prompt: Continual Test-Time Adaptation for 2D Medical Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Ziyang and Pan, Yongsheng and Ye, Yiwen and Lu, Mengkang and Xia, Yong}, title = {Each Test Image Deserves A Specific Prompt: Continual Test-Time Adaptation for 2D Medical Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11184-11193} }
Versatile Navigation Under Partial Observability via Value-guided Diffusion Policy-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Gengyu and Tang, Hao and Yan, Yan}, title = {Versatile Navigation Under Partial Observability via Value-guided Diffusion Policy}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17943-17951} }
All in One Framework for Multimodal Re-identification in the Wild-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, He and Ye, Mang and Zhang, Ming and Du, Bo}, title = {All in One Framework for Multimodal Re-identification in the Wild}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17459-17469} }
Looking 3D: Anomaly Detection with 2D-3D Alignment-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bhunia_2024_CVPR, author = {Bhunia, Ankan and Li, Changjian and Bilen, Hakan}, title = {Looking 3D: Anomaly Detection with 2D-3D Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17263-17272} }
VS: Reconstructing Clothed 3D Human from Single Image via Vertex Shift-
[pdf]
[supp]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Leyuan and Li, Yuhan and Gao, Yunqi and Gao, Changxin and Liu, Yuanyuan and Chen, Jingying}, title = {VS: Reconstructing Clothed 3D Human from Single Image via Vertex Shift}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10498-10507} }
PARA-Drive: Parallelized Architecture for Real-time Autonomous Driving-
[pdf]
[bibtex]@InProceedings{Weng_2024_CVPR, author = {Weng, Xinshuo and Ivanovic, Boris and Wang, Yan and Wang, Yue and Pavone, Marco}, title = {PARA-Drive: Parallelized Architecture for Real-time Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15449-15458} }
Pink: Unveiling the Power of Referential Comprehension for Multi-modal LLMs-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xuan_2024_CVPR, author = {Xuan, Shiyu and Guo, Qingpei and Yang, Ming and Zhang, Shiliang}, title = {Pink: Unveiling the Power of Referential Comprehension for Multi-modal LLMs}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13838-13848} }
HalluciDoctor: Mitigating Hallucinatory Toxicity in Visual Instruction Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Qifan and Li, Juncheng and Wei, Longhui and Pang, Liang and Ye, Wentao and Qin, Bosheng and Tang, Siliang and Tian, Qi and Zhuang, Yueting}, title = {HalluciDoctor: Mitigating Hallucinatory Toxicity in Visual Instruction Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12944-12953} }
C^2RV: Cross-Regional and Cross-View Learning for Sparse-View CBCT Reconstruction-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2024_CVPR, author = {Lin, Yiqun and Yang, Jiewen and Wang, Hualiang and Ding, Xinpeng and Zhao, Wei and Li, Xiaomeng}, title = {C{\textasciicircum}2RV: Cross-Regional and Cross-View Learning for Sparse-View CBCT Reconstruction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11205-11214} }
GLiDR: Topologically Regularized Graph Generative Network for Sparse LiDAR Point Clouds-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kumar_2024_CVPR, author = {Kumar, Prashant and Bhat, Kshitij Madhav and Nadkarni, Vedang Bhupesh Shenvi and Kalra, Prem}, title = {GLiDR: Topologically Regularized Graph Generative Network for Sparse LiDAR Point Clouds}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15152-15161} }
Commonsense Prototype for Outdoor Unsupervised 3D Object Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wu_2024_CVPR, author = {Wu, Hai and Zhao, Shijia and Huang, Xun and Wen, Chenglu and Li, Xin and Wang, Cheng}, title = {Commonsense Prototype for Outdoor Unsupervised 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14968-14977} }
Lookahead Exploration with Neural Radiance Representation for Continuous Vision-Language Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Zihan and Li, Xiangyang and Yang, Jiahao and Liu, Yeqi and Hu, Junjie and Jiang, Ming and Jiang, Shuqiang}, title = {Lookahead Exploration with Neural Radiance Representation for Continuous Vision-Language Navigation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13753-13762} }
Learning Vision from Models Rivals Learning Vision from Data-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Tian_2024_CVPR, author = {Tian, Yonglong and Fan, Lijie and Chen, Kaifeng and Katabi, Dina and Krishnan, Dilip and Isola, Phillip}, title = {Learning Vision from Models Rivals Learning Vision from Data}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15887-15898} }
Adapting Short-Term Transformers for Action Detection in Untrimmed Videos-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Min and Gao, Huan and Guo, Ping and Wang, Limin}, title = {Adapting Short-Term Transformers for Action Detection in Untrimmed Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18570-18579} }
SOAC: Spatio-Temporal Overlap-Aware Multi-Sensor Calibration using Neural Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Herau_2024_CVPR, author = {Herau, Quentin and Piasco, Nathan and Bennehar, Moussab and Roldao, Luis and Tsishkou, Dzmitry and Migniot, Cyrille and Vasseur, Pascal and Demonceaux, C\'edric}, title = {SOAC: Spatio-Temporal Overlap-Aware Multi-Sensor Calibration using Neural Radiance Fields}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15131-15140} }
G^3-LQ: Marrying Hyperbolic Alignment with Explicit Semantic-Geometric Modeling for 3D Visual Grounding-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Yuan and Li, Yali and Wang, Shengjin}, title = {G{\textasciicircum}3-LQ: Marrying Hyperbolic Alignment with Explicit Semantic-Geometric Modeling for 3D Visual Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13917-13926} }
ToonerGAN: Reinforcing GANs for Obfuscating Automated Facial Indexing-
[pdf]
[supp]
[bibtex]@InProceedings{Thakral_2024_CVPR, author = {Thakral, Kartik and Prasad, Shashikant and Aswani, Stuti and Vatsa, Mayank and Singh, Richa}, title = {ToonerGAN: Reinforcing GANs for Obfuscating Automated Facial Indexing}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10875-10884} }
Editable Scene Simulation for Autonomous Driving via Collaborative LLM-Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2024_CVPR, author = {Wei, Yuxi and Wang, Zi and Lu, Yifan and Xu, Chenxin and Liu, Changxing and Zhao, Hao and Chen, Siheng and Wang, Yanfeng}, title = {Editable Scene Simulation for Autonomous Driving via Collaborative LLM-Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15077-15087} }
SnAG: Scalable and Accurate Video Grounding-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mu_2024_CVPR, author = {Mu, Fangzhou and Mo, Sicheng and Li, Yin}, title = {SnAG: Scalable and Accurate Video Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18930-18940} }
Building Vision-Language Models on Solid Foundations with Masked Distillation-
[pdf]
[bibtex]@InProceedings{Sameni_2024_CVPR, author = {Sameni, Sepehr and Kafle, Kushal and Tan, Hao and Jenni, Simon}, title = {Building Vision-Language Models on Solid Foundations with Masked Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14216-14226} }
TransLoc4D: Transformer-based 4D Radar Place Recognition-
[pdf]
[supp]
[bibtex]@InProceedings{Peng_2024_CVPR, author = {Peng, Guohao and Li, Heshan and Zhao, Yangyang and Zhang, Jun and Wu, Zhenyu and Zheng, Pengyu and Wang, Danwei}, title = {TransLoc4D: Transformer-based 4D Radar Place Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17595-17605} }
Multiscale Vision Transformers Meet Bipartite Matching for Efficient Single-stage Action Localization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ntinou_2024_CVPR, author = {Ntinou, Ioanna and Sanchez, Enrique and Tzimiropoulos, Georgios}, title = {Multiscale Vision Transformers Meet Bipartite Matching for Efficient Single-stage Action Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18827-18836} }
Deep Single Image Camera Calibration by Heatmap Regression to Recover Fisheye Images Under Manhattan World Assumption-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wakai_2024_CVPR, author = {Wakai, Nobuhiko and Sato, Satoshi and Ishii, Yasunori and Yamashita, Takayoshi}, title = {Deep Single Image Camera Calibration by Heatmap Regression to Recover Fisheye Images Under Manhattan World Assumption}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11884-11894} }
CSTA: CNN-based Spatiotemporal Attention for Video Summarization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Son_2024_CVPR, author = {Son, Jaewon and Park, Jaehun and Kim, Kwangsu}, title = {CSTA: CNN-based Spatiotemporal Attention for Video Summarization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18847-18856} }
PEM: Prototype-based Efficient MaskFormer for Image Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cavagnero_2024_CVPR, author = {Cavagnero, Niccol\`o and Rosi, Gabriele and Cuttano, Claudia and Pistilli, Francesca and Ciccone, Marco and Averta, Giuseppe and Cermelli, Fabio}, title = {PEM: Prototype-based Efficient MaskFormer for Image Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15804-15813} }
Referring Expression Counting-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2024_CVPR, author = {Dai, Siyang and Liu, Jun and Cheung, Ngai-Man}, title = {Referring Expression Counting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16985-16995} }
Learning to Predict Activity Progress by Self-Supervised Video Alignment-
[pdf]
[supp]
[bibtex]@InProceedings{Donahue_2024_CVPR, author = {Donahue, Gerard and Elhamifar, Ehsan}, title = {Learning to Predict Activity Progress by Self-Supervised Video Alignment}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18667-18677} }
VicTR: Video-conditioned Text Representations for Activity Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Kahatapitiya_2024_CVPR, author = {Kahatapitiya, Kumara and Arnab, Anurag and Nagrani, Arsha and Ryoo, Michael S.}, title = {VicTR: Video-conditioned Text Representations for Activity Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18547-18558} }
Label-Efficient Group Robustness via Out-of-Distribution Concept Curation-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Yiwei and Liu, Anthony Z. and Wolfe, Robert and Caliskan, Aylin and Howe, Bill}, title = {Label-Efficient Group Robustness via Out-of-Distribution Concept Curation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12426-12434} }
3DToonify: Creating Your High-Fidelity 3D Stylized Avatar Easily from 2D Portrait Images-
[pdf]
[supp]
[bibtex]@InProceedings{Men_2024_CVPR, author = {Men, Yifang and Liu, Hanxi and Yao, Yuan and Cui, Miaomiao and Xie, Xuansong and Lian, Zhouhui}, title = {3DToonify: Creating Your High-Fidelity 3D Stylized Avatar Easily from 2D Portrait Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10127-10137} }
Investigating Compositional Challenges in Vision-Language Models for Visual Grounding-
[pdf]
[supp]
[bibtex]@InProceedings{Zeng_2024_CVPR, author = {Zeng, Yunan and Huang, Yan and Zhang, Jinjin and Jie, Zequn and Chai, Zhenhua and Wang, Liang}, title = {Investigating Compositional Challenges in Vision-Language Models for Visual Grounding}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14141-14151} }
6D-Diff: A Keypoint Diffusion Framework for 6D Object Pose Estimation-
[pdf]
[supp]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Li and Qu, Haoxuan and Cai, Yujun and Liu, Jun}, title = {6D-Diff: A Keypoint Diffusion Framework for 6D Object Pose Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9676-9686} }
Generative Region-Language Pretraining for Open-Ended Object Detection-
[pdf]
[arXiv]
[bibtex]@InProceedings{Lin_2024_CVPR, author = {Lin, Chuang and Jiang, Yi and Qu, Lizhen and Yuan, Zehuan and Cai, Jianfei}, title = {Generative Region-Language Pretraining for Open-Ended Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13958-13968} }
Enhancing Post-training Quantization Calibration through Contrastive Learning-
[pdf]
[bibtex]@InProceedings{Shang_2024_CVPR, author = {Shang, Yuzhang and Liu, Gaowen and Kompella, Ramana Rao and Yan, Yan}, title = {Enhancing Post-training Quantization Calibration through Contrastive Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15921-15930} }
Enhancing Visual Document Understanding with Contrastive Learning in Large Visual-Language Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Xin and Wu, Yunfei and Jiang, Xinghua and Guo, Zhihao and Gong, Mingming and Cao, Haoyu and Liu, Yinsong and Jiang, Deqiang and Sun, Xing}, title = {Enhancing Visual Document Understanding with Contrastive Learning in Large Visual-Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15546-15555} }
Data Valuation and Detections in Federated Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Wenqian and Fu, Shuran and Zhang, Fengrui and Pang, Yan}, title = {Data Valuation and Detections in Federated Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12027-12036} }
Joint Reconstruction of 3D Human and Object via Contact-Based Refinement Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nam_2024_CVPR, author = {Nam, Hyeongjin and Jung, Daniel Sungho and Moon, Gyeongsik and Lee, Kyoung Mu}, title = {Joint Reconstruction of 3D Human and Object via Contact-Based Refinement Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10218-10227} }
TIM: A Time Interval Machine for Audio-Visual Action Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chalk_2024_CVPR, author = {Chalk, Jacob and Huh, Jaesung and Kazakos, Evangelos and Zisserman, Andrew and Damen, Dima}, title = {TIM: A Time Interval Machine for Audio-Visual Action Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18153-18163} }
Would Deep Generative Models Amplify Bias in Future Models?-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Tianwei and Hirota, Yusuke and Otani, Mayu and Garcia, Noa and Nakashima, Yuta}, title = {Would Deep Generative Models Amplify Bias in Future Models?}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10833-10843} }
CogAgent: A Visual Language Model for GUI Agents-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Hong_2024_CVPR, author = {Hong, Wenyi and Wang, Weihan and Lv, Qingsong and Xu, Jiazheng and Yu, Wenmeng and Ji, Junhui and Wang, Yan and Wang, Zihan and Dong, Yuxiao and Ding, Ming and Tang, Jie}, title = {CogAgent: A Visual Language Model for GUI Agents}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14281-14290} }
AIDE: An Automatic Data Engine for Object Detection in Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2024_CVPR, author = {Liang, Mingfu and Su, Jong-Chyi and Schulter, Samuel and Garg, Sparsh and Zhao, Shiyu and Wu, Ying and Chandraker, Manmohan}, title = {AIDE: An Automatic Data Engine for Object Detection in Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14695-14706} }
Smart Help: Strategic Opponent Modeling for Proactive and Adaptive Robot Assistance in Households-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2024_CVPR, author = {Cao, Zhihao and Wang, Zidong and Xie, Siwen and Liu, Anji and Fan, Lifeng}, title = {Smart Help: Strategic Opponent Modeling for Proactive and Adaptive Robot Assistance in Households}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18091-18101} }
Rapid Motor Adaptation for Robotic Manipulator Arms-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liang_2024_CVPR, author = {Liang, Yichao and Ellis, Kevin and Henriques, Jo{\~a}o}, title = {Rapid Motor Adaptation for Robotic Manipulator Arms}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16404-16413} }
WWW: A Unified Framework for Explaining What Where and Why of Neural Networks by Interpretation of Neuron Concepts-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ahn_2024_CVPR, author = {Ahn, Yong Hyun and Kim, Hyeon Bae and Kim, Seong Tae}, title = {WWW: A Unified Framework for Explaining What Where and Why of Neural Networks by Interpretation of Neuron Concepts}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10968-10977} }
CaKDP: Category-aware Knowledge Distillation and Pruning Framework for Lightweight 3D Object Detection-
[pdf]
[supp]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Haonan and Liu, Longjun and Huang, Yuqi and Yang, Zhao and Lei, Xinyu and Wen, Bihan}, title = {CaKDP: Category-aware Knowledge Distillation and Pruning Framework for Lightweight 3D Object Detection}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15331-15341} }
ICP-Flow: LiDAR Scene Flow Estimation with ICP-
[pdf]
[supp]
[bibtex]@InProceedings{Lin_2024_CVPR, author = {Lin, Yancong and Caesar, Holger}, title = {ICP-Flow: LiDAR Scene Flow Estimation with ICP}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15501-15511} }
MADTP: Multimodal Alignment-Guided Dynamic Token Pruning for Accelerating Vision-Language Transformer-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cao_2024_CVPR, author = {Cao, Jianjian and Ye, Peng and Li, Shengze and Yu, Chong and Tang, Yansong and Lu, Jiwen and Chen, Tao}, title = {MADTP: Multimodal Alignment-Guided Dynamic Token Pruning for Accelerating Vision-Language Transformer}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15710-15719} }
G-NeRF: Geometry-enhanced Novel View Synthesis from Single-View Images-
[pdf]
[supp]
[bibtex]@InProceedings{Huang_2024_CVPR, author = {Huang, Zixiong and Chen, Qi and Sun, Libo and Yang, Yifan and Wang, Naizhou and Wu, Qi and Tan, Mingkui}, title = {G-NeRF: Geometry-enhanced Novel View Synthesis from Single-View Images}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10117-10126} }
SpiderMatch: 3D Shape Matching with Global Optimality and Geometric Consistency-
[pdf]
[supp]
[bibtex]@InProceedings{Roetzer_2024_CVPR, author = {Roetzer, Paul and Bernard, Florian}, title = {SpiderMatch: 3D Shape Matching with Global Optimality and Geometric Consistency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14543-14553} }
Evidential Active Recognition: Intelligent and Prudent Open-World Embodied Perception-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Fan_2024_CVPR, author = {Fan, Lei and Liang, Mingfu and Li, Yunxuan and Hua, Gang and Wu, Ying}, title = {Evidential Active Recognition: Intelligent and Prudent Open-World Embodied Perception}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16351-16361} }
The Unreasonable Effectiveness of Pre-Trained Features for Camera Pose Refinement-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Trivigno_2024_CVPR, author = {Trivigno, Gabriele and Masone, Carlo and Caputo, Barbara and Sattler, Torsten}, title = {The Unreasonable Effectiveness of Pre-Trained Features for Camera Pose Refinement}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12786-12798} }
CLIP as RNN: Segment Countless Visual Concepts without Training Endeavor-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Sun_2024_CVPR, author = {Sun, Shuyang and Li, Runjia and Torr, Philip and Gu, Xiuye and Li, Siyang}, title = {CLIP as RNN: Segment Countless Visual Concepts without Training Endeavor}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13171-13182} }
Active Generalized Category Discovery-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ma_2024_CVPR, author = {Ma, Shijie and Zhu, Fei and Zhong, Zhun and Zhang, Xu-Yao and Liu, Cheng-Lin}, title = {Active Generalized Category Discovery}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16890-16900} }
OpenBias: Open-set Bias Detection in Text-to-Image Generative Models-
[pdf]
[supp]
[bibtex]@InProceedings{D'Inca_2024_CVPR, author = {D'Inc{\`a}, Moreno and Peruzzo, Elia and Mancini, Massimiliano and Xu, Dejia and Goel, Vidit and Xu, Xingqian and Wang, Zhangyang and Shi, Humphrey and Sebe, Nicu}, title = {OpenBias: Open-set Bias Detection in Text-to-Image Generative Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12225-12235} }
3DiffTection: 3D Object Detection with Geometry-Aware Diffusion Features-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Chenfeng and Ling, Huan and Fidler, Sanja and Litany, Or}, title = {3DiffTection: 3D Object Detection with Geometry-Aware Diffusion Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10617-10627} }
LowRankOcc: Tensor Decomposition and Low-Rank Recovery for Vision-based 3D Semantic Occupancy Prediction-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Linqing and Xu, Xiuwei and Wang, Ziwei and Zhang, Yunpeng and Zhang, Borui and Zheng, Wenzhao and Du, Dalong and Zhou, Jie and Lu, Jiwen}, title = {LowRankOcc: Tensor Decomposition and Low-Rank Recovery for Vision-based 3D Semantic Occupancy Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9806-9815} }
Novel View Synthesis with View-Dependent Effects from a Single Image-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Bello_2024_CVPR, author = {Bello, Juan Luis Gonzalez and Kim, Munchurl}, title = {Novel View Synthesis with View-Dependent Effects from a Single Image}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10413-10423} }
Point2RBox: Combine Knowledge from Synthetic Visual Patterns for End-to-end Oriented Object Detection with Single Point Supervision-
[pdf]
[arXiv]
[bibtex]@InProceedings{Yu_2024_CVPR, author = {Yu, Yi and Yang, Xue and Li, Qingyun and Da, Feipeng and Dai, Jifeng and Qiao, Yu and Yan, Junchi}, title = {Point2RBox: Combine Knowledge from Synthetic Visual Patterns for End-to-end Oriented Object Detection with Single Point Supervision}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16783-16793} }
HRVDA: High-Resolution Visual Document Assistant-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Chaohu and Yin, Kun and Cao, Haoyu and Jiang, Xinghua and Li, Xin and Liu, Yinsong and Jiang, Deqiang and Sun, Xing and Xu, Linli}, title = {HRVDA: High-Resolution Visual Document Assistant}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15534-15545} }
Learning for Transductive Threshold Calibration in Open-World Recognition-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Qin and An, Dongsheng and Xiao, Tianjun and He, Tong and Tang, Qingming and Wu, Ying Nian and Tighe, Joseph and Xing, Yifan}, title = {Learning for Transductive Threshold Calibration in Open-World Recognition}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17097-17106} }
Weakly-Supervised Emotion Transition Learning for Diverse 3D Co-speech Gesture Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Qi_2024_CVPR, author = {Qi, Xingqun and Pan, Jiahao and Li, Peng and Yuan, Ruibin and Chi, Xiaowei and Li, Mengfei and Luo, Wenhan and Xue, Wei and Zhang, Shanghang and Liu, Qifeng and Guo, Yike}, title = {Weakly-Supervised Emotion Transition Learning for Diverse 3D Co-speech Gesture Generation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10424-10434} }
Causal-CoG: A Causal-Effect Look at Context Generation for Boosting Multi-modal Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Shitian and Li, Zhuowan and Lu, Yadong and Yuille, Alan and Wang, Yan}, title = {Causal-CoG: A Causal-Effect Look at Context Generation for Boosting Multi-modal Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13342-13351} }
Brush2Prompt: Contextual Prompt Generator for Object Inpainting-
[pdf]
[supp]
[bibtex]@InProceedings{Chiu_2024_CVPR, author = {Chiu, Mang Tik and Zhou, Yuqian and Zhang, Lingzhi and Lin, Zhe and Barnes, Connelly and Amirghodsi, Sohrab and Shechtman, Eli and Shi, Humphrey}, title = {Brush2Prompt: Contextual Prompt Generator for Object Inpainting}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12636-12645} }
Joint-Task Regularization for Partially Labeled Multi-Task Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nishi_2024_CVPR, author = {Nishi, Kento and Kim, Junsik and Li, Wanhua and Pfister, Hanspeter}, title = {Joint-Task Regularization for Partially Labeled Multi-Task Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16152-16162} }
Shallow-Deep Collaborative Learning for Unsupervised Visible-Infrared Person Re-Identification-
[pdf]
[supp]
[bibtex]@InProceedings{Yang_2024_CVPR, author = {Yang, Bin and Chen, Jun and Ye, Mang}, title = {Shallow-Deep Collaborative Learning for Unsupervised Visible-Infrared Person Re-Identification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16870-16879} }
Context-Aware Integration of Language and Visual References for Natural Language Tracking-
[pdf]
[arXiv]
[bibtex]@InProceedings{Shao_2024_CVPR, author = {Shao, Yanyan and He, Shuting and Ye, Qi and Feng, Yuchao and Luo, Wenhan and Chen, Jiming}, title = {Context-Aware Integration of Language and Visual References for Natural Language Tracking}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {19208-19217} }
An Edit Friendly DDPM Noise Space: Inversion and Manipulations-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Huberman-Spiegelglas_2024_CVPR, author = {Huberman-Spiegelglas, Inbar and Kulikov, Vladimir and Michaeli, Tomer}, title = {An Edit Friendly DDPM Noise Space: Inversion and Manipulations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12469-12478} }
RoDLA: Benchmarking the Robustness of Document Layout Analysis Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Yufan and Zhang, Jiaming and Peng, Kunyu and Zheng, Junwei and Liu, Ruiping and Torr, Philip and Stiefelhagen, Rainer}, title = {RoDLA: Benchmarking the Robustness of Document Layout Analysis Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15556-15566} }
BilevelPruning: Unified Dynamic and Static Channel Pruning for Convolutional Neural Networks-
[pdf]
[supp]
[bibtex]@InProceedings{Gao_2024_CVPR, author = {Gao, Shangqian and Zhang, Yanfu and Huang, Feihu and Huang, Heng}, title = {BilevelPruning: Unified Dynamic and Static Channel Pruning for Convolutional Neural Networks}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16090-16100} }
IDGuard: Robust General Identity-centric POI Proactive Defense Against Face Editing Abuse-
[pdf]
[supp]
[bibtex]@InProceedings{Dai_2024_CVPR, author = {Dai, Yunshu and Fei, Jianwei and Huang, Fangjun}, title = {IDGuard: Robust General Identity-centric POI Proactive Defense Against Face Editing Abuse}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11934-11943} }
Viewpoint-Aware Visual Grounding in 3D Scenes-
[pdf]
[supp]
[bibtex]@InProceedings{Shi_2024_CVPR, author = {Shi, Xiangxi and Wu, Zhonghua and Lee, Stefan}, title = {Viewpoint-Aware Visual Grounding in 3D Scenes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14056-14065} }
CRKD: Enhanced Camera-Radar Object Detection with Cross-modality Knowledge Distillation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Zhao_2024_CVPR, author = {Zhao, Lingjun and Song, Jingyu and Skinner, Katherine A.}, title = {CRKD: Enhanced Camera-Radar Object Detection with Cross-modality Knowledge Distillation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15470-15480} }
CoG-DQA: Chain-of-Guiding Learning with Large Language Models for Diagram Question Answering-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Shaowei and Zhang, Lingling and Zhu, Longji and Qin, Tao and Yap, Kim-Hui and Zhang, Xinyu and Liu, Jun}, title = {CoG-DQA: Chain-of-Guiding Learning with Large Language Models for Diagram Question Answering}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13969-13979} }
Transferable and Principled Efficiency for Open-Vocabulary Segmentation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xu_2024_CVPR, author = {Xu, Jingxuan and Chen, Wuyang and Zhao, Yao and Wei, Yunchao}, title = {Transferable and Principled Efficiency for Open-Vocabulary Segmentation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15814-15824} }
EvDiG: Event-guided Direct and Global Components Separation-
[pdf]
[supp]
[bibtex]@InProceedings{Zhou_2024_CVPR, author = {Zhou, Xinyu and Duan, Peiqi and Li, Boyu and Zhou, Chu and Xu, Chao and Shi, Boxin}, title = {EvDiG: Event-guided Direct and Global Components Separation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {9612-9621} }
Feedback-Guided Autonomous Driving-
[pdf]
[bibtex]@InProceedings{Zhang_2024_CVPR, author = {Zhang, Jimuyang and Huang, Zanming and Ray, Arijit and Ohn-Bar, Eshed}, title = {Feedback-Guided Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15000-15011} }
DiLiGenRT: A Photometric Stereo Dataset with Quantified Roughness and Translucency-
[pdf]
[supp]
[bibtex]@InProceedings{Guo_2024_CVPR, author = {Guo, Heng and Ren, Jieji and Wang, Feishi and Shi, Boxin and Ren, Mingjun and Matsushita, Yasuyuki}, title = {DiLiGenRT: A Photometric Stereo Dataset with Quantified Roughness and Translucency}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11810-11820} }
De-Diffusion Makes Text a Strong Cross-Modal Interface-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wei_2024_CVPR, author = {Wei, Chen and Liu, Chenxi and Qiao, Siyuan and Zhang, Zhishuai and Yuille, Alan and Yu, Jiahui}, title = {De-Diffusion Makes Text a Strong Cross-Modal Interface}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13492-13503} }
End-to-End Spatio-Temporal Action Localisation with Video Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Gritsenko_2024_CVPR, author = {Gritsenko, Alexey A. and Xiong, Xuehan and Djolonga, Josip and Dehghani, Mostafa and Sun, Chen and Lucic, Mario and Schmid, Cordelia and Arnab, Anurag}, title = {End-to-End Spatio-Temporal Action Localisation with Video Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18373-18383} }
End-to-End Temporal Action Detection with 1B Parameters Across 1000 Frames-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Shuming and Zhang, Chen-Lin and Zhao, Chen and Ghanem, Bernard}, title = {End-to-End Temporal Action Detection with 1B Parameters Across 1000 Frames}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18591-18601} }
TransNeXt: Robust Foveal Visual Perception for Vision Transformers-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Shi_2024_CVPR, author = {Shi, Dai}, title = {TransNeXt: Robust Foveal Visual Perception for Vision Transformers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17773-17783} }
Modeling Dense Multimodal Interactions Between Biological Pathways and Histology for Survival Prediction-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Jaume_2024_CVPR, author = {Jaume, Guillaume and Vaidya, Anurag and Chen, Richard J. and Williamson, Drew F.K. and Liang, Paul Pu and Mahmood, Faisal}, title = {Modeling Dense Multimodal Interactions Between Biological Pathways and Histology for Survival Prediction}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11579-11590} }
Mining Supervision for Dynamic Regions in Self-Supervised Monocular Depth Estimation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Nguyen_2024_CVPR, author = {Nguyen, Hoang Chuong and Wang, Tianyu and Alvarez, Jose M. and Liu, Miaomiao}, title = {Mining Supervision for Dynamic Regions in Self-Supervised Monocular Depth Estimation}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10446-10455} }
Physics-guided Shape-from-Template: Monocular Video Perception through Neural Surrogate Models-
[pdf]
[supp]
[bibtex]@InProceedings{Stotko_2024_CVPR, author = {Stotko, David and Wandel, Nils and Klein, Reinhard}, title = {Physics-guided Shape-from-Template: Monocular Video Perception through Neural Surrogate Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11895-11904} }
You'll Never Walk Alone: A Sketch and Text Duet for Fine-Grained Image Retrieval-
[pdf]
[supp]
[bibtex]@InProceedings{Koley_2024_CVPR, author = {Koley, Subhadeep and Bhunia, Ayan Kumar and Sain, Aneeshan and Chowdhury, Pinaki Nath and Xiang, Tao and Song, Yi-Zhe}, title = {You'll Never Walk Alone: A Sketch and Text Duet for Fine-Grained Image Retrieval}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16509-16519} }
Unsupervised 3D Structure Inference from Category-Specific Image Collections-
[pdf]
[supp]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Weikang and Cao, Dongliang and Bernard, Florian}, title = {Unsupervised 3D Structure Inference from Category-Specific Image Collections}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10704-10714} }
DiG-IN: Diffusion Guidance for Investigating Networks - Uncovering Classifier Differences Neuron Visualisations and Visual Counterfactual Explanations-
[pdf]
[supp]
[bibtex]@InProceedings{Augustin_2024_CVPR, author = {Augustin, Maximilian and Neuhaus, Yannic and Hein, Matthias}, title = {DiG-IN: Diffusion Guidance for Investigating Networks - Uncovering Classifier Differences Neuron Visualisations and Visual Counterfactual Explanations}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11093-11103} }
RepViT: Revisiting Mobile CNN From ViT Perspective-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Ao and Chen, Hui and Lin, Zijia and Han, Jungong and Ding, Guiguang}, title = {RepViT: Revisiting Mobile CNN From ViT Perspective}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {15909-15920} }
MonoNPHM: Dynamic Head Reconstruction from Monocular Videos-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Giebenhain_2024_CVPR, author = {Giebenhain, Simon and Kirschstein, Tobias and Georgopoulos, Markos and R{\"u}nz, Martin and Agapito, Lourdes and Nie{\ss}ner, Matthias}, title = {MonoNPHM: Dynamic Head Reconstruction from Monocular Videos}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10747-10758} }
Realigning Confidence with Temporal Saliency Information for Point-Level Weakly-Supervised Temporal Action Localization-
[pdf]
[supp]
[bibtex]@InProceedings{Xia_2024_CVPR, author = {Xia, Ziying and Cheng, Jian and Liu, Siyu and Hu, Yongxiang and Wang, Shiguang and Zhang, Yijie and Dang, Liwan}, title = {Realigning Confidence with Temporal Saliency Information for Point-Level Weakly-Supervised Temporal Action Localization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {18440-18450} }
Theoretically Achieving Continuous Representation of Oriented Bounding Boxes-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Xiao_2024_CVPR, author = {Xiao, Zikai and Yang, Guoye and Yang, Xue and Mu, Taijiang and Yan, Junchi and Hu, Shimin}, title = {Theoretically Achieving Continuous Representation of Oriented Bounding Boxes}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16912-16922} }
Learning Large-Factor EM Image Super-Resolution with Generative Priors-
[pdf]
[supp]
[bibtex]@InProceedings{Shou_2024_CVPR, author = {Shou, Jiateng and Xiao, Zeyu and Deng, Shiyu and Huang, Wei and Shi, Peiyao and Zhang, Ruobing and Xiong, Zhiwei and Wu, Feng}, title = {Learning Large-Factor EM Image Super-Resolution with Generative Priors}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11313-11322} }
Adaptive Fusion of Single-View and Multi-View Depth for Autonomous Driving-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Cheng_2024_CVPR, author = {Cheng, Junda and Yin, Wei and Wang, Kaixuan and Chen, Xiaozhi and Wang, Shijie and Yang, Xin}, title = {Adaptive Fusion of Single-View and Multi-View Depth for Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10138-10147} }
Continual Self-supervised Learning: Towards Universal Multi-modal Medical Data Representation Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Ye_2024_CVPR, author = {Ye, Yiwen and Xie, Yutong and Zhang, Jianpeng and Chen, Ziyang and Wu, Qi and Xia, Yong}, title = {Continual Self-supervised Learning: Towards Universal Multi-modal Medical Data Representation Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11114-11124} }
Towards Efficient Replay in Federated Incremental Learning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Yichen and Li, Qunwei and Wang, Haozhao and Li, Ruixuan and Zhong, Wenliang and Zhang, Guannan}, title = {Towards Efficient Replay in Federated Incremental Learning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12820-12829} }
SimAC: A Simple Anti-Customization Method for Protecting Face Privacy against Text-to-Image Synthesis of Diffusion Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Feifei and Tan, Zhentao and Wei, Tianyi and Wu, Yue and Huang, Qidong}, title = {SimAC: A Simple Anti-Customization Method for Protecting Face Privacy against Text-to-Image Synthesis of Diffusion Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12047-12056} }
Fair-VPT: Fair Visual Prompt Tuning for Image Classification-
[pdf]
[supp]
[bibtex]@InProceedings{Park_2024_CVPR, author = {Park, Sungho and Byun, Hyeran}, title = {Fair-VPT: Fair Visual Prompt Tuning for Image Classification}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12268-12278} }
CaDeT: a Causal Disentanglement Approach for Robust Trajectory Prediction in Autonomous Driving-
[pdf]
[supp]
[bibtex]@InProceedings{Pourkeshavarz_2024_CVPR, author = {Pourkeshavarz, Mozhgan and Zhang, Junrui and Rasouli, Amir}, title = {CaDeT: a Causal Disentanglement Approach for Robust Trajectory Prediction in Autonomous Driving}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {14874-14884} }
Prompting Vision Foundation Models for Pathology Image Analysis-
[pdf]
[supp]
[bibtex]@InProceedings{Yin_2024_CVPR, author = {Yin, Chong and Liu, Siqi and Zhou, Kaiyang and Wong, Vincent Wai-Sun and Yuen, Pong C.}, title = {Prompting Vision Foundation Models for Pathology Image Analysis}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {11292-11301} }
SEED-Bench: Benchmarking Multimodal Large Language Models-
[pdf]
[supp]
[bibtex]@InProceedings{Li_2024_CVPR, author = {Li, Bohao and Ge, Yuying and Ge, Yixiao and Wang, Guangzhi and Wang, Rui and Zhang, Ruimao and Shan, Ying}, title = {SEED-Bench: Benchmarking Multimodal Large Language Models}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13299-13308} }
Object Pose Estimation via the Aggregation of Diffusion Features-
[pdf]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR, author = {Wang, Tianfu and Hu, Guosheng and Wang, Hongguang}, title = {Object Pose Estimation via the Aggregation of Diffusion Features}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {10238-10247} }
Panda-70M: Captioning 70M Videos with Multiple Cross-Modality Teachers-
[pdf]
[supp]
[bibtex]@InProceedings{Chen_2024_CVPR, author = {Chen, Tsai-Shien and Siarohin, Aliaksandr and Menapace, Willi and Deyneka, Ekaterina and Chao, Hsiang-wei and Jeon, Byung Eun and Fang, Yuwei and Lee, Hsin-Ying and Ren, Jian and Yang, Ming-Hsuan and Tulyakov, Sergey}, title = {Panda-70M: Captioning 70M Videos with Multiple Cross-Modality Teachers}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {13320-13331} }
Infrared Small Target Detection with Scale and Location Sensitivity-
[pdf]
[arXiv]
[bibtex]@InProceedings{Liu_2024_CVPR, author = {Liu, Qiankun and Liu, Rui and Zheng, Bolun and Wang, Hongkui and Fu, Ying}, title = {Infrared Small Target Detection with Scale and Location Sensitivity}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {17490-17499} }
Self-supervised Debiasing Using Low Rank Regularization-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2024_CVPR, author = {Park, Geon Yeong and Jung, Chanyong and Lee, Sangmin and Ye, Jong Chul and Lee, Sang Wan}, title = {Self-supervised Debiasing Using Low Rank Regularization}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {12395-12405} }
Finding Lottery Tickets in Vision Models via Data-driven Spectral Foresight Pruning-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Iurada_2024_CVPR, author = {Iurada, Leonardo and Ciccone, Marco and Tommasi, Tatiana}, title = {Finding Lottery Tickets in Vision Models via Data-driven Spectral Foresight Pruning}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, month = {June}, year = {2024}, pages = {16142-16151} }
InNeRF360: Text-Guided 3D-Consistent Object Inpainting on 360-degree Neural Radiance Fields-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Wang_2024_CVPR,
  author    = {Wang, Dongqing and Zhang, Tong and Abboud, Alaa and S{\"u}sstrunk, Sabine},
  title     = {{InNeRF360}: Text-Guided {3D}-Consistent Object Inpainting on 360-degree Neural Radiance Fields},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12677--12686},
}
IS-Fusion: Instance-Scene Collaborative Fusion for Multimodal 3D Object Detection-
[pdf]
[bibtex]@InProceedings{Yin_2024_CVPR,
  author    = {Yin, Junbo and Shen, Jianbing and Chen, Runnan and Li, Wei and Yang, Ruigang and Frossard, Pascal and Wang, Wenguan},
  title     = {{IS-Fusion}: Instance-Scene Collaborative Fusion for Multimodal {3D} Object Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14905--14915},
}
Enhancing Intrinsic Features for Debiasing via Investigating Class-Discerning Common Attributes in Bias-Contrastive Pair-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Park_2024_CVPR,
  author    = {Park, Jeonghoon and Chung, Chaeyeon and Choo, Jaegul},
  title     = {Enhancing Intrinsic Features for Debiasing via Investigating Class-Discerning Common Attributes in Bias-Contrastive Pair},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12332--12341},
}
Compositional Chain-of-Thought Prompting for Large Multimodal Models-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Mitra_2024_CVPR,
  author    = {Mitra, Chancharik and Huang, Brandon and Darrell, Trevor and Herzig, Roei},
  title     = {Compositional Chain-of-Thought Prompting for Large Multimodal Models},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {14420--14431},
}
Diffusion Time-step Curriculum for One Image to 3D Generation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Yi_2024_CVPR,
  author    = {Yi, Xuanyu and Wu, Zike and Xu, Qingshan and Zhou, Pan and Lim, Joo-Hwee and Zhang, Hanwang},
  title     = {Diffusion Time-step Curriculum for One Image to {3D} Generation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {9948--9958},
}
Adaptive Hyper-graph Aggregation for Modality-Agnostic Federated Learning-
[pdf]
[supp]
[bibtex]@InProceedings{Qi_2024_CVPR,
  author    = {Qi, Fan and Li, Shuai},
  title     = {Adaptive Hyper-graph Aggregation for Modality-Agnostic Federated Learning},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {12312--12321},
}
SPIN: Simultaneous Perception Interaction and Navigation-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Uppal_2024_CVPR,
  author    = {Uppal, Shagun and Agarwal, Ananye and Xiong, Haoyu and Shaw, Kenneth and Pathak, Deepak},
  title     = {{SPIN}: Simultaneous Perception Interaction and Navigation},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {18133--18142},
}
Exploring the Potential of Large Foundation Models for Open-Vocabulary HOI Detection-
[pdf]
[supp]
[arXiv]
[bibtex]@InProceedings{Lei_2024_CVPR,
  author    = {Lei, Ting and Yin, Shaofeng and Liu, Yang},
  title     = {Exploring the Potential of Large Foundation Models for Open-Vocabulary {HOI} Detection},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  month     = jun,
  year      = {2024},
  pages     = {16657--16667},
}
Back